Skip to content

Commit

Permalink
fix, Score with passing tests in code-repair task because coverage can …
Browse files Browse the repository at this point in the history
…be cheated

Models can just add statements to get more coverage.

Fixes #320
  • Loading branch information
bauersimon committed Aug 19, 2024
1 parent ce536a4 commit b508cb1
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
5 changes: 3 additions & 2 deletions evaluate/task/task-code-repair.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,10 @@ func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[eva

continue
}
taskLogger.Printf("Executes tests with %d coverage objects", testResult.Coverage)
testsPassing := testResult.TestsPass
taskLogger.Printf("Executes tests with %d tests passing", testsPassing)
modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, testResult.Coverage)
modelAssessment.AwardPoints(metrics.AssessmentKeyTestsPassing, uint64(testsPassing))
}

repositoryAssessment = map[evaltask.Identifier]metrics.Assessments{
Expand Down
8 changes: 4 additions & 4 deletions evaluate/task/task-code-repair_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ func TestTaskCodeRepairRun(t *testing.T) {

ExpectedRepositoryAssessment: map[evaltask.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 30,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
metrics.AssessmentKeyResponseNoError: 1,
metrics.AssessmentKeyTestsPassing: 40,
},
},
ValidateLog: func(t *testing.T, data string) {
Expand Down Expand Up @@ -135,10 +135,10 @@ func TestTaskCodeRepairRun(t *testing.T) {

ExpectedRepositoryAssessment: map[evaltask.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 60,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyTestsPassing: 80,
},
},
ValidateLog: func(t *testing.T, data string) {
Expand Down Expand Up @@ -190,10 +190,10 @@ func TestTaskCodeRepairRun(t *testing.T) {

ExpectedRepositoryAssessment: map[evaltask.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 80,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
metrics.AssessmentKeyResponseNoError: 1,
metrics.AssessmentKeyTestsPassing: 30,
},
},
ValidateLog: func(t *testing.T, data string) {
Expand Down Expand Up @@ -256,10 +256,10 @@ func TestTaskCodeRepairRun(t *testing.T) {

ExpectedRepositoryAssessment: map[evaltask.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 160,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyTestsPassing: 60,
},
},
ValidateLog: func(t *testing.T, data string) {
Expand Down

0 comments on commit b508cb1

Please sign in to comment.