Skip to content

Commit

Permalink
refactor, Let the tasks return assessments per task identifiers, so e…
Browse files Browse the repository at this point in the history
…ach task can run the "symflower fix" and store its assessments separately

Part of #213
  • Loading branch information
ruiAzevedo19 committed Jul 1, 2024
1 parent 6fd5180 commit fcf95c4
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 52 deletions.
4 changes: 2 additions & 2 deletions evaluate/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
}

assessment, ps, err := task.Run(temporaryRepository)
assessments.Add(model, language, repositoryPath, taskIdentifier, assessment)
assessments.AddAssessmentPerTask(model, language, repositoryPath, assessment)
if err != nil {
ps = append(ps, err)
}
Expand Down Expand Up @@ -226,7 +226,7 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
}

assessment, ps, err := task.Run(temporaryRepository)
assessments.Add(model, language, repositoryPath, taskIdentifier, assessment)
assessments.AddAssessmentPerTask(model, language, repositoryPath, assessment)
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
if err != nil {
ctx.Log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err)
Expand Down
42 changes: 34 additions & 8 deletions evaluate/metrics/testing/assessments.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package metricstesting

import (
"maps"
"testing"

"golang.org/x/exp/maps"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/language"
Expand All @@ -17,17 +19,41 @@ func AssertAssessmentsEqual(t *testing.T, expected metrics.Assessments, actual m
expected = maps.Clone(expected)
actual = maps.Clone(actual)

expected[metrics.AssessmentKeyProcessingTime] = 0
actual[metrics.AssessmentKeyProcessingTime] = 0

expected[metrics.AssessmentKeyGenerateTestsForFileCharacterCount] = 0
actual[metrics.AssessmentKeyGenerateTestsForFileCharacterCount] = 0
expected[metrics.AssessmentKeyResponseCharacterCount] = 0
actual[metrics.AssessmentKeyResponseCharacterCount] = 0
clearNonDeterministicAssessmentValues(expected)
clearNonDeterministicAssessmentValues(actual)

assert.Truef(t, expected.Equal(actual), "expected:%s\nactual:%s", expected, actual)
}

// AssertTaskAssessmentsEqual checks if the given assessments per task are equal ignoring default and nondeterministic values.
// Both maps must contain exactly the same task identifiers, otherwise the test is stopped immediately.
// The given maps and the assessments they hold are never modified.
func AssertTaskAssessmentsEqual(t *testing.T, expected map[task.Identifier]metrics.Assessments, actual map[task.Identifier]metrics.Assessments) {
	// The expected and actual maps must have the same task identifiers.
	require.ElementsMatch(t, maps.Keys(expected), maps.Keys(actual))

	for taskIdentifier, expectedAssessment := range expected {
		// Cloning only the outer maps would still share the inner assessments with the caller, so clearing values would silently alter the caller's data (e.g. shared test fixtures). Clone each assessment before touching it.
		expectedAssessment = maps.Clone(expectedAssessment)
		actualAssessment := maps.Clone(actual[taskIdentifier])

		// Ignore non-deterministic values.
		clearNonDeterministicAssessmentValues(expectedAssessment)
		clearNonDeterministicAssessmentValues(actualAssessment)

		// Report only the assessments of the mismatching task instead of the complete maps.
		assert.Truef(t, expectedAssessment.Equal(actualAssessment), "task:%s\nexpected:%s\nactual:%s", taskIdentifier, expectedAssessment, actualAssessment)
	}
}

// clearNonDeterministicAssessmentValues resets the non-deterministic values of the given assessment to zero, i.e. the processing time and the character counts of the generated tests and of the response.
// The given assessment is modified in place, so callers that must keep the original values need to pass a copy.
func clearNonDeterministicAssessmentValues(assessment metrics.Assessments) {
assessment[metrics.AssessmentKeyProcessingTime] = 0
assessment[metrics.AssessmentKeyGenerateTestsForFileCharacterCount] = 0
assessment[metrics.AssessmentKeyResponseCharacterCount] = 0
}

// AssessmentsWithProcessingTime is an empty assessment collection with positive processing time.
var AssessmentsWithProcessingTime = metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
Expand Down
7 changes: 7 additions & 0 deletions evaluate/report/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ func (a *AssessmentStore) Add(model model.Model, l language.Language, repository
assessments.Add(assessment)
}

// AddAssessmentPerTask stores every entry of the given per-task assessments under its own task identifier for the given model, language and repository.
func (a *AssessmentStore) AddAssessmentPerTask(model model.Model, l language.Language, repositoryPath string, taskAssessment map[task.Identifier]metrics.Assessments) {
	for identifier := range taskAssessment {
		assessmentOfTask := taskAssessment[identifier]
		a.Add(model, l, repositoryPath, identifier, assessmentOfTask)
	}
}

// Walk walks over all entries.
func (a *AssessmentStore) Walk(function func(m model.Model, l language.Language, r string, t task.Identifier, a metrics.Assessments) error) (err error) {
models := maps.Keys(a.store)
Expand Down
16 changes: 10 additions & 6 deletions evaluate/task/task-code-repair.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (t *TaskCodeRepair) Identifier() evaltask.Identifier {

// Run performs source code repairing in a repository with compilation errors.
// This task requires the repository to consist of multiple packages, with each containing one faulty implementation file and a corresponding test file.
func (t *TaskCodeRepair) Run(repository evaltask.Repository) (repositoryAssessment metrics.Assessments, problems []error, err error) {
func (t *TaskCodeRepair) Run(repository evaltask.Repository) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) {
log, logClose, err := log.WithFile(t.Logger, filepath.Join(t.ResultPath, string(t.Identifier()), model.CleanModelNameForFileSystem(t.Model.ID()), t.Language.ID(), repository.Name()+".log"))
if err != nil {
return nil, nil, err
Expand All @@ -75,7 +75,7 @@ func (t *TaskCodeRepair) Run(repository evaltask.Repository) (repositoryAssessme
}
}

repositoryAssessment = metrics.NewAssessments()
modelAssessment := metrics.NewAssessments()
for _, packagePath := range packagePaths {
if err := repository.Reset(t.Logger); err != nil {
t.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
Expand Down Expand Up @@ -107,8 +107,8 @@ func (t *TaskCodeRepair) Run(repository evaltask.Repository) (repositoryAssessme
if assessments[metrics.AssessmentKeyProcessingTime] == 0 {
return nil, nil, pkgerrors.Errorf("no model response time measurement present for %q at repository %q", t.Model.ID(), repository.Name())
}
repositoryAssessment.Add(assessments)
repositoryAssessment.Award(metrics.AssessmentKeyResponseNoError)
modelAssessment.Add(assessments)
modelAssessment.Award(metrics.AssessmentKeyResponseNoError)

coverage, ps, err := t.Language.Execute(log, packagePath)
problems = append(problems, ps...)
Expand All @@ -118,8 +118,12 @@ func (t *TaskCodeRepair) Run(repository evaltask.Repository) (repositoryAssessme
continue
}
log.Printf("Executes tests with %d coverage objects", coverage)
repositoryAssessment.Award(metrics.AssessmentKeyFilesExecuted)
repositoryAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
}

repositoryAssessment = map[evaltask.Identifier]metrics.Assessments{
IdentifierCodeRepair: modelAssessment,
}

return repositoryAssessment, problems, nil
Expand Down
41 changes: 25 additions & 16 deletions evaluate/task/task-code-repair_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/symflower/eval-dev-quality/language/java"
"github.com/symflower/eval-dev-quality/log"
modeltesting "github.com/symflower/eval-dev-quality/model/testing"
"github.com/symflower/eval-dev-quality/task"
"github.com/zimmski/osutil"
"github.com/zimmski/osutil/bytesutil"
)
Expand Down Expand Up @@ -71,10 +72,12 @@ func TestTaskCodeRepairRun(t *testing.T) {
TestDataPath: temporaryDirectoryPath,
RepositoryPath: filepath.Join("golang", "mistakes"),

ExpectedRepositoryAssessment: metrics.Assessments{
metrics.AssessmentKeyCoverage: 30,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
ExpectedRepositoryAssessment: map[task.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 30,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
},
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(IdentifierCodeRepair), "mocked-model", "golang", "golang", "mistakes.log"): func(t *testing.T, filePath, data string) {
Expand Down Expand Up @@ -134,10 +137,12 @@ func TestTaskCodeRepairRun(t *testing.T) {
TestDataPath: temporaryDirectoryPath,
RepositoryPath: filepath.Join("golang", "mistakes"),

ExpectedRepositoryAssessment: metrics.Assessments{
metrics.AssessmentKeyCoverage: 60,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
ExpectedRepositoryAssessment: map[task.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 60,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
},
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(IdentifierCodeRepair), "mocked-model", "golang", "golang", "mistakes.log"): func(t *testing.T, filePath, data string) {
Expand Down Expand Up @@ -188,10 +193,12 @@ func TestTaskCodeRepairRun(t *testing.T) {
TestDataPath: temporaryDirectoryPath,
RepositoryPath: filepath.Join("java", "mistakes"),

ExpectedRepositoryAssessment: metrics.Assessments{
metrics.AssessmentKeyCoverage: 80,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
ExpectedRepositoryAssessment: map[task.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 80,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
},
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(IdentifierCodeRepair), "mocked-model", "java", "java", "mistakes.log"): func(t *testing.T, filePath, data string) {
Expand Down Expand Up @@ -253,10 +260,12 @@ func TestTaskCodeRepairRun(t *testing.T) {
TestDataPath: temporaryDirectoryPath,
RepositoryPath: filepath.Join("java", "mistakes"),

ExpectedRepositoryAssessment: metrics.Assessments{
metrics.AssessmentKeyCoverage: 160,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
ExpectedRepositoryAssessment: map[task.Identifier]metrics.Assessments{
IdentifierCodeRepair: metrics.Assessments{
metrics.AssessmentKeyCoverage: 160,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
},
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(IdentifierCodeRepair), "mocked-model", "java", "java", "mistakes.log"): func(t *testing.T, filePath, data string) {
Expand Down
16 changes: 10 additions & 6 deletions evaluate/task/task-write-test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (t *TaskWriteTests) Identifier() evaltask.Identifier {
}

// Run generates test files for the given implementation file in a repository.
func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessment metrics.Assessments, problems []error, err error) {
func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) {
dataPath := repository.DataPath()

log, logClose, err := log.WithFile(t.Logger, filepath.Join(t.ResultPath, string(t.Identifier()), model.CleanModelNameForFileSystem(t.Model.ID()), t.Language.ID(), repository.Name()+".log"))
Expand All @@ -62,7 +62,7 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
return nil, problems, pkgerrors.WithStack(err)
}

repositoryAssessment = metrics.NewAssessments()
modelAssessment := metrics.NewAssessments()
for _, filePath := range filePaths {
if err := repository.Reset(t.Logger); err != nil {
t.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
Expand All @@ -85,8 +85,8 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
if assessments[metrics.AssessmentKeyProcessingTime] == 0 {
return nil, nil, pkgerrors.Errorf("no model response time measurement present for %q at repository %q", t.Model.ID(), repository.Name())
}
repositoryAssessment.Add(assessments)
repositoryAssessment.Award(metrics.AssessmentKeyResponseNoError)
modelAssessment.Add(assessments)
modelAssessment.Award(metrics.AssessmentKeyResponseNoError)

coverage, ps, err := t.Language.Execute(log, dataPath)
problems = append(problems, ps...)
Expand All @@ -96,8 +96,12 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
continue
}
log.Printf("Executes tests with %d coverage objects", coverage)
repositoryAssessment.Award(metrics.AssessmentKeyFilesExecuted)
repositoryAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
}

repositoryAssessment = map[evaltask.Identifier]metrics.Assessments{
IdentifierWriteTests: modelAssessment,
}

return repositoryAssessment, problems, nil
Expand Down
9 changes: 6 additions & 3 deletions evaluate/task/task-write-test_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/symflower/eval-dev-quality/language/golang"
"github.com/symflower/eval-dev-quality/log"
modeltesting "github.com/symflower/eval-dev-quality/model/testing"
"github.com/symflower/eval-dev-quality/task"
)

func TestTaskWriteTestsRun(t *testing.T) {
Expand Down Expand Up @@ -59,9 +60,11 @@ func TestTaskWriteTestsRun(t *testing.T) {
TestDataPath: temporaryDirectoryPath,
RepositoryPath: filepath.Join("golang", "plain"),

ExpectedRepositoryAssessment: metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 2,
ExpectedRepositoryAssessment: map[task.Identifier]metrics.Assessments{
IdentifierWriteTests: metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 2,
},
},
ExpectedProblemContains: []string{
"expected 'package', found does",
Expand Down
19 changes: 11 additions & 8 deletions evaluate/task/test-integration/task_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/symflower/eval-dev-quality/language/golang"
"github.com/symflower/eval-dev-quality/log"
"github.com/symflower/eval-dev-quality/model/symflower"
"github.com/symflower/eval-dev-quality/task"
"github.com/symflower/eval-dev-quality/tools"
toolstesting "github.com/symflower/eval-dev-quality/tools/testing"
)
Expand Down Expand Up @@ -48,14 +49,16 @@ func TestTaskWriteTestsRun(t *testing.T) {
TestDataPath: filepath.Join("..", "..", "..", "testdata"),
RepositoryPath: filepath.Join("golang", "plain"),

ExpectedRepositoryAssessment: metrics.Assessments{
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254,
metrics.AssessmentKeyResponseCharacterCount: 254,
metrics.AssessmentKeyCoverage: 10,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
metrics.AssessmentKeyResponseNoExcess: 1,
metrics.AssessmentKeyResponseWithCode: 1,
ExpectedRepositoryAssessment: map[task.Identifier]metrics.Assessments{
evaluatetask.IdentifierWriteTests: metrics.Assessments{
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 254,
metrics.AssessmentKeyResponseCharacterCount: 254,
metrics.AssessmentKeyCoverage: 10,
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
metrics.AssessmentKeyResponseNoExcess: 1,
metrics.AssessmentKeyResponseWithCode: 1,
},
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
Expand Down
4 changes: 2 additions & 2 deletions evaluate/task/testing/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ type TestCaseTask struct {
TestDataPath string
RepositoryPath string

ExpectedRepositoryAssessment metrics.Assessments
ExpectedRepositoryAssessment map[evaltask.Identifier]metrics.Assessments
ExpectedResultFiles map[string]func(t *testing.T, filePath string, data string)
ExpectedProblemContains []string
ExpectedError error
Expand All @@ -33,7 +33,7 @@ type TestCaseTask struct {
func (tc *TestCaseTask) Validate(t *testing.T, task evaltask.Task, repository evaltask.Repository, resultPath string) {
actualRepositoryAssessment, actualProblems, actualErr := task.Run(repository)

metricstesting.AssertAssessmentsEqual(t, tc.ExpectedRepositoryAssessment, actualRepositoryAssessment)
metricstesting.AssertTaskAssessmentsEqual(t, tc.ExpectedRepositoryAssessment, actualRepositoryAssessment)
if assert.Equal(t, len(tc.ExpectedProblemContains), len(actualProblems), "problems count") {
for i, expectedProblem := range tc.ExpectedProblemContains {
actualProblem := actualProblems[i]
Expand Down
2 changes: 1 addition & 1 deletion task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type Task interface {
Identifier() (identifier Identifier)

// Run runs a task in a given repository.
Run(repository Repository) (assessments metrics.Assessments, problems []error, err error)
Run(repository Repository) (assessments map[Identifier]metrics.Assessments, problems []error, err error)
}

// Repository defines a repository to be evaluated.
Expand Down

0 comments on commit fcf95c4

Please sign in to comment.