From a3ab62b4280377721600be93222457354d195310 Mon Sep 17 00:00:00 2001
From: Rui Azevedo
Date: Fri, 28 Jun 2024 14:08:42 +0100
Subject: [PATCH] TODO Apply "symflower fix" to a "write-test" result of a model when it errors, so model responses can possibly be fixed

Part of #213
---
Reviewer note (this section is ignored by "git am"): the blob hashes on the
"index" lines were not recomputed after the review fixes below.

 evaluate/metrics/assessment.go        | 12 ++++
 evaluate/metrics/assessment_test.go   | 47 ++++++++++++++++
 evaluate/task/symflower-fix.go        | 33 +++++++++++
 evaluate/task/task-write-test.go      | 36 +++++++++++-
 evaluate/task/task-write-test_test.go | 81 +++++++++++++++++++++++++++
 evaluate/task/task.go                 |  2 +
 6 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 evaluate/task/symflower-fix.go

diff --git a/evaluate/metrics/assessment.go b/evaluate/metrics/assessment.go
index 5b788ec81..343028e90 100644
--- a/evaluate/metrics/assessment.go
+++ b/evaluate/metrics/assessment.go
@@ -152,3 +152,15 @@ func (a Assessments) StringCSV() (row []string) {
 
 	return row
 }
+
+// CombineModelAndSymflowerFixAssessments combines the model assessments with the ones from "symflower fix".
+// The result holds every "symflower fix" assessment with the processing time replaced by the sum of both processing times, and neither argument is modified.
+func CombineModelAndSymflowerFixAssessments(modelAssessment Assessments, symflowerFixAssessments Assessments) (assessments Assessments) {
+	assessments = make(Assessments, len(symflowerFixAssessments)+1)
+	for key, value := range symflowerFixAssessments {
+		assessments[key] = value
+	}
+	assessments[AssessmentKeyProcessingTime] = modelAssessment[AssessmentKeyProcessingTime] + symflowerFixAssessments[AssessmentKeyProcessingTime]
+
+	return assessments
+}
diff --git a/evaluate/metrics/assessment_test.go b/evaluate/metrics/assessment_test.go
index 1ae4fe327..b9f779e29 100644
--- a/evaluate/metrics/assessment_test.go
+++ b/evaluate/metrics/assessment_test.go
@@ -272,3 +272,50 @@ func TestAssessmentsScore(t *testing.T) {
 		ExpectedScore: uint64(9),
 	})
 }
+
+func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
+	type testCase struct {
+		Name string
+
+		ModelAssessment         Assessments
+		SymflowerFixAssessments Assessments
+
+		ExpectedAssessments Assessments
+	}
+
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			actualAssessments := CombineModelAndSymflowerFixAssessments(tc.ModelAssessment, tc.SymflowerFixAssessments)
+
+			assert.Equal(t, tc.ExpectedAssessments, actualAssessments)
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "Simple",
+
+		ModelAssessment: Assessments{
+			AssessmentKeyFilesExecuted:                      2,
+			AssessmentKeyProcessingTime:                     uint64(200),
+			AssessmentKeyCoverage:                           0,
+			AssessmentKeyResponseCharacterCount:             100,
+			AssessmentKeyGenerateTestsForFileCharacterCount: 50,
+			AssessmentKeyResponseNoError:                    0,
+			AssessmentKeyResponseWithCode:                   1,
+			AssessmentKeyResponseNoExcess:                   1,
+		},
+		SymflowerFixAssessments: Assessments{
+			AssessmentKeyFilesExecuted:   1,
+			AssessmentKeyProcessingTime:  uint64(100),
+			AssessmentKeyCoverage:        10,
+			AssessmentKeyResponseNoError: 1,
+		},
+
+		ExpectedAssessments: Assessments{
+			AssessmentKeyFilesExecuted:   1,
+			AssessmentKeyProcessingTime:  uint64(300),
+			AssessmentKeyCoverage:        10,
+			AssessmentKeyResponseNoError: 1,
+		},
+	})
+}
diff --git a/evaluate/task/symflower-fix.go b/evaluate/task/symflower-fix.go
new file mode 100644
index 000000000..a441f4cbd
--- /dev/null
+++ b/evaluate/task/symflower-fix.go
@@ -0,0 +1,33 @@
+package task
+
+import (
+	"context"
+	"time"
+
+	pkgerrors "github.com/pkg/errors"
+	"github.com/symflower/eval-dev-quality/evaluate/metrics"
+	"github.com/symflower/eval-dev-quality/language"
+	"github.com/symflower/eval-dev-quality/log"
+	"github.com/symflower/eval-dev-quality/tools"
+	"github.com/symflower/eval-dev-quality/util"
+)
+
+// symflowerFix runs the "symflower fix" command with "repositoryPath" as workspace and working directory, and returns its execution time in milliseconds.
+// The "modelAssessment" argument is currently not used, and a failing command yields a zero duration together with the error.
+func symflowerFix(logger *log.Logger, modelAssessment metrics.Assessments, repositoryPath string, language language.Language) (duration uint64, err error) {
+	start := time.Now()
+	_, err = util.CommandWithResult(context.Background(), logger, &util.Command{
+		Command: []string{
+			tools.SymflowerPath, "fix",
+			"--language", language.ID(),
+			"--workspace", repositoryPath,
+		},
+
+		Directory: repositoryPath,
+	})
+	if err != nil {
+		return 0, pkgerrors.WithStack(err)
+	}
+
+	return uint64(time.Since(start).Milliseconds()), nil
+}
diff --git a/evaluate/task/task-write-test.go b/evaluate/task/task-write-test.go
index 2b991dead..3d7a31fa8 100644
--- a/evaluate/task/task-write-test.go
+++ b/evaluate/task/task-write-test.go
@@ -63,6 +63,9 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
 	}
 
 	modelAssessment := metrics.NewAssessments()
+	// The "symflower fix" assessment shows how "symflower fix" can improve the result.
+	// It intentionally shares the map of the model assessment until a fix has been applied, so it mirrors the model's result whenever no fix is needed.
+	symflowerAssessment := modelAssessment
 	for _, filePath := range filePaths {
 		if err := repository.Reset(t.Logger); err != nil {
 			t.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
@@ -93,6 +96,36 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
 		if err != nil {
 			problems = append(problems, pkgerrors.WithMessage(err, filePath))
 
+			// Run "symflower fix" if the model response fails to execute.
+			if t.Language.ID() == "golang" { // Currently we only support Go for "symflower fix".
+				log.Print("model response alone failed execution, attempting to fix with \"symflower fix\"")
+
+				fixAssessment := metrics.NewAssessments()
+				duration, err := symflowerFix(log, modelAssessment, dataPath, t.Language)
+				if err != nil {
+					problems = append(problems, err)
+
+					continue
+				}
+				fixAssessment[metrics.AssessmentKeyProcessingTime] = duration
+
+				fixedCoverage, ps, err := t.Language.Execute(log, dataPath)
+				problems = append(problems, ps...)
+				if err != nil {
+					problems = append(problems, err)
+
+					continue
+				}
+				log.Printf("with symflower repair: Executes tests with %d coverage objects", fixedCoverage)
+
+				fixAssessment.Award(metrics.AssessmentKeyFilesExecuted)
+				fixAssessment.AwardPoints(metrics.AssessmentKeyCoverage, fixedCoverage)
+
+				// Assign to the variable of the outer scope (no ":=") so the fixed result is the one that gets reported.
+				// NOTE(review): this replaces the result of previously handled files, confirm whether the assessments should accumulate over all files.
+				symflowerAssessment = metrics.CombineModelAndSymflowerFixAssessments(modelAssessment, fixAssessment)
+			}
+
 			continue
 		}
 		log.Printf("Executes tests with %d coverage objects", coverage)
@@ -101,7 +134,8 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
 	}
 
 	repositoryAssessment = map[evaltask.Identifier]metrics.Assessments{
-		IdentifierWriteTests: modelAssessment,
+		IdentifierWriteTests:             modelAssessment,
+		IdentifierWriteTestsSymflowerFix: symflowerAssessment,
 	}
 
 	return repositoryAssessment, problems, nil
diff --git a/evaluate/task/task-write-test_test.go b/evaluate/task/task-write-test_test.go
index 5b5b2cdc6..6f8749ba2 100644
--- a/evaluate/task/task-write-test_test.go
+++ b/evaluate/task/task-write-test_test.go
@@ -14,6 +14,8 @@ import (
 	"github.com/symflower/eval-dev-quality/log"
 	modeltesting "github.com/symflower/eval-dev-quality/model/testing"
 	"github.com/symflower/eval-dev-quality/task"
+	"github.com/zimmski/osutil"
+	"github.com/zimmski/osutil/bytesutil"
 )
 
 func TestTaskWriteTestsRun(t *testing.T) {
@@ -77,4 +79,83 @@ func TestTaskWriteTestsRun(t *testing.T) {
 			},
 		})
 	})
+
+	t.Run("Symflower Fix", func(t *testing.T) {
+		t.Run("Go", func(t *testing.T) {
+			validateGo := func(t *testing.T, testName string, testFileContent string, expectedAssessments map[task.Identifier]metrics.Assessments) {
+				temporaryDirectoryPath := t.TempDir()
+				repositoryPath := filepath.Join(temporaryDirectoryPath, "golang", "plain")
+				require.NoError(t, osutil.CopyTree(filepath.Join("..", "..", "testdata", "golang", "plain"), repositoryPath))
+
+				modelMock := modeltesting.NewMockModelNamed(t, "mocked-model")
+				modelMock.RegisterGenerateSuccess(t, IdentifierWriteTests, "plain_test.go", testFileContent, metricstesting.AssessmentsWithProcessingTime).Once()
+
+				validate(t, &tasktesting.TestCaseTask{
+					Name: testName,
+
+					Model:          modelMock,
+					Language:       &golang.Language{},
+					TestDataPath:   temporaryDirectoryPath,
+					RepositoryPath: filepath.Join("golang", "plain"),
+
+					ExpectedRepositoryAssessment: expectedAssessments,
+					ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+						filepath.Join(string(IdentifierWriteTests), "mocked-model", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+							assert.Contains(t, data, "Evaluating model \"mocked-model\"")
+							assert.Contains(t, data, "PASS: TestPlain")
+						},
+					},
+				})
+			}
+			{
+				expectedAssessments := map[task.Identifier]metrics.Assessments{
+					IdentifierWriteTests: metrics.Assessments{
+						metrics.AssessmentKeyFilesExecuted:   1,
+						metrics.AssessmentKeyResponseNoError: 1,
+						metrics.AssessmentKeyCoverage:        10,
+					},
+					IdentifierWriteTestsSymflowerFix: metrics.Assessments{
+						metrics.AssessmentKeyFilesExecuted:   1,
+						metrics.AssessmentKeyResponseNoError: 1,
+						metrics.AssessmentKeyCoverage:        10,
+					},
+				}
+				validateGo(t, "Model generated correct test", bytesutil.StringTrimIndentations(`
+					package plain
+
+					import "testing"
+
+					func TestPlain(t *testing.T) {
+						plain()
+					}
+				`), expectedAssessments)
+			}
+			{
+				expectedAssessments := map[task.Identifier]metrics.Assessments{
+					IdentifierWriteTests: metrics.Assessments{
+						metrics.AssessmentKeyFilesExecuted:   0, // TODO update the assessments.
+						metrics.AssessmentKeyResponseNoError: 1,
+						metrics.AssessmentKeyCoverage:        0, // TODO update the assessments.
+					},
+					IdentifierWriteTestsSymflowerFix: metrics.Assessments{
+						metrics.AssessmentKeyFilesExecuted:   0, // TODO update the assessments.
+						metrics.AssessmentKeyResponseNoError: 1,
+						metrics.AssessmentKeyCoverage:        0, // TODO update the assessments.
+					},
+				}
+				validateGo(t, "Model generated test with unused import", bytesutil.StringTrimIndentations(`
+					package plain
+
+					import (
+						"testing"
+						"strings"
+					)
+
+					func TestPlain(t *testing.T) {
+						plain()
+					}
+				`), expectedAssessments)
+			}
+		})
+	})
 }
diff --git a/evaluate/task/task.go b/evaluate/task/task.go
index 5ddda6ef5..cb422e10c 100644
--- a/evaluate/task/task.go
+++ b/evaluate/task/task.go
@@ -33,6 +33,8 @@ func registerIdentifier(name string) (identifier evaltask.Identifier) {
 var (
 	// IdentifierWriteTests holds the identifier for the "write test" task.
 	IdentifierWriteTests = registerIdentifier("write-tests")
+	// IdentifierWriteTestsSymflowerFix holds the identifier for the "write test" task with the "symflower fix" applied.
+	IdentifierWriteTestsSymflowerFix = registerIdentifier("write-tests-symflower-fix")
 	// IdentifierCodeRepair holds the identifier for the "code repair" task.
 	IdentifierCodeRepair = registerIdentifier("code-repair")
 )