Apply "symflower fix" to a "write-test" result of a model when it errors, so model responses can possibly be fixed

Closes of #213
symflower · Jul 2, 2024 · 313fef3 · 313fef3
1 parent fcf95c4
commit 313fef3
Show file tree

Hide file tree

Showing 7 changed files with 226 additions and 6 deletions.
diff --git a/evaluate/metrics/assessment.go b/evaluate/metrics/assessment.go
@@ -152,3 +152,19 @@ func (a Assessments) StringCSV() (row []string) {
 
 	return row
 }
+
+// CombineWithSymflowerFixAssessments combines the model assessments with the ones from "symflower fix": coverage and executed files are taken from "fixed", all response-related keys are taken from "model" (so the "response no error" value of "fixed" is ignored), and the processing time is the sum of both.
+func CombineWithSymflowerFixAssessments(model Assessments, fixed Assessments) (combined Assessments) {
+	combined = NewAssessments()
+
+	combined[AssessmentKeyCoverage] = fixed[AssessmentKeyCoverage]
+	combined[AssessmentKeyFilesExecuted] = fixed[AssessmentKeyFilesExecuted]
+	combined[AssessmentKeyGenerateTestsForFileCharacterCount] = model[AssessmentKeyGenerateTestsForFileCharacterCount]
+	combined[AssessmentKeyProcessingTime] = model[AssessmentKeyProcessingTime] + fixed[AssessmentKeyProcessingTime] // Both steps contribute to the total processing time.
+	combined[AssessmentKeyResponseCharacterCount] = model[AssessmentKeyResponseCharacterCount]
+	combined[AssessmentKeyResponseNoError] = model[AssessmentKeyResponseNoError]
+	combined[AssessmentKeyResponseNoExcess] = model[AssessmentKeyResponseNoExcess]
+	combined[AssessmentKeyResponseWithCode] = model[AssessmentKeyResponseWithCode]
+
+	return combined
+}
diff --git a/evaluate/metrics/assessment_test.go b/evaluate/metrics/assessment_test.go
@@ -272,3 +272,54 @@ func TestAssessmentsScore(t *testing.T) {
 		ExpectedScore: uint64(9),
 	})
 }
+
+func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
+	type testCase struct {
+		Name string // Name is used as the name of the subtest.
+
+		ModelAssessment         Assessments // ModelAssessment is passed as the "model" argument.
+		SymflowerFixAssessments Assessments // SymflowerFixAssessments is passed as the "fixed" argument.
+
+		ExpectedAssessments Assessments // ExpectedAssessments is compared to the combined result.
+	}
+
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			actualAssessments := CombineWithSymflowerFixAssessments(tc.ModelAssessment, tc.SymflowerFixAssessments)
+
+			assert.Equal(t, tc.ExpectedAssessments, actualAssessments)
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "Simple",
+
+		ModelAssessment: Assessments{
+			AssessmentKeyFilesExecuted:                      1,
+			AssessmentKeyProcessingTime:                     uint64(200),
+			AssessmentKeyCoverage:                           0,
+			AssessmentKeyResponseCharacterCount:             100,
+			AssessmentKeyGenerateTestsForFileCharacterCount: 50,
+			AssessmentKeyResponseNoError:                    0,
+			AssessmentKeyResponseWithCode:                   1,
+			AssessmentKeyResponseNoExcess:                   1,
+		},
+		SymflowerFixAssessments: Assessments{
+			AssessmentKeyFilesExecuted:   1,
+			AssessmentKeyProcessingTime:  uint64(100),
+			AssessmentKeyCoverage:        10,
+			AssessmentKeyResponseNoError: 1,
+		},
+
+		ExpectedAssessments: Assessments{
+			AssessmentKeyFilesExecuted:                      1,
+			AssessmentKeyProcessingTime:                     uint64(300), // Sum of the model (200) and the "symflower fix" (100) processing times.
+			AssessmentKeyCoverage:                           10,
+			AssessmentKeyResponseCharacterCount:             100,
+			AssessmentKeyGenerateTestsForFileCharacterCount: 50,
+			AssessmentKeyResponseNoError:                    0, // The value of the model is kept, the "symflower fix" value of 1 is ignored.
+			AssessmentKeyResponseWithCode:                   1,
+			AssessmentKeyResponseNoExcess:                   1,
+		},
+	})
+}
diff --git a/evaluate/task/symflower-fix.go b/evaluate/task/symflower-fix.go
@@ -0,0 +1,32 @@
+package task
+
+import (
+	"context"
+	"time"
+
+	pkgerrors "github.com/pkg/errors"
+	"github.com/symflower/eval-dev-quality/evaluate/metrics"
+	"github.com/symflower/eval-dev-quality/language"
+	"github.com/symflower/eval-dev-quality/log"
+	"github.com/symflower/eval-dev-quality/tools"
+	"github.com/symflower/eval-dev-quality/util"
+)
+
+// symflowerFix runs the "symflower fix" command for the given language with the repository path as both workspace and working directory, and returns its execution time in milliseconds. If the command fails, the duration is 0 and the error is wrapped with "pkgerrors.WithStack". The "modelAssessment" argument is currently not used by this function.
+func symflowerFix(logger *log.Logger, modelAssessment metrics.Assessments, repositoryPath string, language language.Language) (duration uint64, err error) {
+	start := time.Now()
+	_, err = util.CommandWithResult(context.Background(), logger, &util.Command{
+		Command: []string{
+			tools.SymflowerPath, "fix",
+			"--language", language.ID(),
+			"--workspace", repositoryPath,
+		},
+
+		Directory: repositoryPath,
+	})
+	if err != nil {
+		return 0, pkgerrors.WithStack(err)
+	}
+
+	return uint64(time.Since(start).Milliseconds()), nil
+}
diff --git a/evaluate/task/task-write-test.go b/evaluate/task/task-write-test.go
@@ -1,6 +1,7 @@
 package task
 
 import (
+	"maps"
 	"path/filepath"
 
 	pkgerrors "github.com/pkg/errors"
@@ -63,6 +64,7 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
 	}
 
 	modelAssessment := metrics.NewAssessments()
+	var withSymflowerAssessment metrics.Assessments
 	for _, filePath := range filePaths {
 		if err := repository.Reset(t.Logger); err != nil {
 			t.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
@@ -93,15 +95,49 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
 		if err != nil {
 			problems = append(problems, pkgerrors.WithMessage(err, filePath))
 
-			continue
+			// Run "symflower fix"  if the model response fails to execute.
+			if t.Language.ID() == "golang" { // Currently we only support Go for "symflower fix".
+				log.Print("model response alone failed execution, attempting to fix with \"symflower fix \"")
+
+				symflowerAssessment := metrics.NewAssessments()
+				duration, err := symflowerFix(log, modelAssessment, dataPath, t.Language)
+				if err != nil {
+					problems = append(problems, err)
+
+					continue
+				}
+				symflowerAssessment[metrics.AssessmentKeyProcessingTime] = duration
+
+				coverage, ps, err := t.Language.Execute(log, dataPath)
+				problems = append(problems, ps...)
+				if err != nil {
+					problems = append(problems, pkgerrors.WithMessage(err, "symflower fix"))
+
+					continue
+				}
+				log.Printf("with symflower repair: Executes tests with %d coverage objects", coverage)
+
+				symflowerAssessment.Award(metrics.AssessmentKeyFilesExecuted)
+				symflowerAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
+
+				withSymflowerAssessment = metrics.CombineWithSymflowerFixAssessments(modelAssessment, symflowerAssessment)
+			} else {
+				continue
+			}
+		} else {
+			log.Printf("Executes tests with %d coverage objects", coverage)
+			modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
+			modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
 		}
-		log.Printf("Executes tests with %d coverage objects", coverage)
-		modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
-		modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
 	}
 
+	// The symflower fix assessment should show how symflower can improve the result, so in case we did not need symflower use the original model assessment.
+	if withSymflowerAssessment == nil {
+		withSymflowerAssessment = maps.Clone(modelAssessment)
+	}
 	repositoryAssessment = map[evaltask.Identifier]metrics.Assessments{
-		IdentifierWriteTests: modelAssessment,
+		IdentifierWriteTests:             modelAssessment,
+		IdentifierWriteTestsSymflowerFix: withSymflowerAssessment,
 	}
 
 	return repositoryAssessment, problems, nil

diff --git a/evaluate/task/task-write-test_test.go b/evaluate/task/task-write-test_test.go
@@ -14,6 +14,8 @@ import (
 	"github.com/symflower/eval-dev-quality/log"
 	modeltesting "github.com/symflower/eval-dev-quality/model/testing"
 	"github.com/symflower/eval-dev-quality/task"
+	"github.com/zimmski/osutil"
+	"github.com/zimmski/osutil/bytesutil"
 )
 
 func TestTaskWriteTestsRun(t *testing.T) {
@@ -77,4 +79,85 @@ func TestTaskWriteTestsRun(t *testing.T) {
 			},
 		})
 	})
+
+	t.Run("Symflower Fix", func(t *testing.T) {
+		t.Run("Go", func(t *testing.T) {
+			validateGo := func(t *testing.T, testName string, testFileContent string, expectedAssessments map[task.Identifier]metrics.Assessments, expectedProblems []string) {
+				temporaryDirectoryPath := t.TempDir()
+				repositoryPath := filepath.Join(temporaryDirectoryPath, "golang", "plain")
+				require.NoError(t, osutil.CopyTree(filepath.Join("..", "..", "testdata", "golang", "plain"), repositoryPath))
+
+				modelMock := modeltesting.NewMockModelNamed(t, "mocked-model")
+				modelMock.RegisterGenerateSuccess(t, IdentifierWriteTests, "plain_test.go", testFileContent, metricstesting.AssessmentsWithProcessingTime).Once()
+
+				validate(t, &tasktesting.TestCaseTask{
+					Name: testName,
+
+					Model:          modelMock,
+					Language:       &golang.Language{},
+					TestDataPath:   temporaryDirectoryPath,
+					RepositoryPath: filepath.Join("golang", "plain"),
+
+					ExpectedRepositoryAssessment: expectedAssessments,
+					ExpectedProblemContains:      expectedProblems,
+					ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+						filepath.Join(string(IdentifierWriteTests), "mocked-model", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+							assert.Contains(t, data, "Evaluating model \"mocked-model\"")
+							assert.Contains(t, data, "PASS: TestPlain")
+						},
+					},
+				})
+			}
+			{
+				expectedAssessments := map[task.Identifier]metrics.Assessments{
+					IdentifierWriteTests: metrics.Assessments{
+						metrics.AssessmentKeyFilesExecuted:   1,
+						metrics.AssessmentKeyResponseNoError: 1,
+						metrics.AssessmentKeyCoverage:        10,
+					},
+					IdentifierWriteTestsSymflowerFix: metrics.Assessments{
+						metrics.AssessmentKeyFilesExecuted:   1,
+						metrics.AssessmentKeyResponseNoError: 1,
+						metrics.AssessmentKeyCoverage:        10,
+					},
+				}
+				validateGo(t, "Model generated correct test", bytesutil.StringTrimIndentations(`
+					package plain
+
+					import "testing"
+
+					func TestPlain(t *testing.T) {
+						   plain()
+					}
+				`), expectedAssessments, nil)
+			}
+			{
+				expectedAssessments := map[task.Identifier]metrics.Assessments{
+					IdentifierWriteTests: metrics.Assessments{
+						metrics.AssessmentKeyResponseNoError: 1,
+					},
+					IdentifierWriteTestsSymflowerFix: metrics.Assessments{
+						metrics.AssessmentKeyFilesExecuted:   1,
+						metrics.AssessmentKeyResponseNoError: 1,
+						metrics.AssessmentKeyCoverage:        10,
+					},
+				}
+				expectedProblems := []string{
+					"imported and not used",
+				}
+				validateGo(t, "Model generated test with unused import", bytesutil.StringTrimIndentations(`
+					package plain
+
+					import (
+						"testing"
+						"strings"
+					)
+
+					func TestPlain(t *testing.T) {
+					   	plain()
+					}
+				`), expectedAssessments, expectedProblems)
+			}
+		})
+	})
 }
diff --git a/evaluate/task/task.go b/evaluate/task/task.go
@@ -33,6 +33,8 @@ func registerIdentifier(name string) (identifier evaltask.Identifier) {
var (
 	// IdentifierWriteTests holds the identifier for the "write test" task.
 	IdentifierWriteTests = registerIdentifier("write-tests")
+	// IdentifierWriteTestsSymflowerFix holds the identifier for the results of the "write test" task with "symflower fix" applied.
+	IdentifierWriteTestsSymflowerFix = registerIdentifier("write-tests-symflower-fix")
 	// IdentifierCodeRepair holds the identifier for the "code repair" task.
 	IdentifierCodeRepair = registerIdentifier("code-repair")
)

diff --git a/tools/symflower.go b/tools/symflower.go
@@ -91,7 +91,7 @@ func (*symflower) CheckVersion(logger *log.Logger, binaryPath string) (err error
 }
 
 // SymflowerVersionRequired holds the version of Symflower required for this revision of the evaluation.
-const SymflowerVersionRequired = "37153"
+const SymflowerVersionRequired = "38036"
 
 // RequiredVersion returns the required version of the tool.
 func (*symflower) RequiredVersion() string {