Skip to content

Commit

Permalink
Apply "symflower fix" to a "write-test" result of a model when it err…
Browse files Browse the repository at this point in the history
…ors, so model responses can possibly be fixed

Closes of #213
  • Loading branch information
ruiAzevedo19 authored and bauersimon committed Jul 2, 2024
1 parent fcf95c4 commit 313fef3
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 6 deletions.
16 changes: 16 additions & 0 deletions evaluate/metrics/assessment.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,19 @@ func (a Assessments) StringCSV() (row []string) {

return row
}

// CombineWithSymflowerFixAssessments merges the assessments of a model response with the assessments gathered after "symflower fix" was applied to it.
// Coverage and executed files are taken from the "fixed" assessments, all response-related metrics are taken from the "model" assessments, and the processing times of both are summed up.
func CombineWithSymflowerFixAssessments(model Assessments, fixed Assessments) (combined Assessments) {
	// The combined run took as long as the model query plus the fix attempt.
	totalProcessingTime := model[AssessmentKeyProcessingTime] + fixed[AssessmentKeyProcessingTime]

	combined = NewAssessments()

	// Metrics that describe the model response itself are carried over unchanged.
	combined[AssessmentKeyGenerateTestsForFileCharacterCount] = model[AssessmentKeyGenerateTestsForFileCharacterCount]
	combined[AssessmentKeyResponseCharacterCount] = model[AssessmentKeyResponseCharacterCount]
	combined[AssessmentKeyResponseNoError] = model[AssessmentKeyResponseNoError]
	combined[AssessmentKeyResponseNoExcess] = model[AssessmentKeyResponseNoExcess]
	combined[AssessmentKeyResponseWithCode] = model[AssessmentKeyResponseWithCode]

	// Metrics that result from executing the tests come from the fixed run.
	combined[AssessmentKeyCoverage] = fixed[AssessmentKeyCoverage]
	combined[AssessmentKeyFilesExecuted] = fixed[AssessmentKeyFilesExecuted]

	combined[AssessmentKeyProcessingTime] = totalProcessingTime

	return combined
}
51 changes: 51 additions & 0 deletions evaluate/metrics/assessment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,54 @@ func TestAssessmentsScore(t *testing.T) {
ExpectedScore: uint64(9),
})
}

// TestCombineModelAndSymflowerFixAssessments checks that combining model assessments with "symflower fix" assessments yields the expected merged assessments.
func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
	type testCase struct {
		Name string

		ModelAssessment         Assessments
		SymflowerFixAssessments Assessments

		ExpectedAssessments Assessments
	}

	validate := func(t *testing.T, tc *testCase) {
		t.Run(tc.Name, func(t *testing.T) {
			combined := CombineWithSymflowerFixAssessments(tc.ModelAssessment, tc.SymflowerFixAssessments)

			assert.Equal(t, tc.ExpectedAssessments, combined)
		})
	}

	// The model response did not execute on its own (no coverage), but the fixed version did.
	modelOnly := Assessments{
		AssessmentKeyCoverage:                           0,
		AssessmentKeyFilesExecuted:                      1,
		AssessmentKeyGenerateTestsForFileCharacterCount: 50,
		AssessmentKeyProcessingTime:                     200,
		AssessmentKeyResponseCharacterCount:             100,
		AssessmentKeyResponseNoError:                    0,
		AssessmentKeyResponseNoExcess:                   1,
		AssessmentKeyResponseWithCode:                   1,
	}
	afterFix := Assessments{
		AssessmentKeyCoverage:        10,
		AssessmentKeyFilesExecuted:   1,
		AssessmentKeyProcessingTime:  100,
		AssessmentKeyResponseNoError: 1,
	}
	// Coverage and executed files follow the fix, response metrics follow the model, processing times add up.
	expected := Assessments{
		AssessmentKeyCoverage:                           10,
		AssessmentKeyFilesExecuted:                      1,
		AssessmentKeyGenerateTestsForFileCharacterCount: 50,
		AssessmentKeyProcessingTime:                     300,
		AssessmentKeyResponseCharacterCount:             100,
		AssessmentKeyResponseNoError:                    0,
		AssessmentKeyResponseNoExcess:                   1,
		AssessmentKeyResponseWithCode:                   1,
	}

	validate(t, &testCase{
		Name: "Simple",

		ModelAssessment:         modelOnly,
		SymflowerFixAssessments: afterFix,

		ExpectedAssessments: expected,
	})
}
32 changes: 32 additions & 0 deletions evaluate/task/symflower-fix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package task

import (
"context"
"time"

pkgerrors "github.com/pkg/errors"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/language"
"github.com/symflower/eval-dev-quality/log"
"github.com/symflower/eval-dev-quality/tools"
"github.com/symflower/eval-dev-quality/util"
)

// symflowerFix invokes "symflower fix" for the given language on the repository and reports how long the invocation took in milliseconds.
// The command is executed with the repository path as both its working directory and its workspace.
// On failure a zero duration and the error, annotated with a stack trace, are returned.
// The "modelAssessment" argument is not read by this function.
func symflowerFix(logger *log.Logger, modelAssessment metrics.Assessments, repositoryPath string, language language.Language) (duration uint64, err error) {
	startedAt := time.Now()

	fixCommand := &util.Command{
		Command: []string{
			tools.SymflowerPath, "fix",
			"--language", language.ID(),
			"--workspace", repositoryPath,
		},

		Directory: repositoryPath,
	}
	// Only the success of the command matters here, its output is discarded.
	_, err = util.CommandWithResult(context.Background(), logger, fixCommand)
	if err != nil {
		return 0, pkgerrors.WithStack(err)
	}

	elapsed := time.Since(startedAt)

	return uint64(elapsed.Milliseconds()), nil
}
46 changes: 41 additions & 5 deletions evaluate/task/task-write-test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package task

import (
"maps"
"path/filepath"

pkgerrors "github.com/pkg/errors"
Expand Down Expand Up @@ -63,6 +64,7 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
}

modelAssessment := metrics.NewAssessments()
var withSymflowerAssessment metrics.Assessments
for _, filePath := range filePaths {
if err := repository.Reset(t.Logger); err != nil {
t.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
Expand Down Expand Up @@ -93,15 +95,49 @@ func (t *TaskWriteTests) Run(repository evaltask.Repository) (repositoryAssessme
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))

continue
// Run "symflower fix" if the model response fails to execute.
if t.Language.ID() == "golang" { // Currently we only support Go for "symflower fix".
log.Print("model response alone failed execution, attempting to fix with \"symflower fix \"")

symflowerAssessment := metrics.NewAssessments()
duration, err := symflowerFix(log, modelAssessment, dataPath, t.Language)
if err != nil {
problems = append(problems, err)

continue
}
symflowerAssessment[metrics.AssessmentKeyProcessingTime] = duration

coverage, ps, err := t.Language.Execute(log, dataPath)
problems = append(problems, ps...)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, "symflower fix"))

continue
}
log.Printf("with symflower repair: Executes tests with %d coverage objects", coverage)

symflowerAssessment.Award(metrics.AssessmentKeyFilesExecuted)
symflowerAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)

withSymflowerAssessment = metrics.CombineWithSymflowerFixAssessments(modelAssessment, symflowerAssessment)
} else {
continue
}
} else {
log.Printf("Executes tests with %d coverage objects", coverage)
modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
}
log.Printf("Executes tests with %d coverage objects", coverage)
modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage)
}

// The symflower fix assessment should show how symflower can improve the result, so in case we did not need symflower use the original model assessment.
if withSymflowerAssessment == nil {
withSymflowerAssessment = maps.Clone(modelAssessment)
}
repositoryAssessment = map[evaltask.Identifier]metrics.Assessments{
IdentifierWriteTests: modelAssessment,
IdentifierWriteTests: modelAssessment,
IdentifierWriteTestsSymflowerFix: withSymflowerAssessment,
}

return repositoryAssessment, problems, nil
Expand Down
83 changes: 83 additions & 0 deletions evaluate/task/task-write-test_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"github.com/symflower/eval-dev-quality/log"
modeltesting "github.com/symflower/eval-dev-quality/model/testing"
"github.com/symflower/eval-dev-quality/task"
"github.com/zimmski/osutil"
"github.com/zimmski/osutil/bytesutil"
)

func TestTaskWriteTestsRun(t *testing.T) {
Expand Down Expand Up @@ -77,4 +79,85 @@ func TestTaskWriteTestsRun(t *testing.T) {
},
})
})

t.Run("Symflower Fix", func(t *testing.T) {
t.Run("Go", func(t *testing.T) {
validateGo := func(t *testing.T, testName string, testFileContent string, expectedAssessments map[task.Identifier]metrics.Assessments, expectedProblems []string) {
temporaryDirectoryPath := t.TempDir()
repositoryPath := filepath.Join(temporaryDirectoryPath, "golang", "plain")
require.NoError(t, osutil.CopyTree(filepath.Join("..", "..", "testdata", "golang", "plain"), repositoryPath))

modelMock := modeltesting.NewMockModelNamed(t, "mocked-model")
modelMock.RegisterGenerateSuccess(t, IdentifierWriteTests, "plain_test.go", testFileContent, metricstesting.AssessmentsWithProcessingTime).Once()

validate(t, &tasktesting.TestCaseTask{
Name: testName,

Model: modelMock,
Language: &golang.Language{},
TestDataPath: temporaryDirectoryPath,
RepositoryPath: filepath.Join("golang", "plain"),

ExpectedRepositoryAssessment: expectedAssessments,
ExpectedProblemContains: expectedProblems,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(IdentifierWriteTests), "mocked-model", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "Evaluating model \"mocked-model\"")
assert.Contains(t, data, "PASS: TestPlain")
},
},
})
}
{
expectedAssessments := map[task.Identifier]metrics.Assessments{
IdentifierWriteTests: metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
metrics.AssessmentKeyCoverage: 10,
},
IdentifierWriteTestsSymflowerFix: metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
metrics.AssessmentKeyCoverage: 10,
},
}
validateGo(t, "Model generated correct test", bytesutil.StringTrimIndentations(`
package plain
import "testing"
func TestPlain(t *testing.T) {
plain()
}
`), expectedAssessments, nil)
}
{
expectedAssessments := map[task.Identifier]metrics.Assessments{
IdentifierWriteTests: metrics.Assessments{
metrics.AssessmentKeyResponseNoError: 1,
},
IdentifierWriteTestsSymflowerFix: metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 1,
metrics.AssessmentKeyCoverage: 10,
},
}
expectedProblems := []string{
"imported and not used",
}
validateGo(t, "Model generated test with unused import", bytesutil.StringTrimIndentations(`
package plain
import (
"testing"
"strings"
)
func TestPlain(t *testing.T) {
plain()
}
`), expectedAssessments, expectedProblems)
}
})
})
}
2 changes: 2 additions & 0 deletions evaluate/task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ func registerIdentifier(name string) (identifier evaltask.Identifier) {
var (
// IdentifierWriteTests holds the identifier for the "write test" task, registered under the name "write-tests".
IdentifierWriteTests = registerIdentifier("write-tests")
// IdentifierWriteTestsSymflowerFix holds the identifier for the "write test" task with the "symflower fix" applied, registered under the name "write-tests-symflower-fix".
IdentifierWriteTestsSymflowerFix = registerIdentifier("write-tests-symflower-fix")
// IdentifierCodeRepair holds the identifier for the "code repair" task, registered under the name "code-repair".
IdentifierCodeRepair = registerIdentifier("code-repair")
)
Expand Down
2 changes: 1 addition & 1 deletion tools/symflower.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (*symflower) CheckVersion(logger *log.Logger, binaryPath string) (err error
}

// SymflowerVersionRequired holds the version of Symflower required for this revision of the evaluation.
const SymflowerVersionRequired = "37153"
const SymflowerVersionRequired = "38036"

// RequiredVersion returns the required version of the tool.
func (*symflower) RequiredVersion() string {
Expand Down

0 comments on commit 313fef3

Please sign in to comment.