From aa7aab2dc622142dab2e2535e3df6966314c4f65 Mon Sep 17 00:00:00 2001 From: Simon Bauer Date: Thu, 4 Jul 2024 09:08:48 +0200 Subject: [PATCH] Share logging setup between tasks Part of #200 --- evaluate/task/task-code-repair.go | 15 ++++++------- evaluate/task/task-write-test.go | 28 ++++++++++--------------- evaluate/task/task.go | 35 +++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 26 deletions(-) diff --git a/evaluate/task/task-code-repair.go b/evaluate/task/task-code-repair.go index a307ff7a7..55639a2aa 100644 --- a/evaluate/task/task-code-repair.go +++ b/evaluate/task/task-code-repair.go @@ -38,15 +38,12 @@ func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[eva pkgerrors.Wrap(evaltask.ErrTaskUnsupportedByModel, fmt.Sprintf("%q does not support %q", ctx.Model.ID(), string(t.Identifier()))) } - log, logClose, err := log.WithFile(ctx.Logger, filepath.Join(ctx.ResultPath, string(t.Identifier()), model.CleanModelNameForFileSystem(ctx.Model.ID()), ctx.Language.ID(), ctx.Repository.Name()+".log")) + logging, err := initializeLogging(ctx, t) if err != nil { return nil, nil, err } - defer logClose() - - log.Printf("Evaluating model %q on task %q using language %q and repository %q", ctx.Model.ID(), t.Identifier(), ctx.Language.ID(), ctx.Repository.Name()) defer func() { - log.Printf("Evaluated model %q on task %q using language %q and repository %q: encountered %d problems: %+v", ctx.Model.ID(), t.Identifier(), ctx.Language.ID(), ctx.Repository.Name(), len(problems), problems) + logging.finalize(problems) }() var packagePaths []string @@ -66,7 +63,7 @@ func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[eva ctx.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err) } - sourceFile, mistakes, err := t.unpackCodeRepairPackage(ctx, log, packagePath) + sourceFile, mistakes, err := t.unpackCodeRepairPackage(ctx, logging.log, packagePath) if err != nil { return nil, nil, err } @@ -81,7 
+78,7 @@ func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[eva Mistakes: mistakes, }, - Logger: log, + Logger: logging.log, } assessments, err := modelCapability.RepairCode(modelContext) if err != nil { @@ -95,14 +92,14 @@ func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[eva modelAssessment.Add(assessments) modelAssessment.Award(metrics.AssessmentKeyResponseNoError) - coverage, ps, err := ctx.Language.Execute(log, packagePath) + coverage, ps, err := ctx.Language.Execute(logging.log, packagePath) problems = append(problems, ps...) if err != nil { problems = append(problems, pkgerrors.WithMessage(err, sourceFile)) continue } - log.Printf("Executes tests with %d coverage objects", coverage) + logging.log.Printf("Executes tests with %d coverage objects", coverage) modelAssessment.Award(metrics.AssessmentKeyFilesExecuted) modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, coverage) } diff --git a/evaluate/task/task-write-test.go b/evaluate/task/task-write-test.go index 72a26b56c..a16f42441 100644 --- a/evaluate/task/task-write-test.go +++ b/evaluate/task/task-write-test.go @@ -4,11 +4,9 @@ import ( "context" "errors" "fmt" - "path/filepath" pkgerrors "github.com/pkg/errors" "github.com/symflower/eval-dev-quality/evaluate/metrics" - "github.com/symflower/eval-dev-quality/log" "github.com/symflower/eval-dev-quality/model" evaltask "github.com/symflower/eval-dev-quality/task" ) @@ -31,20 +29,16 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva pkgerrors.Wrap(evaltask.ErrTaskUnsupportedByModel, fmt.Sprintf("%q does not support %q", ctx.Model.ID(), string(t.Identifier()))) } - dataPath := ctx.Repository.DataPath() - - log, logClose, err := log.WithFile(ctx.Logger, filepath.Join(ctx.ResultPath, string(t.Identifier()), model.CleanModelNameForFileSystem(ctx.Model.ID()), ctx.Language.ID(), ctx.Repository.Name()+".log")) + logging, err := initializeLogging(ctx, t) if err != nil { 
return nil, nil, err } - defer logClose() - - log.Printf("Evaluating model %q on task %q using language %q and repository %q", ctx.Model.ID(), t.Identifier(), ctx.Language.ID(), ctx.Repository.Name()) defer func() { - log.Printf("Evaluated model %q on task %q using language %q and repository %q: encountered %d problems: %+v", ctx.Model.ID(), t.Identifier(), ctx.Language.ID(), ctx.Repository.Name(), len(problems), problems) + logging.finalize(problems) }() - filePaths, err := ctx.Language.Files(log, dataPath) + dataPath := ctx.Repository.DataPath() + filePaths, err := ctx.Language.Files(logging.log, dataPath) if err != nil { return nil, problems, pkgerrors.WithStack(err) } @@ -65,7 +59,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva RepositoryPath: dataPath, FilePath: filePath, - Logger: log, + Logger: logging.log, } assessments, err := modelCapability.WriteTests(modelContext) if err != nil { @@ -79,7 +73,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva modelAssessmentForFile.Add(assessments) modelAssessmentForFile.Award(metrics.AssessmentKeyResponseNoError) - coverage, ps, err := ctx.Language.Execute(log, dataPath) + coverage, ps, err := ctx.Language.Execute(logging.log, dataPath) problems = append(problems, ps...) if err != nil { problems = append(problems, pkgerrors.WithMessage(err, filePath)) @@ -94,9 +88,9 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva // Run "symflower fix" if the model response fails to execute. if ctx.Language.ID() == "golang" { // Currently we only support Go for "symflower fix". 
- log.Print("model response alone failed execution, attempting to fix with \"symflower fix \"") + logging.log.Print("model response alone failed execution, attempting to fix with \"symflower fix \"") - duration, err := symflowerFix(log, modelAssessment, dataPath, ctx.Language) + duration, err := symflowerFix(logging.log, modelAssessment, dataPath, ctx.Language) if err != nil { problems = append(problems, err) @@ -106,7 +100,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva continue } - coverage, ps, err := ctx.Language.Execute(log, dataPath) + coverage, ps, err := ctx.Language.Execute(logging.log, dataPath) problems = append(problems, ps...) if err != nil { problems = append(problems, pkgerrors.WithMessage(err, "symflower fix")) @@ -116,7 +110,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva continue } - log.Printf("with symflower repair: Executes tests with %d coverage objects", coverage) + logging.log.Printf("with symflower repair: Executes tests with %d coverage objects", coverage) // Symflower was able to fix a failure so now update the assessment with the improved results. 
withSymflowerAssessmentForFile = metrics.NewAssessments() @@ -127,7 +121,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva withSymflowerAssessmentForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentForFile, withSymflowerAssessmentForFile) } } else { - log.Printf("Executes tests with %d coverage objects", coverage) + logging.log.Printf("Executes tests with %d coverage objects", coverage) modelAssessmentForFile.Award(metrics.AssessmentKeyFilesExecuted) modelAssessmentForFile.AwardPoints(metrics.AssessmentKeyCoverage, coverage) } diff --git a/evaluate/task/task.go b/evaluate/task/task.go index 566e717b8..11b576b37 100644 --- a/evaluate/task/task.go +++ b/evaluate/task/task.go @@ -2,8 +2,11 @@ package task import ( "fmt" + "path/filepath" pkgerrors "github.com/pkg/errors" + "github.com/symflower/eval-dev-quality/log" + "github.com/symflower/eval-dev-quality/model" evaltask "github.com/symflower/eval-dev-quality/task" ) @@ -47,3 +50,35 @@ func TaskForIdentifier(taskIdentifier evaltask.Identifier) (task evaltask.Task, return nil, pkgerrors.Wrap(evaltask.ErrTaskUnknown, string(taskIdentifier)) } } + +// taskLogging holds common logging functionality. +type taskLogging struct { + log *log.Logger + logClose func() + ctx evaltask.Context + task evaltask.Task +} + +// initializeLogging initializes the logging. 
+func initializeLogging(ctx evaltask.Context, task evaltask.Task) (logging *taskLogging, err error) { + logging = &taskLogging{ + ctx: ctx, + task: task, + } + + logging.log, logging.logClose, err = log.WithFile(ctx.Logger, filepath.Join(ctx.ResultPath, string(task.Identifier()), model.CleanModelNameForFileSystem(ctx.Model.ID()), ctx.Language.ID(), ctx.Repository.Name()+".log")) + if err != nil { + return nil, err + } + + logging.log.Printf("Evaluating model %q on task %q using language %q and repository %q", ctx.Model.ID(), task.Identifier(), ctx.Language.ID(), ctx.Repository.Name()) + + return logging, nil +} + +// finalize logs the evaluation summary including the given problems and then invokes the stored close function of the logger. +func (t *taskLogging) finalize(problems []error) { + t.log.Printf("Evaluated model %q on task %q using language %q and repository %q: encountered %d problems: %+v", t.ctx.Model.ID(), t.task.Identifier(), t.ctx.Language.ID(), t.ctx.Repository.Name(), len(problems), problems) + + t.logClose() +}