diff --git a/cmd/eval-dev-quality/cmd/command.go b/cmd/eval-dev-quality/cmd/command.go index dd0cb2791..839b632ff 100644 --- a/cmd/eval-dev-quality/cmd/command.go +++ b/cmd/eval-dev-quality/cmd/command.go @@ -43,6 +43,7 @@ func Execute(logger *log.Logger, arguments []string) { logger.Panicf("Could not parse arguments: %+v", err) } if parser.Active == nil { + logger.SetFlags(log.FlagMessageOnly) parser.WriteHelp(logger.Writer()) } } diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index cdac95e66..933c924ce 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -96,16 +96,7 @@ func (command *Evaluate) SetLogger(logger *log.Logger) { } // Initialize initializes the command according to the arguments. -func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context, cleanup func()) { - // Ensure the cleanup always runs in case there is a panic. - defer func() { - if r := recover(); r != nil { - if cleanup != nil { - cleanup() - } - panic(r) - } - }() +func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context) { evaluationContext = &evaluate.Context{} // Check and validate common options. @@ -197,11 +188,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. // Initialize logging within result directory. { - log, logClose, err := log.WithFile(command.logger, filepath.Join(command.ResultPath, "evaluation.log")) - if err != nil { - command.logger.Panicf("ERROR: %s", err) - } - cleanup = logClose + log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath) command.logger = log evaluationContext.Log = log } @@ -378,15 +365,14 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. } } - return evaluationContext, cleanup + return evaluationContext } // Execute executes the command. func (command *Evaluate) Execute(args []string) (err error) { command.timestamp = time.Now() - evaluationContext, cleanup := command.Initialize(args) - defer cleanup() + evaluationContext := command.Initialize(args) if evaluationContext == nil { command.logger.Panic("ERROR: empty evaluation context") } diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go index 32bbc5da4..c53740c8a 100644 --- a/cmd/eval-dev-quality/cmd/evaluate_test.go +++ b/cmd/eval-dev-quality/cmd/evaluate_test.go @@ -21,7 +21,6 @@ import ( evaluatetask "github.com/symflower/eval-dev-quality/evaluate/task" "github.com/symflower/eval-dev-quality/language" "github.com/symflower/eval-dev-quality/log" - "github.com/symflower/eval-dev-quality/model" providertesting "github.com/symflower/eval-dev-quality/provider/testing" "github.com/symflower/eval-dev-quality/tools" toolstesting "github.com/symflower/eval-dev-quality/tools/testing" @@ -522,7 +521,7 @@ func TestEvaluateExecute(t *testing.T) { assert.Contains(t, data, "unloading model") }, filepath.Join("result-directory", "README.md"): nil, - filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil, + filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil, }, }) } @@ -568,7 +567,7 @@ func TestEvaluateExecute(t *testing.T) { assert.Contains(t, data, "response-no-error=2") }, filepath.Join("result-directory", "README.md"): nil, - filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil, + filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil, }, }) } @@ -761,18 +760,14 @@ func TestEvaluateInitialize(t *testing.T) { if tc.ValidatePanic != "" { assert.PanicsWithValue(t, tc.ValidatePanic, func() { - c, cleanup := tc.Command.Initialize([]string{}) - cleanup() - actualEvaluationContext = c + actualEvaluationContext = tc.Command.Initialize([]string{}) }) return } assert.NotPanics(t, func() { - c, cleanup := tc.Command.Initialize([]string{}) - cleanup() - actualEvaluationContext = c + actualEvaluationContext = tc.Command.Initialize([]string{}) }) if tc.ValidateCommand != nil { diff --git a/cmd/eval-dev-quality/cmd/version.go b/cmd/eval-dev-quality/cmd/version.go index c70a1eba0..089179f80 100644 --- a/cmd/eval-dev-quality/cmd/version.go +++ b/cmd/eval-dev-quality/cmd/version.go @@ -24,7 +24,7 @@ func (command *Version) Execute(args []string) (err error) { if revision == "" { revision = "development" } - command.logger.SetFlags(0) // Remove the timestamp and everything from the log output while still being able to test it. + command.logger.SetFlags(log.FlagMessageOnly) // Remove the timestamp and everything from the log output while still being able to test it. command.logger.Printf("eval-dev-quality version %s - revision %s", evaluate.Version, revision) return nil diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index 412b32bb3..cda1034a1 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -99,18 +99,24 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin temporaryRepositories[repositoryPath] = temporaryRepository } + + logger := ctx.Log for rl := uint(0); rl < ctx.runsAtLanguageLevel(); rl++ { if ctx.Runs > 1 && !ctx.RunsSequential { - ctx.Log.Printf("Run %d/%d", rl+1, ctx.Runs) + logger.Printf("Run %d/%d", rl+1, ctx.Runs) } for _, language := range ctx.Languages { + logger := logger.With(log.AttributeKeyLanguage, language.ID()) + languageID := language.ID() repositoryPath := filepath.Join(language.ID(), RepositoryPlainName) temporaryRepository := temporaryRepositories[repositoryPath] + logger = logger.With(log.AttributeKeyRepository, temporaryRepository.Name()) for _, model := range ctx.Models { modelID := model.ID() + logger := logger.With(log.AttributeKeyModel, modelID) if modelSucceededBasicChecksOfLanguage[model] == nil { modelSucceededBasicChecksOfLanguage[model] = map[evallanguage.Language]bool{} @@ -123,16 +129,18 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin for _, taskIdentifier := range temporaryRepository.SupportedTasks() { task, err := evaluatetask.TaskForIdentifier(taskIdentifier) if err != nil { - ctx.Log.Fatal(err) + logger.Fatal(err) } - withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() { + + logger := logger.With(log.AttributeKeyTask, taskIdentifier) + withLoadedModel(logger, model, ctx.ProviderForModel[model], func() { for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ { if ctx.Runs > 1 && ctx.RunsSequential { - ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) + logger.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) } - if err := temporaryRepository.Reset(ctx.Log); err != nil { - ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err) + if err := temporaryRepository.Reset(logger); err != nil { + logger.Panicf("ERROR: unable to reset temporary repository path: %s", err) } taskContext := evaltask.Context{ @@ -142,14 +150,14 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin ResultPath: ctx.ResultPath, - Logger: ctx.Log, + Logger: logger, } assessment, ps, err := task.Run(taskContext) if err != nil { ps = append(ps, err) } if len(ps) > 0 { - ctx.Log.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps) + logger.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps) problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...) } else { modelSucceededBasicChecksOfLanguage[model][language] = true @@ -196,18 +204,20 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin temporaryRepositories[repositoryPath] = temporaryRepository } } + logger := ctx.Log for rl := uint(0); rl < ctx.runsAtLanguageLevel(); rl++ { if ctx.Runs > 1 && !ctx.RunsSequential { - ctx.Log.Printf("Run %d/%d", rl+1, ctx.Runs) + logger.Printf("Run %d/%d", rl+1, ctx.Runs) } for _, language := range ctx.Languages { languageID := language.ID() + logger := logger.With(log.AttributeKeyLanguage, languageID) languagePath := filepath.Join(ctx.TestdataPath, languageID) repositories, err := os.ReadDir(languagePath) if err != nil { - ctx.Log.Panicf("ERROR: language path %q cannot be accessed: %s", languagePath, err) + logger.Panicf("ERROR: language path %q cannot be accessed: %s", languagePath, err) } for _, repository := range repositories { @@ -223,27 +233,30 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin continue } + logger = logger.With(log.AttributeKeyRepository, repositoryPath) for _, model := range ctx.Models { modelID := model.ID() + logger := logger.With(log.AttributeKeyModel, modelID) if !ctx.NoDisqualification && !modelSucceededBasicChecksOfLanguage[model][language] { - ctx.Log.Printf("Excluding model %q for language %q cause it did not succeed basic checks", model.ID(), language.ID()) + logger.Printf("Excluding model %q for language %q cause it did not succeed basic checks", model.ID(), language.ID()) continue } for _, taskIdentifier := range temporaryRepository.Tasks { task, err := evaluatetask.TaskForIdentifier(taskIdentifier) if err != nil { - ctx.Log.Fatal(err) + logger.Fatal(err) } - withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() { + logger := logger.With(log.AttributeKeyTask, taskIdentifier) + withLoadedModel(logger, model, ctx.ProviderForModel[model], func() { for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ { if ctx.Runs > 1 && ctx.RunsSequential { - ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) + logger.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) } - if err := temporaryRepository.Reset(ctx.Log); err != nil { - ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err) + if err := temporaryRepository.Reset(logger); err != nil { + logger.Panicf("ERROR: unable to reset temporary repository path: %s", err) } taskContext := evaltask.Context{ @@ -253,12 +266,12 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin ResultPath: ctx.ResultPath, - Logger: ctx.Log, + Logger: logger, } assessment, ps, err := task.Run(taskContext) problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...) if err != nil { - ctx.Log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err) + logger.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err) } assessments.AddAssessmentPerTask(model, language, repositoryPath, assessment) // Write the task assessment to the evaluation CSV file. diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index fa0ab8cbc..415eac5fd 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -98,11 +98,14 @@ func TestEvaluate(t *testing.T) { } }() + resultPath := temporaryPath + logger = logger.With(log.AttributeKeyResultPath, resultPath) + tc.Context.Log = logger if tc.Context.QueryAttempts == 0 { tc.Context.QueryAttempts = 1 } - tc.Context.ResultPath = temporaryPath + tc.Context.ResultPath = resultPath if tc.Context.TestdataPath == "" { tc.Context.TestdataPath = filepath.Join("..", "testdata") } @@ -214,6 +217,7 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 2, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ + "evaluation.log": nil, filepath.Join(string(evaluatetask.IdentifierWriteTests), mockedModel.ID(), "golang", "golang", "plain.log"): nil, filepath.Join("evaluation.csv"): nil, }, @@ -272,7 +276,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 2, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { assert.Contains(t, data, ErrEmptyResponseFromModel.Error()) }, filepath.Join("evaluation.csv"): nil, @@ -341,7 +346,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 2, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { assert.Contains(t, data, "Attempt 1/3: "+ErrEmptyResponseFromModel.Error()) }, filepath.Join("evaluation.csv"): nil, @@ -409,7 +415,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 2, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { assert.Contains(t, data, "DONE 0 tests, 1 error") }, filepath.Join("evaluation.csv"): nil, @@ -535,8 +542,9 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 0, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil, filepath.Join("evaluation.csv"): nil, }, }) @@ -635,8 +643,9 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 0, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil, filepath.Join("evaluation.csv"): nil, }, }) @@ -701,7 +710,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 0, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, filepath.Join("evaluation.csv"): nil, }, }) @@ -769,7 +779,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 6, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, filepath.Join("evaluation.csv"): nil, }, ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { @@ -840,7 +851,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 6, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, filepath.Join("evaluation.csv"): nil, }, ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { @@ -940,7 +952,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 6, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, filepath.Join("evaluation.csv"): nil, }, }) @@ -1023,7 +1036,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 6, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, filepath.Join("evaluation.csv"): nil, }, }) @@ -1088,7 +1102,8 @@ func TestEvaluate(t *testing.T) { }, ExpectedTotalScore: 2, ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ - filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, + "evaluation.log": nil, + filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil, filepath.Join("evaluation.csv"): nil, }, }) diff --git a/evaluate/report/markdown.go b/evaluate/report/markdown.go index 35506929f..51a1c30a3 100644 --- a/evaluate/report/markdown.go +++ b/evaluate/report/markdown.go @@ -16,7 +16,7 @@ import ( "github.com/zimmski/osutil/bytesutil" "github.com/symflower/eval-dev-quality/evaluate/metrics" - "github.com/symflower/eval-dev-quality/model" + "github.com/symflower/eval-dev-quality/log" ) // Markdown holds the values for exporting a Markdown report. @@ -53,7 +53,7 @@ type markdownTemplateContext struct { // ModelLogName formats a model name to match the logging structure. func (c markdownTemplateContext) ModelLogName(modelName string) string { - modelPath := filepath.Join(c.ModelLogsPath, model.CleanModelNameForFileSystem(modelName)) + string(os.PathSeparator) + modelPath := filepath.Join(c.ModelLogsPath, log.CleanModelNameForFileSystem(modelName)) + string(os.PathSeparator) if !filepath.IsAbs(modelPath) { // Ensure we reference the models relative to the Markdown file itself. modelPath = "." + string(os.PathSeparator) + modelPath diff --git a/evaluate/task/task.go b/evaluate/task/task.go index a41d7af6d..09d546493 100644 --- a/evaluate/task/task.go +++ b/evaluate/task/task.go @@ -8,7 +8,6 @@ import ( pkgerrors "github.com/pkg/errors" "github.com/symflower/eval-dev-quality/language" "github.com/symflower/eval-dev-quality/log" - "github.com/symflower/eval-dev-quality/model" evaltask "github.com/symflower/eval-dev-quality/task" ) @@ -57,9 +56,8 @@ func TaskForIdentifier(taskIdentifier evaltask.Identifier) (task evaltask.Task, type taskLogger struct { *log.Logger - logClose func() - ctx evaltask.Context - task evaltask.Task + ctx evaltask.Context + task evaltask.Task } // newTaskLogger initializes the logging. @@ -69,11 +67,7 @@ func newTaskLogger(ctx evaltask.Context, task evaltask.Task) (logging *taskLogge task: task, } - logging.Logger, logging.logClose, err = log.WithFile(ctx.Logger, filepath.Join(ctx.ResultPath, string(task.Identifier()), model.CleanModelNameForFileSystem(ctx.Model.ID()), ctx.Language.ID(), ctx.Repository.Name()+".log")) - if err != nil { - return nil, err - } - + logging.Logger = ctx.Logger logging.Logger.Printf("Evaluating model %q on task %q using language %q and repository %q", ctx.Model.ID(), task.Identifier(), ctx.Language.ID(), ctx.Repository.Name()) return logging, nil @@ -82,8 +76,6 @@ func newTaskLogger(ctx evaltask.Context, task evaltask.Task) (logging *taskLogge // finalizeLogging finalizes the logging. func (t *taskLogger) finalize(problems []error) { t.Logger.Printf("Evaluated model %q on task %q using language %q and repository %q: encountered %d problems: %+v", t.ctx.Model.ID(), t.task.Identifier(), t.ctx.Language.ID(), t.ctx.Repository.Name(), len(problems), problems) - - t.logClose() } // packageHasSourceAndTestFile checks if a package as a source file and the corresponding test file for the given language, and returns the source file path. diff --git a/log/logger.go b/log/logger.go index f7e90ac33..867b32bef 100644 --- a/log/logger.go +++ b/log/logger.go @@ -1,27 +1,116 @@ package log import ( + "context" + "fmt" "io" - "log" + "log/slog" + "maps" "os" "path/filepath" + "slices" + "strings" + "time" pkgerrors "github.com/pkg/errors" "github.com/zimmski/osutil/bytesutil" ) +// AttributeKey defines a key for attributes handed to the structural logger. +type AttributeKey string + +const ( + AttributeKeyLanguage AttributeKey = "Language" + AttributeKeyModel = "Model" + AttributeKeyRepository = "Repository" + AttributeKeyResultPath = "ResultPath" + AttributeKeyTask = "Task" +) + +// Flags defines how log messages should be printed. +type Flags int + +const ( + FlagMessageOnly = 0 + FlagDate = 1 << iota + FlagTime + FlagStandard = FlagDate | FlagTime +) + // Logger holds a logger to log to. -type Logger = log.Logger +type Logger struct { + *slog.Logger +} // newLoggerWithWriter instantiate a logger with a writer. -func newLoggerWithWriter(writer io.Writer) *Logger { - return log.New(writer, "", log.LstdFlags) +func newLoggerWithWriter(writer io.Writer, flags Flags) *Logger { + handler := newSpawningHandler(writer, flags) + handler.logFileSpawners = defaultLogFileSpawners + + return &Logger{ + Logger: slog.New(handler), + } +} + +// SetFlags sets the flags defining how log messages should be printed. +func (l *Logger) SetFlags(flags Flags) { + if spawningHandler, ok := l.Handler().(*spawningHandler); ok { + spawningHandler.flags = flags + } else { + l.Error("Could not set flags of handler") + } +} + +// With returns a Logger that includes the given attributes in each output operation. +func (l *Logger) With(key AttributeKey, value any) *Logger { + return &Logger{ + Logger: l.Logger.With(string(key), value), + } +} + +// Print logs the given message at the "info" level. +func (l *Logger) Print(message string) { + l.Logger.Info(message) +} + +// Printf logs the given message at the "info" level. +func (l *Logger) Printf(format string, args ...any) { + l.Logger.Info(fmt.Sprintf(format, args...)) +} + +// Panicf is equivalent to "Printf" followed by a panic. +func (l *Logger) Panicf(format string, args ...any) { + message := fmt.Sprintf(format, args...) + l.Logger.Info(message) + + panic(message) +} + +// Panic is equivalent to "Print" followed by a panic. +func (l *Logger) Panic(message string) { + l.Logger.Info(message) + + panic(message) +} + +// Panic is equivalent to "Print" followed by a "os.Exit(1)". +func (l *Logger) Fatal(v ...any) { + l.Logger.Info(fmt.Sprint(v...)) + + os.Exit(1) +} + +// Writer returns a writer for writing to the logging output destination. +func (l *Logger) Writer() (writer io.Writer) { + return &handlerWriter{ + handler: l.Handler(), + } } // Buffer returns a logger that writes to a buffer. func Buffer() (buffer *bytesutil.SynchronizedBuffer, logger *Logger) { buffer = new(bytesutil.SynchronizedBuffer) - logger = newLoggerWithWriter(buffer) + logger = newLoggerWithWriter(buffer, FlagStandard) return buffer, logger } @@ -42,34 +131,210 @@ func File(path string) (logger *Logger, loggerClose func(), err error) { } } - logger = newLoggerWithWriter(file) + logger = newLoggerWithWriter(file, FlagStandard) return logger, loggerClose, nil } // STDOUT returns a logger that writes to STDOUT. func STDOUT() (logger *Logger) { - return newLoggerWithWriter(os.Stdout) + return newLoggerWithWriter(os.Stdout, FlagStandard) } -// WithFile returns a logger that writes to a file and to the parent logger at the same time. -func WithFile(parent *Logger, filePath string) (logger *Logger, loggerClose func(), err error) { +// newLogWriter returns a logger that writes to a file and to the parent logger at the same time. +func newLogWriter(parent io.Writer, filePath string) (writer io.Writer, err error) { if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil { - return nil, nil, pkgerrors.WithStack(err) + return nil, pkgerrors.WithStack(err) } file, err := os.OpenFile(filePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) if err != nil { - return nil, nil, pkgerrors.WithStack(err) + return nil, pkgerrors.WithStack(err) } - loggerClose = func() { - if err := file.Close(); err != nil { - panic(err) + + writer = io.MultiWriter(parent, file) + + return writer, nil +} + +// spawningHandler is a structural logging handler which spawns a new log file if one of the given log file spawners triggers. +type spawningHandler struct { + // writer holds the writer to write the output. + writer io.Writer + + // attributes holds the attributes handed to the logger. + attributes map[AttributeKey]string + + // logFileSpawners holds the spawners responsible for spawning a new log file. + logFileSpawners []logFileSpawner + + // flags holds the flags deciding which fields are logged. + flags Flags +} + +// newSpawningHandler returns a new spawning handler. +func newSpawningHandler(writer io.Writer, flags Flags) *spawningHandler { + return &spawningHandler{ + writer: writer, + + attributes: map[AttributeKey]string{}, + + flags: flags, + } +} + +// Clone returns a copy of the object. +func (h *spawningHandler) Clone() (clone *spawningHandler) { + return &spawningHandler{ + writer: h.writer, + + attributes: maps.Clone(h.attributes), + + logFileSpawners: slices.Clone(h.logFileSpawners), + } +} + +var _ slog.Handler = (*spawningHandler)(nil) + +// Enabled reports whether the handler handles records at the given level. +func (h *spawningHandler) Enabled(ctx context.Context, level slog.Level) bool { + return true +} + +// Handle handles the Record. +func (h *spawningHandler) Handle(ctx context.Context, record slog.Record) error { + if h.flags&FlagDate != 0 { + fmt.Fprint(h.writer, record.Time.Format("2006/01/02")) + fmt.Fprint(h.writer, " ") + } + if h.flags&FlagTime != 0 { + fmt.Fprint(h.writer, record.Time.Format("15:04:05")) + fmt.Fprint(h.writer, " ") + } + + fmt.Fprintln(h.writer, record.Message) + + return nil +} + +// WithAttrs returns a new Handler whose attributes consist of both the receiver's attributes and the arguments. +// The Handler owns the slice: it may retain, modify or discard it. +func (h *spawningHandler) WithAttrs(attributes []slog.Attr) slog.Handler { + // Collect attributes. + for _, attribute := range attributes { + h.attributes[AttributeKey(attribute.Key)] = attribute.Value.String() + } + + newHandler := h.Clone() + + // Check if we need to spawn a new log file. + for i, spawner := range h.logFileSpawners { + if !spawner.NeedsSpawn(h.attributes) { + continue + } + + logFilePath := spawner.FilePath(h.attributes) + writer, err := newLogWriter(h.writer, logFilePath) + if err != nil { + fmt.Fprintf(h.writer, "ERROR: cannot create new handler: %s\n", err.Error()) + + continue } + + logMessage := fmt.Sprintf("Spawning new log file at %s", logFilePath) + h.Handle(context.Background(), slog.NewRecord(time.Now(), slog.LevelInfo, logMessage, 0)) + + newHandler.writer = writer + newHandler.logFileSpawners = slices.Delete(newHandler.logFileSpawners, i, i+1) // The currently triggered log file spawner must not be part of the new handler as it would trigger again and again. + + return newHandler } - writer := io.MultiWriter(parent.Writer(), file) - logger = newLoggerWithWriter(writer) + return newHandler +} - return logger, loggerClose, nil +// WithGroup returns a new Handler with the given group appended to the receiver's existing groups. +func (h *spawningHandler) WithGroup(string) slog.Handler { + return h +} + +// handlerWriter is an io.Writer that calls a Handler. +type handlerWriter struct { + handler slog.Handler +} + +var _ io.Writer = (*handlerWriter)(nil) + +// Write writes the given bytes to the underlying data stream. +func (w *handlerWriter) Write(buffer []byte) (int, error) { + // Remove final newline. + originalLength := len(buffer) // Report that the entire buf was written. + if len(buffer) > 0 && buffer[len(buffer)-1] == '\n' { + buffer = buffer[:len(buffer)-1] + } + + var pc uintptr + record := slog.NewRecord(time.Now(), slog.LevelInfo, string(buffer), pc) + + return originalLength, w.handler.Handle(context.Background(), record) +} + +var defaultLogFileSpawners = []logFileSpawner{ + logFileSpawner{ + NeededAttributes: []AttributeKey{ + AttributeKeyResultPath, + }, + FilePath: func(attributes map[AttributeKey]string) string { + return filepath.Join(attributes[AttributeKeyResultPath], "evaluation.log") + }, + }, + logFileSpawner{ + NeededAttributes: []AttributeKey{ + AttributeKeyResultPath, + + AttributeKeyLanguage, + AttributeKeyModel, + AttributeKeyRepository, + AttributeKeyTask, + }, + FilePath: func(attributes map[AttributeKey]string) string { + resultPath := attributes[AttributeKeyResultPath] + modelID := attributes[AttributeKeyModel] + languageID := attributes[AttributeKeyLanguage] + repositoryName := attributes[AttributeKeyRepository] + taskIdentifier := attributes[AttributeKeyTask] + + return filepath.Join(resultPath, taskIdentifier, CleanModelNameForFileSystem(modelID), languageID, repositoryName+".log") + }, + }, +} + +// logFileSpawner defines when a new log file is spawned. +type logFileSpawner struct { + // NeededAttributes holds the list of attributes that need to be set in order to spawn a new log file. + NeededAttributes []AttributeKey + // FilePath is called if all needed attributes are set and returns the file path for the new log file. + FilePath func(attributes map[AttributeKey]string) string +} + +// NeedsSpawn returns if a new log file has to be spawned. +func (s logFileSpawner) NeedsSpawn(attributes map[AttributeKey]string) bool { + for _, attributeKey := range s.NeededAttributes { + if value := attributes[attributeKey]; value == "" { + return false + } + } + + return true +} + +var cleanModelNameForFileSystemReplacer = strings.NewReplacer( + "/", "_", + "\\", "_", + ":", "_", +) + +// CleanModelNameForFileSystem cleans a model name to be useable for directory and file names on the file system. +func CleanModelNameForFileSystem(modelName string) (modelNameCleaned string) { + return cleanModelNameForFileSystemReplacer.Replace(modelName) } diff --git a/log/logger_test.go b/log/logger_test.go new file mode 100644 index 000000000..523391251 --- /dev/null +++ b/log/logger_test.go @@ -0,0 +1,210 @@ +package log + +import ( + "os" + "path/filepath" + "sort" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/zimmski/osutil" + "github.com/zimmski/osutil/bytesutil" +) + +func TestLoggerWith(t *testing.T) { + if osutil.IsWindows() { + t.Skip("cleanup of open log files unsupported") + } + + type testCase struct { + Name string + + Do func(logger *Logger, temporaryPath string) + + ExpectedLogOutput string + ExpectedFiles map[string]string + } + + cleanOutput := func(data string, temporaryPath string) (newData string) { + newData = strings.ReplaceAll(data, temporaryPath, "$TEMPORARY_PATH") + newData = strings.ReplaceAll(newData, "\\", "/") + + return newData + } + + validate := func(t *testing.T, tc *testCase) { + t.Run(tc.Name, func(t *testing.T) { + logOutput := new(bytesutil.SynchronizedBuffer) + logger := newLoggerWithWriter(logOutput, FlagMessageOnly) + + temporaryPath := t.TempDir() + + tc.Do(logger, temporaryPath) + + actualResultFiles, err := osutil.FilesRecursive(temporaryPath) + require.NoError(t, err) + for i, p := range actualResultFiles { + actualResultFiles[i], err = filepath.Rel(temporaryPath, p) + require.NoError(t, err) + } + sort.Strings(actualResultFiles) + var expectedResultFiles []string + for filePath, expectedData := range tc.ExpectedFiles { + expectedResultFiles = append(expectedResultFiles, filePath) + + data, err := os.ReadFile(filepath.Join(temporaryPath, filePath)) + require.NoError(t, err) + + actualData := string(data) + actualData = cleanOutput(actualData, temporaryPath) + expectedData = bytesutil.StringTrimIndentations(expectedData) + assert.Equal(t, expectedData, actualData) + } + sort.Strings(expectedResultFiles) + assert.Equal(t, expectedResultFiles, actualResultFiles) + + expectedLogOutput := bytesutil.StringTrimIndentations(tc.ExpectedLogOutput) + assert.Equal(t, expectedLogOutput, cleanOutput(logOutput.String(), temporaryPath)) + }) + } + + validate(t, &testCase{ + Name: "New log file for Result path", + + Do: func(logger *Logger, temporaryPath string) { + logger = logger.With(AttributeKeyResultPath, temporaryPath) + + logger.Info("Every log file") + }, + + ExpectedLogOutput: ` + Spawning new log file at $TEMPORARY_PATH/evaluation.log + Every log file + `, + ExpectedFiles: map[string]string{ + "evaluation.log": ` + Every log file + `, + }, + }) + validate(t, &testCase{ + Name: "New log file for Language-Model-Repository-Task", + + Do: func(logger *Logger, temporaryPath string) { + logger = logger.With(AttributeKeyResultPath, temporaryPath) + logger = logger.With(AttributeKeyLanguage, "languageA") + logger = logger.With(AttributeKeyModel, "modelA") + logger = logger.With(AttributeKeyRepository, "repositoryA") + logger = logger.With(AttributeKeyTask, "taskA") + + logger.Info("Every log file") + }, + + ExpectedLogOutput: ` + Spawning new log file at $TEMPORARY_PATH/evaluation.log + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA.log + Every log file + `, + ExpectedFiles: map[string]string{ + "evaluation.log": ` + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA.log + Every log file + `, + filepath.Join("taskA", "modelA", "languageA", "repositoryA.log"): ` + Every log file + `, + }, + }) + + validate(t, &testCase{ + Name: "New log file for two tasks", + + Do: func(logger *Logger, temporaryPath string) { + logger = logger.With(AttributeKeyResultPath, temporaryPath) + logger = logger.With(AttributeKeyLanguage, "languageA") + logger = logger.With(AttributeKeyModel, "modelA") + logger = logger.With(AttributeKeyRepository, "repositoryA") + + loggerA := logger.With(AttributeKeyTask, "taskA") + _ = logger.With(AttributeKeyTask, "taskB") + + loggerA.Info("Only in A log files") + }, + + ExpectedLogOutput: ` + Spawning new log file at $TEMPORARY_PATH/evaluation.log + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA.log + Spawning new log file at $TEMPORARY_PATH/taskB/modelA/languageA/repositoryA.log + Only in A log files + `, + ExpectedFiles: map[string]string{ + "evaluation.log": ` + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA.log + Spawning new log file at $TEMPORARY_PATH/taskB/modelA/languageA/repositoryA.log + Only in A log files + `, + filepath.Join("taskA", "modelA", "languageA", "repositoryA.log"): ` + Only in A log files + `, + filepath.Join("taskB", "modelA", "languageA", "repositoryA.log"): "", + }, + }) + validate(t, &testCase{ + Name: "New log file for two repositories", + + Do: func(logger *Logger, temporaryPath string) { + logger = logger.With(AttributeKeyResultPath, temporaryPath) + logger = logger.With(AttributeKeyLanguage, "languageA") + logger = logger.With(AttributeKeyModel, "modelA") + + loggerA := logger.With(AttributeKeyRepository, "repositoryA") + _ = loggerA.With(AttributeKeyTask, "taskA") + + loggerB := logger.With(AttributeKeyRepository, "repositoryB") + _ = loggerB.With(AttributeKeyTask, "taskA") + + }, + + ExpectedLogOutput: ` + Spawning new log file at $TEMPORARY_PATH/evaluation.log + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA.log + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryB.log + `, + ExpectedFiles: map[string]string{ + "evaluation.log": ` + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA.log + Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryB.log + `, + filepath.Join("taskA", "modelA", "languageA", "repositoryA.log"): "", + filepath.Join("taskA", "modelA", "languageA", "repositoryB.log"): "", + }, + }) +} + +func TestCleanModelNameForFileSystem(t *testing.T) { + type testCase struct { + Name string + + ModelName string + + ExpectedModelNameCleaned string + } + + validate := func(t *testing.T, tc *testCase) { + t.Run(tc.Name, func(t *testing.T) { + actualModelNameCleaned := CleanModelNameForFileSystem(tc.ModelName) + + assert.Equal(t, tc.ExpectedModelNameCleaned, actualModelNameCleaned) + }) + } + + validate(t, &testCase{ + Name: "Simple", + + ModelName: "openrouter/anthropic/claude-2.0:beta", + + ExpectedModelNameCleaned: "openrouter_anthropic_claude-2.0_beta", + }) +} diff --git a/model/model.go b/model/model.go index 1c6c33cd4..5d002ffa5 100644 --- a/model/model.go +++ b/model/model.go @@ -1,8 +1,6 @@ package model import ( - "strings" - "github.com/symflower/eval-dev-quality/language" "github.com/symflower/eval-dev-quality/log" ) @@ -35,14 +33,3 @@ type SetQueryAttempts interface { // SetQueryAttempts sets the number of query attempts to perform when a model request errors in the process of solving a task. SetQueryAttempts(attempts uint) } - -var cleanModelNameForFileSystemReplacer = strings.NewReplacer( - "/", "_", - "\\", "_", - ":", "_", -) - -// CleanModelNameForFileSystem cleans a model name to be useable for directory and file names on the file system. -func CleanModelNameForFileSystem(modelName string) (modelNameCleaned string) { - return cleanModelNameForFileSystemReplacer.Replace(modelName) -} diff --git a/model/model_test.go b/model/model_test.go deleted file mode 100644 index 537c728b7..000000000 --- a/model/model_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package model - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestCleanModelNameForFileSystem(t *testing.T) { - type testCase struct { - Name string - - ModelName string - - ExpectedModelNameCleaned string - } - - validate := func(t *testing.T, tc *testCase) { - t.Run(tc.Name, func(t *testing.T) { - actualModelNameCleaned := CleanModelNameForFileSystem(tc.ModelName) - - assert.Equal(t, tc.ExpectedModelNameCleaned, actualModelNameCleaned) - }) - } - - validate(t, &testCase{ - Name: "Simple", - - ModelName: "openrouter/anthropic/claude-2.0:beta", - - ExpectedModelNameCleaned: "openrouter_anthropic_claude-2.0_beta", - }) -} diff --git a/scripts/evaluation/sequential-light.go b/scripts/evaluation/sequential-light.go index 53e2a14c5..ab5f127e0 100644 --- a/scripts/evaluation/sequential-light.go +++ b/scripts/evaluation/sequential-light.go @@ -11,7 +11,6 @@ import ( "github.com/zimmski/osutil" "github.com/symflower/eval-dev-quality/log" - "github.com/symflower/eval-dev-quality/model" "github.com/symflower/eval-dev-quality/util" ) @@ -78,7 +77,7 @@ func benchmarkModel(logger *log.Logger, resultPrefix string, repository string, "eval-dev-quality", "evaluate", "--runs", "5", - "--result-path", resultPrefix + model.CleanModelNameForFileSystem(id), + "--result-path", resultPrefix + log.CleanModelNameForFileSystem(id), "--repository", filepath.Join("java", repository), "--repository", filepath.Join("golang", repository), "--model", id,