Switch to structural logging package to make logs better manageable

symflower · Jul 12, 2024 · 7d6cb80 · 7d6cb80
1 parent 2037283
commit 7d6cb80
Show file tree

Hide file tree

Showing 13 changed files with 568 additions and 138 deletions.
diff --git a/cmd/eval-dev-quality/cmd/command.go b/cmd/eval-dev-quality/cmd/command.go
@@ -43,6 +43,7 @@ func Execute(logger *log.Logger, arguments []string) {
 		logger.Panicf("Could not parse arguments: %+v", err)
 	}
 	if parser.Active == nil {
+		logger.SetFlags(log.FlagMessageOnly)
 		parser.WriteHelp(logger.Writer())
 	}
 }

diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -96,16 +96,7 @@ func (command *Evaluate) SetLogger(logger *log.Logger) {
 }
 
 // Initialize initializes the command according to the arguments.
-func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context, cleanup func()) {
-	// Ensure the cleanup always runs in case there is a panic.
-	defer func() {
-		if r := recover(); r != nil {
-			if cleanup != nil {
-				cleanup()
-			}
-			panic(r)
-		}
-	}()
+func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context) {
 	evaluationContext = &evaluate.Context{}
 
 	// Check and validate common options.
@@ -197,11 +188,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 
 	// Initialize logging within result directory.
 	{
-		log, logClose, err := log.WithFile(command.logger, filepath.Join(command.ResultPath, "evaluation.log"))
-		if err != nil {
-			command.logger.Panicf("ERROR: %s", err)
-		}
-		cleanup = logClose
+		log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath)
 		command.logger = log
 		evaluationContext.Log = log
 	}
@@ -378,15 +365,14 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 		}
 	}
 
-	return evaluationContext, cleanup
+	return evaluationContext
 }
 
 // Execute executes the command.
 func (command *Evaluate) Execute(args []string) (err error) {
 	command.timestamp = time.Now()
 
-	evaluationContext, cleanup := command.Initialize(args)
-	defer cleanup()
+	evaluationContext := command.Initialize(args)
 	if evaluationContext == nil {
 		command.logger.Panic("ERROR: empty evaluation context")
 	}

diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -21,7 +21,6 @@ import (
 	evaluatetask "github.com/symflower/eval-dev-quality/evaluate/task"
 	"github.com/symflower/eval-dev-quality/language"
 	"github.com/symflower/eval-dev-quality/log"
-	"github.com/symflower/eval-dev-quality/model"
 	providertesting "github.com/symflower/eval-dev-quality/provider/testing"
 	"github.com/symflower/eval-dev-quality/tools"
 	toolstesting "github.com/symflower/eval-dev-quality/tools/testing"
@@ -522,7 +521,7 @@ func TestEvaluateExecute(t *testing.T) {
 							assert.Contains(t, data, "unloading model")
 						},
 						filepath.Join("result-directory", "README.md"): nil,
-						filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
+						filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
 					},
 				})
 			}
@@ -568,7 +567,7 @@ func TestEvaluateExecute(t *testing.T) {
 							assert.Contains(t, data, "response-no-error=2")
 						},
 						filepath.Join("result-directory", "README.md"): nil,
-						filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
+						filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
 					},
 				})
 			}
@@ -761,18 +760,14 @@ func TestEvaluateInitialize(t *testing.T) {
 
 			if tc.ValidatePanic != "" {
 				assert.PanicsWithValue(t, tc.ValidatePanic, func() {
-					c, cleanup := tc.Command.Initialize([]string{})
-					cleanup()
-					actualEvaluationContext = c
+					actualEvaluationContext = tc.Command.Initialize([]string{})
 				})
 
 				return
 			}
 
 			assert.NotPanics(t, func() {
-				c, cleanup := tc.Command.Initialize([]string{})
-				cleanup()
-				actualEvaluationContext = c
+				actualEvaluationContext = tc.Command.Initialize([]string{})
 			})
 
 			if tc.ValidateCommand != nil {

diff --git a/cmd/eval-dev-quality/cmd/version.go b/cmd/eval-dev-quality/cmd/version.go
@@ -24,7 +24,7 @@ func (command *Version) Execute(args []string) (err error) {
 	if revision == "" {
 		revision = "development"
 	}
-	command.logger.SetFlags(0) // Remove the timestamp and everything from the log output while still being able to test it.
+	command.logger.SetFlags(log.FlagMessageOnly) // Remove the timestamp and everything from the log output while still being able to test it.
 	command.logger.Printf("eval-dev-quality version %s - revision %s", evaluate.Version, revision)
 
 	return nil

diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go
@@ -99,18 +99,24 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
 
 			temporaryRepositories[repositoryPath] = temporaryRepository
 		}
+
+		logger := ctx.Log
 		for rl := uint(0); rl < ctx.runsAtLanguageLevel(); rl++ {
 			if ctx.Runs > 1 && !ctx.RunsSequential {
-				ctx.Log.Printf("Run %d/%d", rl+1, ctx.Runs)
+				logger.Printf("Run %d/%d", rl+1, ctx.Runs)
 			}
 
 			for _, language := range ctx.Languages {
+				logger := logger.With(log.AttributeKeyLanguage, language.ID())
+
 				languageID := language.ID()
 				repositoryPath := filepath.Join(language.ID(), RepositoryPlainName)
 				temporaryRepository := temporaryRepositories[repositoryPath]
 
+				logger = logger.With(log.AttributeKeyRepository, temporaryRepository.Name())
 				for _, model := range ctx.Models {
 					modelID := model.ID()
+					logger := logger.With(log.AttributeKeyModel, modelID)
 
 					if modelSucceededBasicChecksOfLanguage[model] == nil {
 						modelSucceededBasicChecksOfLanguage[model] = map[evallanguage.Language]bool{}
@@ -123,16 +129,18 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
 					for _, taskIdentifier := range temporaryRepository.SupportedTasks() {
 						task, err := evaluatetask.TaskForIdentifier(taskIdentifier)
 						if err != nil {
-							ctx.Log.Fatal(err)
+							logger.Fatal(err)
 						}
-						withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() {
+
+						logger := logger.With(log.AttributeKeyTask, taskIdentifier)
+						withLoadedModel(logger, model, ctx.ProviderForModel[model], func() {
 							for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ {
 								if ctx.Runs > 1 && ctx.RunsSequential {
-									ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
+									logger.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
 								}
 
-								if err := temporaryRepository.Reset(ctx.Log); err != nil {
-									ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err)
+								if err := temporaryRepository.Reset(logger); err != nil {
+									logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
 								}
 
 								taskContext := evaltask.Context{
@@ -142,14 +150,14 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
 
 									ResultPath: ctx.ResultPath,
 
-									Logger: ctx.Log,
+									Logger: logger,
 								}
 								assessment, ps, err := task.Run(taskContext)
 								if err != nil {
 									ps = append(ps, err)
 								}
 								if len(ps) > 0 {
-									ctx.Log.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
+									logger.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
 									problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
 								} else {
 									modelSucceededBasicChecksOfLanguage[model][language] = true
@@ -196,18 +204,20 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
 			temporaryRepositories[repositoryPath] = temporaryRepository
 		}
 	}
+	logger := ctx.Log
 	for rl := uint(0); rl < ctx.runsAtLanguageLevel(); rl++ {
 		if ctx.Runs > 1 && !ctx.RunsSequential {
-			ctx.Log.Printf("Run %d/%d", rl+1, ctx.Runs)
+			logger.Printf("Run %d/%d", rl+1, ctx.Runs)
 		}
 
 		for _, language := range ctx.Languages {
 			languageID := language.ID()
+			logger := logger.With(log.AttributeKeyLanguage, languageID)
 
 			languagePath := filepath.Join(ctx.TestdataPath, languageID)
 			repositories, err := os.ReadDir(languagePath)
 			if err != nil {
-				ctx.Log.Panicf("ERROR: language path %q cannot be accessed: %s", languagePath, err)
+				logger.Panicf("ERROR: language path %q cannot be accessed: %s", languagePath, err)
 			}
 
 			for _, repository := range repositories {
@@ -223,27 +233,30 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
 					continue
 				}
 
+				logger = logger.With(log.AttributeKeyRepository, repositoryPath)
 				for _, model := range ctx.Models {
 					modelID := model.ID()
+					logger := logger.With(log.AttributeKeyModel, modelID)
 
 					if !ctx.NoDisqualification && !modelSucceededBasicChecksOfLanguage[model][language] {
-						ctx.Log.Printf("Excluding model %q for language %q cause it did not succeed basic checks", model.ID(), language.ID())
+						logger.Printf("Excluding model %q for language %q cause it did not succeed basic checks", model.ID(), language.ID())
 
 						continue
 					}
 					for _, taskIdentifier := range temporaryRepository.Tasks {
 						task, err := evaluatetask.TaskForIdentifier(taskIdentifier)
 						if err != nil {
-							ctx.Log.Fatal(err)
+							logger.Fatal(err)
 						}
-						withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() {
+						logger := logger.With(log.AttributeKeyTask, taskIdentifier)
+						withLoadedModel(logger, model, ctx.ProviderForModel[model], func() {
 							for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ {
 								if ctx.Runs > 1 && ctx.RunsSequential {
-									ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
+									logger.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
 								}
 
-								if err := temporaryRepository.Reset(ctx.Log); err != nil {
-									ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err)
+								if err := temporaryRepository.Reset(logger); err != nil {
+									logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
 								}
 
 								taskContext := evaltask.Context{
@@ -253,12 +266,12 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
 
 									ResultPath: ctx.ResultPath,
 
-									Logger: ctx.Log,
+									Logger: logger,
 								}
 								assessment, ps, err := task.Run(taskContext)
 								problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
 								if err != nil {
-									ctx.Log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err)
+									logger.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err)
 								}
 								assessments.AddAssessmentPerTask(model, language, repositoryPath, assessment)
 								// Write the task assessment to the evaluation CSV file.

diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go
@@ -98,11 +98,14 @@ func TestEvaluate(t *testing.T) {
 				}
 			}()
 
+			resultPath := temporaryPath
+			logger = logger.With(log.AttributeKeyResultPath, resultPath)
+
 			tc.Context.Log = logger
 			if tc.Context.QueryAttempts == 0 {
 				tc.Context.QueryAttempts = 1
 			}
-			tc.Context.ResultPath = temporaryPath
+			tc.Context.ResultPath = resultPath
 			if tc.Context.TestdataPath == "" {
 				tc.Context.TestdataPath = filepath.Join("..", "testdata")
 			}
@@ -214,6 +217,7 @@ func TestEvaluate(t *testing.T) {
 			},
 			ExpectedTotalScore: 2,
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+				"evaluation.log": nil,
 				filepath.Join(string(evaluatetask.IdentifierWriteTests), mockedModel.ID(), "golang", "golang", "plain.log"): nil,
 				filepath.Join("evaluation.csv"): nil,
 			},
@@ -272,7 +276,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 2,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
 						assert.Contains(t, data, ErrEmptyResponseFromModel.Error())
 					},
 					filepath.Join("evaluation.csv"): nil,
@@ -341,7 +346,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 2,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
 						assert.Contains(t, data, "Attempt 1/3: "+ErrEmptyResponseFromModel.Error())
 					},
 					filepath.Join("evaluation.csv"): nil,
@@ -409,7 +415,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 2,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
 						assert.Contains(t, data, "DONE 0 tests, 1 error")
 					},
 					filepath.Join("evaluation.csv"): nil,
@@ -535,8 +542,9 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 0,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"):  nil,
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"):  nil,
 					filepath.Join("evaluation.csv"): nil,
 				},
 			})
@@ -635,8 +643,9 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 0,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"):  nil,
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"):  nil,
 					filepath.Join("evaluation.csv"): nil,
 				},
 			})
@@ -701,7 +710,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 0,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
 					filepath.Join("evaluation.csv"): nil,
 				},
 			})
@@ -769,7 +779,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 6,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
 					filepath.Join("evaluation.csv"): nil,
 				},
 				ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
@@ -840,7 +851,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 6,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
 					filepath.Join("evaluation.csv"): nil,
 				},
 				ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
@@ -940,7 +952,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 6,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
 					filepath.Join("evaluation.csv"): nil,
 				},
 			})
@@ -1023,7 +1036,8 @@ func TestEvaluate(t *testing.T) {
 				},
 				ExpectedTotalScore: 6,
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-					filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+					"evaluation.log": nil,
+					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
 					filepath.Join("evaluation.csv"): nil,
 				},
 			})
@@ -1088,7 +1102,8 @@ func TestEvaluate(t *testing.T) {
 			},
 			ExpectedTotalScore: 2,
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
+				"evaluation.log": nil,
+				filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
 				filepath.Join("evaluation.csv"): nil,
 			},
 		})