Skip to content

Commit

Permalink
Switch to structural logging package to make logs better manageable
Browse files Browse the repository at this point in the history
  • Loading branch information
ahumenberger authored and bauersimon committed Jul 12, 2024
1 parent 2037283 commit 7d6cb80
Show file tree
Hide file tree
Showing 13 changed files with 568 additions and 138 deletions.
1 change: 1 addition & 0 deletions cmd/eval-dev-quality/cmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func Execute(logger *log.Logger, arguments []string) {
logger.Panicf("Could not parse arguments: %+v", err)
}
if parser.Active == nil {
logger.SetFlags(log.FlagMessageOnly)
parser.WriteHelp(logger.Writer())
}
}
Expand Down
22 changes: 4 additions & 18 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,7 @@ func (command *Evaluate) SetLogger(logger *log.Logger) {
}

// Initialize initializes the command according to the arguments.
func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context, cleanup func()) {
// Ensure the cleanup always runs in case there is a panic.
defer func() {
if r := recover(); r != nil {
if cleanup != nil {
cleanup()
}
panic(r)
}
}()
func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context) {
evaluationContext = &evaluate.Context{}

// Check and validate common options.
Expand Down Expand Up @@ -197,11 +188,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.

// Initialize logging within result directory.
{
log, logClose, err := log.WithFile(command.logger, filepath.Join(command.ResultPath, "evaluation.log"))
if err != nil {
command.logger.Panicf("ERROR: %s", err)
}
cleanup = logClose
log := command.logger.With(log.AttributeKeyResultPath, command.ResultPath)
command.logger = log
evaluationContext.Log = log
}
Expand Down Expand Up @@ -378,15 +365,14 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
}
}

return evaluationContext, cleanup
return evaluationContext
}

// Execute executes the command.
func (command *Evaluate) Execute(args []string) (err error) {
command.timestamp = time.Now()

evaluationContext, cleanup := command.Initialize(args)
defer cleanup()
evaluationContext := command.Initialize(args)
if evaluationContext == nil {
command.logger.Panic("ERROR: empty evaluation context")
}
Expand Down
13 changes: 4 additions & 9 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
evaluatetask "github.com/symflower/eval-dev-quality/evaluate/task"
"github.com/symflower/eval-dev-quality/language"
"github.com/symflower/eval-dev-quality/log"
"github.com/symflower/eval-dev-quality/model"
providertesting "github.com/symflower/eval-dev-quality/provider/testing"
"github.com/symflower/eval-dev-quality/tools"
toolstesting "github.com/symflower/eval-dev-quality/tools/testing"
Expand Down Expand Up @@ -522,7 +521,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "unloading model")
},
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
}
Expand Down Expand Up @@ -568,7 +567,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "response-no-error=2")
},
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
}
Expand Down Expand Up @@ -761,18 +760,14 @@ func TestEvaluateInitialize(t *testing.T) {

if tc.ValidatePanic != "" {
assert.PanicsWithValue(t, tc.ValidatePanic, func() {
c, cleanup := tc.Command.Initialize([]string{})
cleanup()
actualEvaluationContext = c
actualEvaluationContext = tc.Command.Initialize([]string{})
})

return
}

assert.NotPanics(t, func() {
c, cleanup := tc.Command.Initialize([]string{})
cleanup()
actualEvaluationContext = c
actualEvaluationContext = tc.Command.Initialize([]string{})
})

if tc.ValidateCommand != nil {
Expand Down
2 changes: 1 addition & 1 deletion cmd/eval-dev-quality/cmd/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func (command *Version) Execute(args []string) (err error) {
if revision == "" {
revision = "development"
}
command.logger.SetFlags(0) // Remove the timestamp and everything from the log output while still being able to test it.
command.logger.SetFlags(log.FlagMessageOnly) // Remove the timestamp and everything from the log output while still being able to test it.
command.logger.Printf("eval-dev-quality version %s - revision %s", evaluate.Version, revision)

return nil
Expand Down
49 changes: 31 additions & 18 deletions evaluate/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,18 +99,24 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin

temporaryRepositories[repositoryPath] = temporaryRepository
}

logger := ctx.Log
for rl := uint(0); rl < ctx.runsAtLanguageLevel(); rl++ {
if ctx.Runs > 1 && !ctx.RunsSequential {
ctx.Log.Printf("Run %d/%d", rl+1, ctx.Runs)
logger.Printf("Run %d/%d", rl+1, ctx.Runs)
}

for _, language := range ctx.Languages {
logger := logger.With(log.AttributeKeyLanguage, language.ID())

languageID := language.ID()
repositoryPath := filepath.Join(language.ID(), RepositoryPlainName)
temporaryRepository := temporaryRepositories[repositoryPath]

logger = logger.With(log.AttributeKeyRepository, temporaryRepository.Name())
for _, model := range ctx.Models {
modelID := model.ID()
logger := logger.With(log.AttributeKeyModel, modelID)

if modelSucceededBasicChecksOfLanguage[model] == nil {
modelSucceededBasicChecksOfLanguage[model] = map[evallanguage.Language]bool{}
Expand All @@ -123,16 +129,18 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
for _, taskIdentifier := range temporaryRepository.SupportedTasks() {
task, err := evaluatetask.TaskForIdentifier(taskIdentifier)
if err != nil {
ctx.Log.Fatal(err)
logger.Fatal(err)
}
withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() {

logger := logger.With(log.AttributeKeyTask, taskIdentifier)
withLoadedModel(logger, model, ctx.ProviderForModel[model], func() {
for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ {
if ctx.Runs > 1 && ctx.RunsSequential {
ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
logger.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
}

if err := temporaryRepository.Reset(ctx.Log); err != nil {
ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err)
if err := temporaryRepository.Reset(logger); err != nil {
logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
}

taskContext := evaltask.Context{
Expand All @@ -142,14 +150,14 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin

ResultPath: ctx.ResultPath,

Logger: ctx.Log,
Logger: logger,
}
assessment, ps, err := task.Run(taskContext)
if err != nil {
ps = append(ps, err)
}
if len(ps) > 0 {
ctx.Log.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
logger.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
} else {
modelSucceededBasicChecksOfLanguage[model][language] = true
Expand Down Expand Up @@ -196,18 +204,20 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
temporaryRepositories[repositoryPath] = temporaryRepository
}
}
logger := ctx.Log
for rl := uint(0); rl < ctx.runsAtLanguageLevel(); rl++ {
if ctx.Runs > 1 && !ctx.RunsSequential {
ctx.Log.Printf("Run %d/%d", rl+1, ctx.Runs)
logger.Printf("Run %d/%d", rl+1, ctx.Runs)
}

for _, language := range ctx.Languages {
languageID := language.ID()
logger := logger.With(log.AttributeKeyLanguage, languageID)

languagePath := filepath.Join(ctx.TestdataPath, languageID)
repositories, err := os.ReadDir(languagePath)
if err != nil {
ctx.Log.Panicf("ERROR: language path %q cannot be accessed: %s", languagePath, err)
logger.Panicf("ERROR: language path %q cannot be accessed: %s", languagePath, err)
}

for _, repository := range repositories {
Expand All @@ -223,27 +233,30 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
continue
}

logger = logger.With(log.AttributeKeyRepository, repositoryPath)
for _, model := range ctx.Models {
modelID := model.ID()
logger := logger.With(log.AttributeKeyModel, modelID)

if !ctx.NoDisqualification && !modelSucceededBasicChecksOfLanguage[model][language] {
ctx.Log.Printf("Excluding model %q for language %q cause it did not succeed basic checks", model.ID(), language.ID())
logger.Printf("Excluding model %q for language %q cause it did not succeed basic checks", model.ID(), language.ID())

continue
}
for _, taskIdentifier := range temporaryRepository.Tasks {
task, err := evaluatetask.TaskForIdentifier(taskIdentifier)
if err != nil {
ctx.Log.Fatal(err)
logger.Fatal(err)
}
withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() {
logger := logger.With(log.AttributeKeyTask, taskIdentifier)
withLoadedModel(logger, model, ctx.ProviderForModel[model], func() {
for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ {
if ctx.Runs > 1 && ctx.RunsSequential {
ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
logger.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
}

if err := temporaryRepository.Reset(ctx.Log); err != nil {
ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err)
if err := temporaryRepository.Reset(logger); err != nil {
logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
}

taskContext := evaltask.Context{
Expand All @@ -253,12 +266,12 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin

ResultPath: ctx.ResultPath,

Logger: ctx.Log,
Logger: logger,
}
assessment, ps, err := task.Run(taskContext)
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
if err != nil {
ctx.Log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err)
logger.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err)
}
assessments.AddAssessmentPerTask(model, language, repositoryPath, assessment)
// Write the task assessment to the evaluation CSV file.
Expand Down
43 changes: 29 additions & 14 deletions evaluate/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,14 @@ func TestEvaluate(t *testing.T) {
}
}()

resultPath := temporaryPath
logger = logger.With(log.AttributeKeyResultPath, resultPath)

tc.Context.Log = logger
if tc.Context.QueryAttempts == 0 {
tc.Context.QueryAttempts = 1
}
tc.Context.ResultPath = temporaryPath
tc.Context.ResultPath = resultPath
if tc.Context.TestdataPath == "" {
tc.Context.TestdataPath = filepath.Join("..", "testdata")
}
Expand Down Expand Up @@ -214,6 +217,7 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), mockedModel.ID(), "golang", "golang", "plain.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
Expand Down Expand Up @@ -272,7 +276,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, ErrEmptyResponseFromModel.Error())
},
filepath.Join("evaluation.csv"): nil,
Expand Down Expand Up @@ -341,7 +346,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "Attempt 1/3: "+ErrEmptyResponseFromModel.Error())
},
filepath.Join("evaluation.csv"): nil,
Expand Down Expand Up @@ -409,7 +415,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "DONE 0 tests, 1 error")
},
filepath.Join("evaluation.csv"): nil,
Expand Down Expand Up @@ -535,8 +542,9 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 0,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
})
Expand Down Expand Up @@ -635,8 +643,9 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 0,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
})
Expand Down Expand Up @@ -701,7 +710,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 0,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
})
Expand Down Expand Up @@ -769,7 +779,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
Expand Down Expand Up @@ -840,7 +851,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
Expand Down Expand Up @@ -940,7 +952,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
})
Expand Down Expand Up @@ -1023,7 +1036,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 6,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
})
Expand Down Expand Up @@ -1088,7 +1102,8 @@ func TestEvaluate(t *testing.T) {
},
ExpectedTotalScore: 2,
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join(string(evaluatetask.IdentifierWriteTests), evalmodel.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
"evaluation.log": nil,
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain.log"): nil,
filepath.Join("evaluation.csv"): nil,
},
})
Expand Down
Loading

0 comments on commit 7d6cb80

Please sign in to comment.