Skip to content

Commit

Permalink
Merge pull request #274 from symflower/numerate-containerized-result-…
Browse files Browse the repository at this point in the history
…paths

Always number the result paths of containerized runs to avoid I/O sync problems
  • Loading branch information
ruiAzevedo19 authored Jul 18, 2024
2 parents acb0f6d + 16c800b commit 19857ff
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 24 deletions.
26 changes: 20 additions & 6 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"path/filepath"
"slices"
"sort"
"strconv"
"strings"
"text/template"
"time"
Expand Down Expand Up @@ -503,7 +504,8 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
}

// Iterate over each model and start the container.
for _, model := range ctx.Models {
models := map[string]bool{}
for i, model := range ctx.Models {
// We are skipping ollama models until we fully support pulling. https://github.com/symflower/eval-dev-quality/issues/100.
if ctx.ProviderForModel[model].ID() == "ollama" {
command.logger.Print("Skipping unsupported ollama model with docker runtime")
Expand All @@ -522,11 +524,18 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
}

// Commands for the evaluation to run inside the container.
resultPath := "/app/evaluation/" + log.CleanModelNameForFileSystem(model.ID())
if models[model.ID()] {
resultPath += "_" + strconv.Itoa(i)
} else {
models[model.ID()] = true
}

evaluationCommand := []string{
"eval-dev-quality",
"evaluate",
"--model", model.ID(),
"--result-path", "/app/evaluation/" + model.ID(),
"--result-path", resultPath,
}

cmd := append(dockerCommand, evaluationCommand...)
Expand Down Expand Up @@ -618,6 +627,7 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) {
parallel := util.NewParallel(command.Parallel)

// Iterate over each model and start the container.
models := map[string]bool{}
for i, model := range ctx.Models {
// We are skipping ollama models until we fully support pulling. https://github.com/symflower/eval-dev-quality/issues/100.
if ctx.ProviderForModel[model].ID() == "ollama" {
Expand All @@ -635,13 +645,17 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) {
}

// Commands for the evaluation to run inside the container.
resultPath := "/var/evaluation/" + log.CleanModelNameForFileSystem(model.ID())
if models[model.ID()] {
resultPath += "_" + strconv.Itoa(i)
} else {
models[model.ID()] = true
}
evaluationCommand := []string{
"eval-dev-quality",
"evaluate",
"--model",
model.ID(),
"--result-path",
"/var/evaluations/" + model.ID(),
"--model", model.ID(),
"--result-path", resultPath,
}
cmd := append(evaluationCommand, args...)

Expand Down
36 changes: 18 additions & 18 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -809,8 +809,8 @@ func TestEvaluateExecute(t *testing.T) {
filepath.Join("result-directory", "evaluation.log"): nil,

// Parallel run 1
filepath.Join("result-directory", "symflower", "symbolic-execution", "categories.svg"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution", "evaluation.csv"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution", "categories.svg"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 10,
Expand Down Expand Up @@ -850,18 +850,18 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount])
assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount])
},
filepath.Join("result-directory", "symflower", "symbolic-execution", "evaluation.log"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution", "README.md"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution", "evaluation.log"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution", "README.md"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},
filepath.Join("result-directory", "symflower", "symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},

// Parallel run 2
filepath.Join("result-directory", "symflower", "symbolic-execution-0", "categories.svg"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution-0", "evaluation.csv"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution_1", "categories.svg"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution_1", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 10,
Expand Down Expand Up @@ -901,18 +901,18 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount])
assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount])
},
filepath.Join("result-directory", "symflower", "symbolic-execution-0", "evaluation.log"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution-0", "README.md"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution_1", "evaluation.log"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution_1", "README.md"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution_1", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},
filepath.Join("result-directory", "symflower", "symbolic-execution-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution_1", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},

// Parallel run 3
filepath.Join("result-directory", "symflower", "symbolic-execution-1", "categories.svg"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution-1", "evaluation.csv"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution_2", "categories.svg"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution_2", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 10,
Expand Down Expand Up @@ -952,12 +952,12 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount])
assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount])
},
filepath.Join("result-directory", "symflower", "symbolic-execution-1", "evaluation.log"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution-1", "README.md"): nil,
filepath.Join("result-directory", "symflower", "symbolic-execution-1", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution_2", "evaluation.log"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution_2", "README.md"): nil,
filepath.Join("result-directory", "symflower_symbolic-execution_2", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},
filepath.Join("result-directory", "symflower", "symbolic-execution-1", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) {
filepath.Join("result-directory", "symflower_symbolic-execution_2", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},
},
Expand Down

0 comments on commit 19857ff

Please sign in to comment.