Skip to content

Commit

Permalink
Make the CSV header more precise, add a problem count, and report metrics for excluded models too
Browse files Browse the repository at this point in the history
  • Loading branch information
bauersimon committed Apr 3, 2024
1 parent ba0b79b commit a73b828
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 10 deletions.
4 changes: 4 additions & 0 deletions cmd/eval-symflower-codegen-testing/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ func (command *Evaluate) Execute(args []string) (err error) {
// Evaluating models and languages.
log.Printf("Evaluating models and languages")
metricsPerModel := map[string]evaluate.Metrics{}
// Ensure we report metrics for every model even if they are excluded.
for _, modelID := range modelIDs {
metricsPerModel[modelID] = evaluate.Metrics{}
}
problemsPerModel := map[string][]error{}
for _, languageID := range command.Languages {
languagePath := filepath.Join(command.TestdataPath, languageID)
Expand Down
27 changes: 22 additions & 5 deletions evaluate/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ import (

// Metrics holds numerical benchmarking metrics.
type Metrics struct {
// Total is the total number of benchmarking candidates.
Total uint
// Executed is the number of benchmarking candidates with successful execution.
Executed uint
// Problems is the number of benchmarking candidates with problems.
Problems uint
// Total is the total number of benchmarking candidates.
Total uint

// Coverage holds the coverage of the benchmarking candidates.
Coverage []float64
Expand All @@ -26,8 +28,9 @@ type Metrics struct {
// Add sums two metrics objects.
func (m Metrics) Add(o Metrics) Metrics {
return Metrics{
Total: m.Total + o.Total,
Problems: m.Problems + o.Problems,
Executed: m.Executed + o.Executed,
Total: m.Total + o.Total,

Coverage: append(m.Coverage, o.Coverage...),
}
Expand All @@ -45,18 +48,32 @@ func (m Metrics) AverageCoverage() float64 {

// String returns a one-line, human-readable summary of the metrics.
// It reports the executed and problem counts as percentages of Total (each followed by the raw "count/total" pair) and the average coverage as returned by AverageCoverage.
func (m Metrics) String() string {
// With Total == 0 and a zero count the float division is 0/0, which yields NaN, so the percentage is reported as 0 instead.
// A non-zero count with Total == 0 would yield +Inf, which is not handled here.
problemsPercentage := float64(m.Problems) / float64(m.Total) * 100.0
if math.IsNaN(problemsPercentage) {
problemsPercentage = 0
}
// Same NaN guard as above for the executed share.
executedPercentage := float64(m.Executed) / float64(m.Total) * 100.0
if math.IsNaN(executedPercentage) {
executedPercentage = 0
}
// NOTE(review): this listing looks like a diff view without +/- markers, so the single-line return below is presumably the pre-change statement and the multi-line return its replacement — confirm against the repository.
// Read literally, the first return makes the second one unreachable.
return fmt.Sprintf("#executed=%3.1f%%(%d/%d), average coverage=%3.1f", executedPercentage, m.Executed, m.Total, m.AverageCoverage())
// Arguments are listed in the order the verbs appear: executed share and counts, problem share and counts, then the average coverage.
return fmt.Sprintf(
"#executed=%3.1f%%(%d/%d), #problems=%3.1f%%(%d/%d), average statement coverage=%3.1f%%",
executedPercentage,
m.Executed,
m.Total,
problemsPercentage,
m.Problems,
m.Total,
m.AverageCoverage(),
)
}

// StringCSV formats the metrics as the cells of a single CSV row.
// The cells are, in order: the total, executed and problem counts, followed by the average coverage printed without decimal places.
func (m Metrics) StringCSV() []string {
	row := make([]string, 0, 4)
	for _, count := range []uint{m.Total, m.Executed, m.Problems} {
		row = append(row, fmt.Sprintf("%d", count))
	}
	row = append(row, fmt.Sprintf("%.0f", m.AverageCoverage()))

	return row
}
Expand All @@ -66,7 +83,7 @@ func FormatStringCSV(metricsPerModel map[string]Metrics) (string, error) {
var out strings.Builder
csv := csv.NewWriter(&out)

if err := csv.Write([]string{"model", "total", "executed", "coverage"}); err != nil {
if err := csv.Write([]string{"model", "files-total", "files-executed", "files-problems", "coverage-statement"}); err != nil {
return "", err
}
categories := maps.Keys(metricsPerModel)
Expand Down
12 changes: 7 additions & 5 deletions evaluate/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ func TestFormatStringCSV(t *testing.T) {
},

ExpectedString: `
model,total,executed,coverage
Model,0,0,0
model,files-total,files-executed,files-problems,coverage-statement
Model,0,0,0,0
`,
})
validate(t, &testCase{
Expand All @@ -44,19 +44,21 @@ func TestFormatStringCSV(t *testing.T) {
"ModelA": Metrics{
Total: 5,
Executed: 3,
Problems: 2,
Coverage: []float64{100.0},
},
"ModelB": Metrics{
Total: 4,
Executed: 2,
Problems: 2,
Coverage: []float64{70.0},
},
},

ExpectedString: `
model,total,executed,coverage
ModelA,5,3,100
ModelB,4,2,70
model,files-total,files-executed,files-problems,coverage-statement
ModelA,5,3,2,100
ModelB,4,2,2,70
`,
})
}
2 changes: 2 additions & 0 deletions evaluate/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ func EvaluateRepository(model model.Model, language language.Language, repositor
metrics.Total++
if err := model.GenerateTestsForFile(temporaryRepositoryPath, filePath); err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))
metrics.Problems++

continue
}

coverage, err := language.Execute(temporaryRepositoryPath)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))
metrics.Problems++

continue
}
Expand Down

0 comments on commit a73b828

Please sign in to comment.