Skip to content

Commit

Permalink
SVG category export
Browse files Browse the repository at this point in the history
Closes #49
  • Loading branch information
bauersimon committed Apr 25, 2024
1 parent 94621c3 commit 63ccbe5
Show file tree
Hide file tree
Showing 9 changed files with 353 additions and 7 deletions.
1 change: 1 addition & 0 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ func (command *Evaluate) Execute(args []string) (err error) {

CSVPath: "./evaluation.csv",
LogPath: "./evaluation.log",
SVGPath: "./categories.svg",

AssessmentPerModel: assessmentsPerModel,
TotalScore: totalScore,
Expand Down
20 changes: 20 additions & 0 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
assert.Contains(t, data, "1</text>") // Assert the Y-axis label is at least one for one model in that category.
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand All @@ -93,6 +97,7 @@ func TestEvaluateExecute(t *testing.T) {
"evaluation.log": nil,
"README.md": func(t *testing.T, filePath, data string) {
// Ensure the report links to the SVG chart, the CSV file and the logs.
assert.Contains(t, data, "./categories.svg")
assert.Contains(t, data, "./evaluation.csv")
assert.Contains(t, data, "./evaluation.log")
},
Expand All @@ -113,6 +118,10 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
assert.Contains(t, data, "1</text>") // Assert the Y-axis label is at least one for one model in that category.
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand All @@ -123,6 +132,7 @@ func TestEvaluateExecute(t *testing.T) {
"evaluation.log": nil,
"README.md": func(t *testing.T, filePath, data string) {
// Ensure the report links to the SVG chart, the CSV file and the logs.
assert.Contains(t, data, "./categories.svg")
assert.Contains(t, data, "./evaluation.csv")
assert.Contains(t, data, "./evaluation.log")
},
Expand Down Expand Up @@ -150,6 +160,10 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
assert.Contains(t, data, "1</text>") // Assert the Y-axis label is at least one for one model in that category.
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand All @@ -159,6 +173,7 @@ func TestEvaluateExecute(t *testing.T) {
"evaluation.log": nil,
"README.md": func(t *testing.T, filePath, data string) {
// Ensure the report links to the SVG chart, the CSV file and the logs.
assert.Contains(t, data, "./categories.svg")
assert.Contains(t, data, "./evaluation.csv")
assert.Contains(t, data, "./evaluation.log")
},
Expand All @@ -180,6 +195,10 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
assert.Contains(t, data, "1</text>") // Assert the Y-axis label is at least one for one model in that category.
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand All @@ -189,6 +208,7 @@ func TestEvaluateExecute(t *testing.T) {
"evaluation.log": nil,
"README.md": func(t *testing.T, filePath, data string) {
// Ensure the report links to the SVG chart, the CSV file and the logs.
assert.Contains(t, data, "./categories.svg")
assert.Contains(t, data, "./evaluation.csv")
assert.Contains(t, data, "./evaluation.log")
},
Expand Down
79 changes: 76 additions & 3 deletions evaluate/report/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ import (
"io"
"os"
"path/filepath"
"strconv"
"text/template"
"time"

pkgerrors "github.com/pkg/errors"
"github.com/wcharczuk/go-chart/v2"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
Expand All @@ -24,6 +26,9 @@ type Markdown struct {
CSVPath string
// LogPath holds the path of detailed logs.
LogPath string
// SVGPath holds the path of the charted results.
// REMARK The charts will be generated automatically during the export if this path is set.
SVGPath string

// AssessmentPerModel holds the assessments per model.
AssessmentPerModel map[string]metrics.Assessments
Expand All @@ -44,6 +49,11 @@ type markdownTemplateContext struct {
var markdownTemplate = template.Must(template.New("template-report").Parse(bytesutil.StringTrimIndentations(`
# Evaluation from {{.DateTime.Format "2006-01-02 15:04:05"}}
{{ with $svgPath := .SVGPath -}}
![Bar chart that categorizes all evaluated models.]({{$svgPath}})
{{ end -}}
This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version {{.Version}}` + "`" + `.
## Results
Expand All @@ -69,8 +79,54 @@ var markdownTemplate = template.Must(template.New("template-report").Parse(bytes
{{- end -}}
`)))

// barChartModelsPerCategoriesSVG renders a bar chart of the number of models in each category and writes it to the given writer as an SVG.
// Categories without any model are left out of the chart, and the Y-axis gets one integer tick for every count from zero up to the largest category.
func barChartModelsPerCategoriesSVG(writer io.Writer, categories []*metrics.AssessmentCategory, modelsPerCategory map[*metrics.AssessmentCategory][]string) error {
	// Collect one bar per non-empty category while tracking the tallest bar.
	var highest int
	values := make([]chart.Value, 0, len(categories))
	for _, c := range categories {
		n := len(modelsPerCategory[c])
		if n > highest {
			highest = n
		}
		if n > 0 {
			values = append(values, chart.Value{
				Label: c.Name,
				Value: float64(n),
			})
		}
	}

	// Use whole numbers as Y-axis ticks since model counts are never fractional.
	yTicks := make([]chart.Tick, 0, highest+1)
	for n := 0; n <= highest; n++ {
		yTicks = append(yTicks, chart.Tick{
			Value: float64(n),
			Label: strconv.Itoa(n),
		})
	}

	// The total width leaves room for one extra bar width on each side of the bars.
	const barWidth = 60
	barChart := chart.BarChart{
		Title: "Models per Category",
		Bars:  values,
		YAxis: chart.YAxis{
			Ticks: yTicks,
		},

		Background: chart.Style{
			Padding: chart.Box{
				Top:    60,
				Bottom: 40,
			},
		},
		Height:   300,
		Width:    (len(values) + 2) * barWidth,
		BarWidth: barWidth,
	}

	if err := barChart.Render(chart.SVG, writer); err != nil {
		return pkgerrors.WithStack(err)
	}

	return nil
}

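// format formats the markdown values in the template to the given writer.
// If an SVG path is set, the chart is additionally written to that path relative to the given markdown file directory.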
func (m Markdown) format(writer io.Writer) error {
func (m Markdown) format(writer io.Writer, markdownFileDirectoryPath string) error {
templateContext := markdownTemplateContext{
Markdown: m,
Categories: metrics.AllAssessmentCategories,
Expand All @@ -80,7 +136,24 @@ func (m Markdown) format(writer io.Writer) error {
category := assessment.Category(m.TotalScore)
templateContext.ModelsPerCategory[category] = append(templateContext.ModelsPerCategory[category], model)
}
// TODO Generate svg using maybe https://github.com/wcharczuk/go-chart.

if m.SVGPath == "" {
return pkgerrors.WithStack(markdownTemplate.Execute(writer, templateContext))

}

svgFile, err := os.Create(filepath.Join(markdownFileDirectoryPath, m.SVGPath))
if err != nil {
return pkgerrors.WithStack(err)
}
defer func() {
if err := svgFile.Close(); err != nil {
panic(err)
}
}()
if err := barChartModelsPerCategoriesSVG(svgFile, metrics.AllAssessmentCategories, templateContext.ModelsPerCategory); err != nil {
return pkgerrors.WithStack(err)
}

return pkgerrors.WithStack(markdownTemplate.Execute(writer, templateContext))
}
Expand All @@ -95,5 +168,5 @@ func (t Markdown) WriteToFile(path string) (err error) {
return pkgerrors.WithStack(err)
}

return pkgerrors.WithStack(t.format(file))
return pkgerrors.WithStack(t.format(file, filepath.Dir(path)))
}
104 changes: 100 additions & 4 deletions evaluate/report/markdown_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,67 @@ package report

import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
)

// validateFileContent compares the given actual content against the content of the file at the expected file path.
// A missing expected file is created with placeholder content, and on a mismatch the actual content is stored next to the expected file with an "_actual" suffix in its name.
func validateFileContent(t *testing.T, expectedFilePath string, actualFileContent string) {
	require.NotEmpty(t, expectedFilePath, "expected file path cannot be empty")

	expected, readErr := os.ReadFile(expectedFilePath)
	if readErr != nil {
		// Only a missing file is acceptable, it gets seeded with a placeholder to make adding new cases easy.
		require.ErrorIs(t, readErr, os.ErrNotExist)
		expected = []byte("TODO")
		require.NoError(t, os.WriteFile(expectedFilePath, expected, 0644))
		t.Logf("expected file %q does not exist yet, created it for you", expectedFilePath)
	}

	if assert.Equalf(t, string(expected), actualFileContent, "actual content:\n%s", actualFileContent) {
		return
	}

	// Keep the mismatching content around so it can be inspected or promoted to the new expectation.
	ext := filepath.Ext(expectedFilePath)
	actualFilePath := strings.TrimSuffix(expectedFilePath, ext) + "_actual" + ext
	require.NoError(t, os.WriteFile(actualFilePath, []byte(actualFileContent), 0644))
	t.Logf("written actual file content for failing %q to %q", t.Name(), actualFilePath)
}

func TestMarkdownFormat(t *testing.T) {
type testCase struct {
Name string

Markdown Markdown

ExpectedReport string
ExpectedError error
ExpectedReport string
ExpectedSVGFile string
ExpectedError error
}

validate := func(t *testing.T, tc *testCase) {
t.Run(tc.Name, func(t *testing.T) {
temporaryDirectory := t.TempDir()

var buffer bytes.Buffer
actualError := tc.Markdown.format(&buffer)
actualError := tc.Markdown.format(&buffer, temporaryDirectory)
assert.Equal(t, tc.ExpectedError, actualError)
actualReport := buffer.String()

assert.Equalf(t, bytesutil.StringTrimIndentations(tc.ExpectedReport), actualReport, "Full output:\n%s", actualReport)

if tc.ExpectedSVGFile != "" {
actualSVGContent, err := os.ReadFile(filepath.Join(temporaryDirectory, tc.Markdown.SVGPath))
assert.NoError(t, err)
validateFileContent(t, tc.ExpectedSVGFile, string(actualSVGContent))
}
})
}

Expand Down Expand Up @@ -95,6 +129,8 @@ func TestMarkdownFormat(t *testing.T) {
ExpectedReport: `
# Evaluation from 2000-01-01 00:00:00
![Bar chart that categorizes all evaluated models.](./file.svg)
This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
## Results
Expand Down Expand Up @@ -126,5 +162,65 @@ func TestMarkdownFormat(t *testing.T) {
- ` + "`ModelNoCode`" + `
`,
ExpectedSVGFile: "testdata/two_models.svg",
})
}

// TestBarChartModelsPerCategoriesSVG renders charts for fixed per-category model counts and validates them against the expected SVG files.
func TestBarChartModelsPerCategoriesSVG(t *testing.T) {
	type testCase struct {
		Name string

		Categories        []*metrics.AssessmentCategory
		ModelsPerCategory map[*metrics.AssessmentCategory]uint

		ExpectedFile  string
		ExpectedError error
	}

	testCases := []*testCase{
		{
			Name: "Two Categories",

			Categories: []*metrics.AssessmentCategory{
				metrics.AssessmentCategoryResponseError,
				metrics.AssessmentCategoryResponseNoCode,
			},
			ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
				metrics.AssessmentCategoryResponseError:  1,
				metrics.AssessmentCategoryResponseNoCode: 3,
			},

			ExpectedFile: "testdata/two_categories.svg",
		},
		{
			Name: "All Categories",

			Categories: metrics.AllAssessmentCategories,
			ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
				metrics.AssessmentCategoryResponseError:                1,
				metrics.AssessmentCategoryResponseEmpty:                2,
				metrics.AssessmentCategoryResponseNoCode:               3,
				metrics.AssessmentCategoryCodeInvalid:                  4,
				metrics.AssessmentCategoryCodeExecuted:                 5,
				metrics.AssessmentCategoryCodeCoverageStatementReached: 6,
				metrics.AssessmentCategoryCodeNoExcess:                 7,
			},

			ExpectedFile: "testdata/all_categories.svg",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.Name, func(t *testing.T) {
			// Only the number of models per category is charted, so empty model names suffice.
			models := make(map[*metrics.AssessmentCategory][]string)
			for category, count := range tc.ModelsPerCategory {
				models[category] = make([]string, count)
			}

			var svg bytes.Buffer
			actualError := barChartModelsPerCategoriesSVG(&svg, tc.Categories, models)
			assert.Equal(t, tc.ExpectedError, actualError)

			validateFileContent(t, tc.ExpectedFile, svg.String())
		})
	}
}
Loading

0 comments on commit 63ccbe5

Please sign in to comment.