Skip to content

Commit

Permalink
SVG category export
Browse files Browse the repository at this point in the history
Closes #49
  • Loading branch information
bauersimon committed Apr 26, 2024
1 parent 11c14c8 commit 777de50
Show file tree
Hide file tree
Showing 10 changed files with 331 additions and 6 deletions.
1 change: 1 addition & 0 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ func (command *Evaluate) Execute(args []string) (err error) {

CSVPath: "./evaluation.csv",
LogPath: "./evaluation.log",
SVGPath: "./categories.svg",

AssessmentPerModel: assessmentsPerModel,
TotalScore: totalScore,
Expand Down
23 changes: 23 additions & 0 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cmd

import (
"fmt"
"os"
"path/filepath"
"sort"
Expand All @@ -12,15 +13,25 @@ import (
"github.com/zimmski/osutil"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/log"
)

// validateReportLinks asserts that the Markdown report data links to each of the companion report files, i.e. the SVG chart, the CSV scores and the log.
func validateReportLinks(t *testing.T, data string) {
	expectedLinks := []string{
		"](./categories.svg)",
		"](./evaluation.csv)",
		"](./evaluation.log)",
	}
	for _, link := range expectedLinks {
		assert.Contains(t, data, link)
	}
}

// validateSVGContent asserts that the SVG data contains the name of every given category and the maximal model count, each directly followed by a closing text tag.
func validateSVGContent(t *testing.T, data string, categories []*metrics.AssessmentCategory, maxModelCount uint) {
	for i := range categories {
		categoryLabel := fmt.Sprintf("%s</text>", categories[i].Name)
		assert.Contains(t, data, categoryLabel)
	}

	maxCountLabel := fmt.Sprintf("%d</text>", maxModelCount)
	assert.Contains(t, data, maxCountLabel)
}

func TestEvaluateExecute(t *testing.T) {
type testCase struct {
Name string
Expand Down Expand Up @@ -90,6 +101,9 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand Down Expand Up @@ -117,6 +131,9 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand Down Expand Up @@ -152,6 +169,9 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand Down Expand Up @@ -180,6 +200,9 @@ func TestEvaluateExecute(t *testing.T) {
}
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
"evaluation.csv": func(t *testing.T, filePath, data string) {
assert.Equal(t, bytesutil.StringTrimIndentations(`
model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
Expand Down
73 changes: 70 additions & 3 deletions evaluate/report/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ import (
"io"
"os"
"path/filepath"
"strconv"
"text/template"
"time"

pkgerrors "github.com/pkg/errors"
"github.com/wcharczuk/go-chart/v2"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
Expand All @@ -24,6 +26,8 @@ type Markdown struct {
CSVPath string
// LogPath holds the path of detailed logs.
LogPath string
// SVGPath holds the path of the charted results.
SVGPath string

// AssessmentPerModel holds the assessments collected for each model.
AssessmentPerModel map[string]metrics.Assessments
Expand All @@ -43,6 +47,8 @@ type markdownTemplateContext struct {
var markdownTemplate = template.Must(template.New("template-report").Parse(bytesutil.StringTrimIndentations(`
# Evaluation from {{.DateTime.Format "2006-01-02 15:04:05"}}
![Bar chart that categorizes all evaluated models.]({{.SVGPath}})
This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version {{.Version}}` + "`" + `.
## Results
Expand All @@ -68,8 +74,54 @@ var markdownTemplate = template.Must(template.New("template-report").Parse(bytes
{{- end -}}
`)))

// barChartModelsPerCategoriesSVG renders a bar chart of the number of models per category and writes it as an SVG to the given writer.
// Bars follow the order of the given categories, and a category without any model gets no bar.
func barChartModelsPerCategoriesSVG(writer io.Writer, categories []*metrics.AssessmentCategory, modelsPerCategory map[*metrics.AssessmentCategory][]string) error {
	// barWidth is the width of a single bar and also the unit the chart width is computed from.
	const barWidth = 60

	highestCount := 0
	values := make([]chart.Value, 0, len(categories))
	for _, c := range categories {
		n := len(modelsPerCategory[c])
		if n == 0 {
			// Empty categories are left out of the chart.
			continue
		}
		if n > highestCount {
			highestCount = n
		}

		values = append(values, chart.Value{
			Label: c.Name,
			Value: float64(n),
		})
	}

	// Label the Y axis with one tick per whole number from zero up to the highest model count.
	yTicks := make([]chart.Tick, 0, highestCount+1)
	for n := 0; n <= highestCount; n++ {
		yTicks = append(yTicks, chart.Tick{
			Value: float64(n),
			Label: strconv.Itoa(n),
		})
	}

	barChart := chart.BarChart{
		Title: "Models per Category",
		Bars:  values,
		YAxis: chart.YAxis{
			Ticks: yTicks,
		},

		Background: chart.Style{
			Padding: chart.Box{
				Top:    60,
				Bottom: 40,
			},
		},
		Height:   300,
		Width:    (len(values) + 2) * barWidth,
		BarWidth: barWidth,
	}

	if err := barChart.Render(chart.SVG, writer); err != nil {
		return pkgerrors.WithStack(err)
	}

	return nil
}

// format formats the markdown values in the template to the given writer.
func (m Markdown) format(writer io.Writer) error {
func (m Markdown) format(writer io.Writer, markdownFileDirectoryPath string) error {
templateContext := markdownTemplateContext{
Markdown: m,
Categories: metrics.AllAssessmentCategories,
Expand All @@ -79,7 +131,22 @@ func (m Markdown) format(writer io.Writer) error {
category := assessment.Category(m.TotalScore)
templateContext.ModelsPerCategory[category] = append(templateContext.ModelsPerCategory[category], model)
}
// TODO Generate svg using maybe https://github.com/wcharczuk/go-chart.

svgFile, err := os.Create(filepath.Join(markdownFileDirectoryPath, m.SVGPath))
if err != nil {
return pkgerrors.WithStack(err)
}
defer func() {
if err := svgFile.Close(); err != nil {
panic(err)
}
}()

if len(templateContext.AssessmentPerModel) > 0 {
if err := barChartModelsPerCategoriesSVG(svgFile, metrics.AllAssessmentCategories, templateContext.ModelsPerCategory); err != nil {
return pkgerrors.WithStack(err)
}
}

if err := markdownTemplate.Execute(writer, templateContext); err != nil {
return pkgerrors.WithStack(err)
Expand All @@ -98,7 +165,7 @@ func (m Markdown) WriteToFile(path string) (err error) {
return pkgerrors.WithStack(err)
}

if err := m.format(file); err != nil {
if err := m.format(file, filepath.Dir(path)); err != nil {
return pkgerrors.WithStack(err)
}

Expand Down
84 changes: 81 additions & 3 deletions evaluate/report/markdown_test.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
package report

import (
"bytes"
"os"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
)

func TestMarkdownWriteToFile(t *testing.T) {
Expand All @@ -18,8 +20,9 @@ func TestMarkdownWriteToFile(t *testing.T) {

Markdown Markdown

ExpectedReport string
ExpectedError error
ExpectedReport string
ExpectedSVGFile string
ExpectedError error
}

validate := func(t *testing.T, tc *testCase) {
Expand All @@ -33,6 +36,12 @@ func TestMarkdownWriteToFile(t *testing.T) {
assert.NoError(t, err)

assert.Equalf(t, bytesutil.StringTrimIndentations(tc.ExpectedReport), string(actualReport), "Full output:\n%s", actualReport)

actualSVGContent, err := os.ReadFile(filepath.Join(temporaryDirectory, tc.Markdown.SVGPath))
assert.NoError(t, err)
expectedSVGContent, err := os.ReadFile(tc.ExpectedSVGFile)
require.NoError(t, err)
assert.Equal(t, string(expectedSVGContent), string(actualSVGContent))
})
}

Expand All @@ -49,11 +58,14 @@ func TestMarkdownWriteToFile(t *testing.T) {

CSVPath: "./file.csv",
LogPath: "./file.log",
SVGPath: "./file.svg",
},

ExpectedReport: `
# Evaluation from 2000-01-01 00:00:00
![Bar chart that categorizes all evaluated models.](./file.svg)
This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
## Results
Expand All @@ -73,6 +85,7 @@ func TestMarkdownWriteToFile(t *testing.T) {
The following sections list all models with their categories. The complete log of the evaluation with all outputs can be found [here](./file.log). Detailed scoring can be found [here](./file.csv).
`,
ExpectedSVGFile: "testdata/empty.svg",
})

validate(t, &testCase{
Expand All @@ -84,6 +97,7 @@ func TestMarkdownWriteToFile(t *testing.T) {

CSVPath: "./file.csv",
LogPath: "./file.log",
SVGPath: "./file.svg",

TotalScore: 1,
AssessmentPerModel: map[string]metrics.Assessments{
Expand All @@ -98,6 +112,8 @@ func TestMarkdownWriteToFile(t *testing.T) {
ExpectedReport: `
# Evaluation from 2000-01-01 00:00:00
![Bar chart that categorizes all evaluated models.](./file.svg)
This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
## Results
Expand Down Expand Up @@ -129,5 +145,67 @@ func TestMarkdownWriteToFile(t *testing.T) {
- ` + "`ModelNoCode`" + `
`,
ExpectedSVGFile: "testdata/two_models.svg",
})
}

// TestBarChartModelsPerCategoriesSVG renders bar charts for fixed model counts and compares the output with the expected SVG files.
func TestBarChartModelsPerCategoriesSVG(t *testing.T) {
	type testCase struct {
		Name string

		Categories        []*metrics.AssessmentCategory
		ModelsPerCategory map[*metrics.AssessmentCategory]uint

		ExpectedFile  string
		ExpectedError error
	}

	validate := func(t *testing.T, tc *testCase) {
		t.Run(tc.Name, func(t *testing.T) {
			// Only the number of models per category is charted, so slices of empty model names of the requested length suffice.
			models := make(map[*metrics.AssessmentCategory][]string, len(tc.ModelsPerCategory))
			for category, modelCount := range tc.ModelsPerCategory {
				models[category] = make([]string, modelCount)
			}

			var rendered bytes.Buffer
			err := barChartModelsPerCategoriesSVG(&rendered, tc.Categories, models)
			assert.Equal(t, tc.ExpectedError, err)

			expected, err := os.ReadFile(tc.ExpectedFile)
			require.NoError(t, err)
			assert.Equal(t, string(expected), rendered.String())
		})
	}

	validate(t, &testCase{
		Name: "Two Categories",

		Categories: []*metrics.AssessmentCategory{
			metrics.AssessmentCategoryResponseError,
			metrics.AssessmentCategoryResponseNoCode,
		},
		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
			metrics.AssessmentCategoryResponseError:  1,
			metrics.AssessmentCategoryResponseNoCode: 3,
		},

		ExpectedFile: "testdata/two_categories.svg",
	})

	validate(t, &testCase{
		Name: "All Categories",

		Categories: metrics.AllAssessmentCategories,
		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
			metrics.AssessmentCategoryResponseError:                1,
			metrics.AssessmentCategoryResponseEmpty:                2,
			metrics.AssessmentCategoryResponseNoCode:               3,
			metrics.AssessmentCategoryCodeInvalid:                  4,
			metrics.AssessmentCategoryCodeExecuted:                 5,
			metrics.AssessmentCategoryCodeCoverageStatementReached: 6,
			metrics.AssessmentCategoryCodeNoExcess:                 7,
		},

		ExpectedFile: "testdata/all_categories.svg",
	})
}
Loading

0 comments on commit 777de50

Please sign in to comment.