SVG category export

Closes #49
symflower · Apr 26, 2024 · 777de50 · 777de50
1 parent 11c14c8
commit 777de50
Show file tree

Hide file tree

Showing 10 changed files with 331 additions and 6 deletions.
diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -281,6 +281,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
 
 		CSVPath: "./evaluation.csv",
 		LogPath: "./evaluation.log",
+		SVGPath: "./categories.svg",
 
 		AssessmentPerModel: assessmentsPerModel,
 		TotalScore:         totalScore,

diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -1,6 +1,7 @@
 package cmd
 
 import (
+	"fmt"
 	"os"
 	"path/filepath"
 	"sort"
@@ -12,15 +13,25 @@ import (
 	"github.com/zimmski/osutil"
 	"github.com/zimmski/osutil/bytesutil"
 
+	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 	"github.com/symflower/eval-dev-quality/log"
 )
 
 // validateReportLinks checks if the Markdown report data contains all the links to other relevant report files.
 func validateReportLinks(t *testing.T, data string) {
+	assert.Contains(t, data, "](./categories.svg)")
 	assert.Contains(t, data, "](./evaluation.csv)")
 	assert.Contains(t, data, "](./evaluation.log)")
 }
 
+// validateSVGContent checks if the SVG data contains all given categories and an axis label for the maximal model count.
+func validateSVGContent(t *testing.T, data string, categories []*metrics.AssessmentCategory, maxModelCount uint) {
+	for _, category := range categories {
+		assert.Contains(t, data, fmt.Sprintf("%s</text>", category.Name))
+	}
+	assert.Contains(t, data, fmt.Sprintf("%d</text>", maxModelCount))
+}
+
 func TestEvaluateExecute(t *testing.T) {
 	type testCase struct {
 		Name string
@@ -90,6 +101,9 @@ func TestEvaluateExecute(t *testing.T) {
 				}
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+				"categories.svg": func(t *testing.T, filePath, data string) {
+					validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
+				},
 				"evaluation.csv": func(t *testing.T, filePath, data string) {
 					assert.Equal(t, bytesutil.StringTrimIndentations(`
 						model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
@@ -117,6 +131,9 @@ func TestEvaluateExecute(t *testing.T) {
 				}
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+				"categories.svg": func(t *testing.T, filePath, data string) {
+					validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
+				},
 				"evaluation.csv": func(t *testing.T, filePath, data string) {
 					assert.Equal(t, bytesutil.StringTrimIndentations(`
 						model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
@@ -152,6 +169,9 @@ func TestEvaluateExecute(t *testing.T) {
 					}
 				},
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+					"categories.svg": func(t *testing.T, filePath, data string) {
+						validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
+					},
 					"evaluation.csv": func(t *testing.T, filePath, data string) {
 						assert.Equal(t, bytesutil.StringTrimIndentations(`
 							model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
@@ -180,6 +200,9 @@ func TestEvaluateExecute(t *testing.T) {
 					}
 				},
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+					"categories.svg": func(t *testing.T, filePath, data string) {
+						validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
+					},
 					"evaluation.csv": func(t *testing.T, filePath, data string) {
 						assert.Equal(t, bytesutil.StringTrimIndentations(`
 							model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code

diff --git a/evaluate/report/markdown.go b/evaluate/report/markdown.go
@@ -4,10 +4,12 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"strconv"
 	"text/template"
 	"time"
 
 	pkgerrors "github.com/pkg/errors"
+	"github.com/wcharczuk/go-chart/v2"
 	"github.com/zimmski/osutil/bytesutil"
 
 	"github.com/symflower/eval-dev-quality/evaluate/metrics"
@@ -24,6 +26,8 @@ type Markdown struct {
 	CSVPath string
 	// LogPath holds the path of detailed logs.
 	LogPath string
+	// SVGPath holds the path of the charted results.
+	SVGPath string
 
 	// AssessmentPerModel holds the assessments of every evaluated model, keyed by model.
 	AssessmentPerModel map[string]metrics.Assessments
@@ -43,6 +47,8 @@ type markdownTemplateContext struct {
 var markdownTemplate = template.Must(template.New("template-report").Parse(bytesutil.StringTrimIndentations(`
 	# Evaluation from {{.DateTime.Format "2006-01-02 15:04:05"}}
 
+	![Bar chart that categorizes all evaluated models.]({{.SVGPath}})
+
 	This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version {{.Version}}` + "`" + `.
 
 	## Results
@@ -68,8 +74,54 @@ var markdownTemplate = template.Must(template.New("template-report").Parse(bytes
 	{{- end -}}
 `)))
 
+// barChartModelsPerCategoriesSVG generates a bar chart showing the number of models per category and writes it out as an SVG. Categories without any models are omitted from the chart.
+func barChartModelsPerCategoriesSVG(writer io.Writer, categories []*metrics.AssessmentCategory, modelsPerCategory map[*metrics.AssessmentCategory][]string) error {
+	bars := make([]chart.Value, 0, len(categories))
+	maxCount := 0
+	for _, category := range categories {
+		count := len(modelsPerCategory[category])
+		if count > maxCount {
+			maxCount = count
+		}
+		if count == 0 {
+			continue
+		}
+
+		bars = append(bars, chart.Value{
+			Label: category.Name,
+			Value: float64(count),
+		})
+	}
+	ticks := make([]chart.Tick, maxCount+1)
+	for i := range ticks {
+		ticks[i] = chart.Tick{
+			Value: float64(i),
+			Label: strconv.Itoa(i),
+		}
+	}
+	graph := chart.BarChart{
+		Title: "Models per Category",
+		Bars:  bars,
+		YAxis: chart.YAxis{
+			Ticks: ticks,
+		},
+
+		Background: chart.Style{
+			Padding: chart.Box{
+				Top:    60,
+				Bottom: 40,
+			},
+		},
+		Height:   300,
+		Width:    (len(bars) + 2) * 60,
+		BarWidth: 60,
+	}
+
+	return pkgerrors.WithStack(graph.Render(chart.SVG, writer))
+}
+
 // format formats the markdown values in the template to the given writer and creates the SVG chart file at SVGPath relative to the given Markdown file directory.
-func (m Markdown) format(writer io.Writer) error {
+func (m Markdown) format(writer io.Writer, markdownFileDirectoryPath string) error {
 	templateContext := markdownTemplateContext{
 		Markdown:   m,
 		Categories: metrics.AllAssessmentCategories,
@@ -79,7 +131,22 @@ func (m Markdown) format(writer io.Writer) error {
 		category := assessment.Category(m.TotalScore)
 		templateContext.ModelsPerCategory[category] = append(templateContext.ModelsPerCategory[category], model)
 	}
-	// TODO Generate svg using maybe https://github.com/wcharczuk/go-chart.
+
+	svgFile, err := os.Create(filepath.Join(markdownFileDirectoryPath, m.SVGPath))
+	if err != nil {
+		return pkgerrors.WithStack(err)
+	}
+	defer func() {
+		if err := svgFile.Close(); err != nil {
+			panic(err)
+		}
+	}()
+
+	if len(templateContext.AssessmentPerModel) > 0 {
+		if err := barChartModelsPerCategoriesSVG(svgFile, metrics.AllAssessmentCategories, templateContext.ModelsPerCategory); err != nil {
+			return pkgerrors.WithStack(err)
+		}
+	}
 
 	if err := markdownTemplate.Execute(writer, templateContext); err != nil {
 		return pkgerrors.WithStack(err)
@@ -98,7 +165,7 @@ func (m Markdown) WriteToFile(path string) (err error) {
 		return pkgerrors.WithStack(err)
 	}
 
-	if err := m.format(file); err != nil {
+	if err := m.format(file, filepath.Dir(path)); err != nil {
 		return pkgerrors.WithStack(err)
 	}
 

diff --git a/evaluate/report/markdown_test.go b/evaluate/report/markdown_test.go
@@ -1,15 +1,17 @@
 package report
 
 import (
+	"bytes"
 	"os"
 	"path/filepath"
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
-	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 	"github.com/zimmski/osutil/bytesutil"
+
+	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 )
 
 func TestMarkdownWriteToFile(t *testing.T) {
@@ -18,8 +20,9 @@ func TestMarkdownWriteToFile(t *testing.T) {
 
 		Markdown Markdown
 
-		ExpectedReport string
-		ExpectedError  error
+		ExpectedReport  string
+		ExpectedSVGFile string
+		ExpectedError   error
 	}
 
 	validate := func(t *testing.T, tc *testCase) {
@@ -33,6 +36,12 @@ func TestMarkdownWriteToFile(t *testing.T) {
 			assert.NoError(t, err)
 
 			assert.Equalf(t, bytesutil.StringTrimIndentations(tc.ExpectedReport), string(actualReport), "Full output:\n%s", actualReport)
+
+			actualSVGContent, err := os.ReadFile(filepath.Join(temporaryDirectory, tc.Markdown.SVGPath))
+			assert.NoError(t, err)
+			expectedSVGContent, err := os.ReadFile(tc.ExpectedSVGFile)
+			require.NoError(t, err)
+			assert.Equal(t, string(expectedSVGContent), string(actualSVGContent))
 		})
 	}
 
@@ -49,11 +58,14 @@ func TestMarkdownWriteToFile(t *testing.T) {
 
 			CSVPath: "./file.csv",
 			LogPath: "./file.log",
+			SVGPath: "./file.svg",
 		},
 
 		ExpectedReport: `
 			# Evaluation from 2000-01-01 00:00:00
 
+			![Bar chart that categorizes all evaluated models.](./file.svg)
+
 			This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
 
 			## Results
@@ -73,6 +85,7 @@ func TestMarkdownWriteToFile(t *testing.T) {
 			The following sections list all models with their categories. The complete log of the evaluation with all outputs can be found [here](./file.log). Detailed scoring can be found [here](./file.csv).
 
 		`,
+		ExpectedSVGFile: "testdata/empty.svg",
 	})
 
 	validate(t, &testCase{
@@ -84,6 +97,7 @@ func TestMarkdownWriteToFile(t *testing.T) {
 
 			CSVPath: "./file.csv",
 			LogPath: "./file.log",
+			SVGPath: "./file.svg",
 
 			TotalScore: 1,
 			AssessmentPerModel: map[string]metrics.Assessments{
@@ -98,6 +112,8 @@ func TestMarkdownWriteToFile(t *testing.T) {
 		ExpectedReport: `
 			# Evaluation from 2000-01-01 00:00:00
 
+			![Bar chart that categorizes all evaluated models.](./file.svg)
+
 			This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
 
 			## Results
@@ -129,5 +145,67 @@ func TestMarkdownWriteToFile(t *testing.T) {
 			- ` + "`ModelNoCode`" + `
 
 		`,
+		ExpectedSVGFile: "testdata/two_models.svg",
+	})
+}
+
+func TestBarChartModelsPerCategoriesSVG(t *testing.T) {
+	type testCase struct {
+		Name string
+
+		Categories        []*metrics.AssessmentCategory
+		ModelsPerCategory map[*metrics.AssessmentCategory]uint
+
+		ExpectedFile  string
+		ExpectedError error
+	}
+
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			var actualSVGContent bytes.Buffer
+			dummyModelsPerCategory := make(map[*metrics.AssessmentCategory][]string)
+			for category, count := range tc.ModelsPerCategory {
+				dummyModelsPerCategory[category] = make([]string, count)
+			}
+
+			actualError := barChartModelsPerCategoriesSVG(&actualSVGContent, tc.Categories, dummyModelsPerCategory)
+			assert.Equal(t, tc.ExpectedError, actualError)
+
+			expectedSVGContent, err := os.ReadFile(tc.ExpectedFile)
+			require.NoError(t, err)
+			assert.Equal(t, string(expectedSVGContent), actualSVGContent.String())
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "Two Categories",
+
+		Categories: []*metrics.AssessmentCategory{
+			metrics.AssessmentCategoryResponseError,
+			metrics.AssessmentCategoryResponseNoCode,
+		},
+		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
+			metrics.AssessmentCategoryResponseError:  1,
+			metrics.AssessmentCategoryResponseNoCode: 3,
+		},
+
+		ExpectedFile: "testdata/two_categories.svg",
+	})
+
+	validate(t, &testCase{
+		Name: "All Categories",
+
+		Categories: metrics.AllAssessmentCategories,
+		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
+			metrics.AssessmentCategoryResponseError:                1,
+			metrics.AssessmentCategoryResponseEmpty:                2,
+			metrics.AssessmentCategoryResponseNoCode:               3,
+			metrics.AssessmentCategoryCodeInvalid:                  4,
+			metrics.AssessmentCategoryCodeExecuted:                 5,
+			metrics.AssessmentCategoryCodeCoverageStatementReached: 6,
+			metrics.AssessmentCategoryCodeNoExcess:                 7,
+		},
+
+		ExpectedFile: "testdata/all_categories.svg",
 	})
 }