SVG category export

Closes #49
symflower · Apr 25, 2024 · 63ccbe5 · 63ccbe5
1 parent 94621c3
commit 63ccbe5
Show file tree

Hide file tree

Showing 9 changed files with 353 additions and 7 deletions.
diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -282,6 +282,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
 
 		CSVPath: "./evaluation.csv",
 		LogPath: "./evaluation.log",
+		SVGPath: "./categories.svg",
 
 		AssessmentPerModel: assessmentsPerModel,
 		TotalScore:         totalScore,

diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -84,6 +84,10 @@ func TestEvaluateExecute(t *testing.T) {
 				}
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+				"categories.svg": func(t *testing.T, filePath, data string) {
+					assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
+					assert.Contains(t, data, "1</text>")                  // Assert the Y-axis label is at least one for one model in that category.
+				},
 				"evaluation.csv": func(t *testing.T, filePath, data string) {
 					assert.Equal(t, bytesutil.StringTrimIndentations(`
 						model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
@@ -93,6 +97,7 @@ func TestEvaluateExecute(t *testing.T) {
 				"evaluation.log": nil,
 				"README.md": func(t *testing.T, filePath, data string) {
-					// Ensure the report links to the CSV file and logs.
+					// Ensure the report links to the SVG chart, the CSV file and the logs.
+					assert.Contains(t, data, "./categories.svg")
 					assert.Contains(t, data, "./evaluation.csv")
 					assert.Contains(t, data, "./evaluation.log")
 				},
@@ -113,6 +118,10 @@ func TestEvaluateExecute(t *testing.T) {
 				}
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+				"categories.svg": func(t *testing.T, filePath, data string) {
+					assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
+					assert.Contains(t, data, "1</text>")                  // Assert the Y-axis label is at least one for one model in that category.
+				},
 				"evaluation.csv": func(t *testing.T, filePath, data string) {
 					assert.Equal(t, bytesutil.StringTrimIndentations(`
 						model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
@@ -123,6 +132,7 @@ func TestEvaluateExecute(t *testing.T) {
 				"evaluation.log": nil,
 				"README.md": func(t *testing.T, filePath, data string) {
-					// Ensure the report links to the CSV file and logs.
+					// Ensure the report links to the SVG chart, the CSV file and the logs.
+					assert.Contains(t, data, "./categories.svg")
 					assert.Contains(t, data, "./evaluation.csv")
 					assert.Contains(t, data, "./evaluation.log")
 				},
@@ -150,6 +160,10 @@ func TestEvaluateExecute(t *testing.T) {
 					}
 				},
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+					"categories.svg": func(t *testing.T, filePath, data string) {
+						assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
+						assert.Contains(t, data, "1</text>")                  // Assert the Y-axis label is at least one for one model in that category.
+					},
 					"evaluation.csv": func(t *testing.T, filePath, data string) {
 						assert.Equal(t, bytesutil.StringTrimIndentations(`
 							model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
@@ -159,6 +173,7 @@ func TestEvaluateExecute(t *testing.T) {
 					"evaluation.log": nil,
 					"README.md": func(t *testing.T, filePath, data string) {
-						// Ensure the report links to the CSV file and logs.
+						// Ensure the report links to the SVG chart, the CSV file and the logs.
+						assert.Contains(t, data, "./categories.svg")
 						assert.Contains(t, data, "./evaluation.csv")
 						assert.Contains(t, data, "./evaluation.log")
 					},
@@ -180,6 +195,10 @@ func TestEvaluateExecute(t *testing.T) {
 					}
 				},
 				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
+					"categories.svg": func(t *testing.T, filePath, data string) {
+						assert.Contains(t, data, "No Excess Response</text>") // Assert "no excess" category is present.
+						assert.Contains(t, data, "1</text>")                  // Assert the Y-axis label is at least one for one model in that category.
+					},
 					"evaluation.csv": func(t *testing.T, filePath, data string) {
 						assert.Equal(t, bytesutil.StringTrimIndentations(`
 							model,language,repository,score,coverage-statement,files-executed,response-no-error,response-no-excess,response-not-empty,response-with-code
@@ -189,6 +208,7 @@ func TestEvaluateExecute(t *testing.T) {
 					"evaluation.log": nil,
 					"README.md": func(t *testing.T, filePath, data string) {
-						// Ensure the report links to the CSV file and logs.
+						// Ensure the report links to the SVG chart, the CSV file and the logs.
+						assert.Contains(t, data, "./categories.svg")
 						assert.Contains(t, data, "./evaluation.csv")
 						assert.Contains(t, data, "./evaluation.log")
 					},

diff --git a/evaluate/report/markdown.go b/evaluate/report/markdown.go
@@ -4,10 +4,12 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"strconv"
 	"text/template"
 	"time"
 
 	pkgerrors "github.com/pkg/errors"
+	"github.com/wcharczuk/go-chart/v2"
 	"github.com/zimmski/osutil/bytesutil"
 
 	"github.com/symflower/eval-dev-quality/evaluate/metrics"
@@ -24,6 +26,9 @@ type Markdown struct {
 	CSVPath string
 	// LogPath holds the path of detailed logs.
 	LogPath string
+	// SVGPath holds the path of the charted results, relative to the directory of the written Markdown file.
+	// REMARK The charts will be generated automatically during the export if this path is set.
+	SVGPath string
 
-	// AssessmentPerModel holds
+	// AssessmentPerModel holds the assessments per model.
 	AssessmentPerModel map[string]metrics.Assessments
@@ -44,6 +49,11 @@ type markdownTemplateContext struct {
 var markdownTemplate = template.Must(template.New("template-report").Parse(bytesutil.StringTrimIndentations(`
 	# Evaluation from {{.DateTime.Format "2006-01-02 15:04:05"}}
 
+	{{ with $svgPath := .SVGPath -}}
+	![Bar chart that categorizes all evaluated models.]({{$svgPath}})
+
+	{{ end -}}
+
 	This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version {{.Version}}` + "`" + `.
 
 	## Results
@@ -69,8 +79,54 @@ var markdownTemplate = template.Must(template.New("template-report").Parse(bytes
 	{{- end -}}
 `)))
 
+// barChartModelsPerCategoriesSVG generates a bar chart showing the number of models per category and writes it out as an SVG to the given writer. The bars follow the order of "categories" and categories without any model are left out of the chart.
+func barChartModelsPerCategoriesSVG(writer io.Writer, categories []*metrics.AssessmentCategory, modelsPerCategory map[*metrics.AssessmentCategory][]string) error {
+	bars := make([]chart.Value, 0, len(categories))
+	maxCount := 0
+	for _, category := range categories {
+		count := len(modelsPerCategory[category])
+		if count > maxCount {
+			maxCount = count
+		}
+		if count == 0 {
+			continue // Categories without models do not get a bar.
+		}
+
+		bars = append(bars, chart.Value{
+			Label: category.Name,
+			Value: float64(count),
+		})
+	}
+	ticks := make([]chart.Tick, maxCount+1) // One integer tick for each value from zero up to the highest model count.
+	for i := range ticks {
+		ticks[i] = chart.Tick{
+			Value: float64(i),
+			Label: strconv.Itoa(i),
+		}
+	}
+	graph := chart.BarChart{
+		Title: "Models per Category",
+		Bars:  bars,
+		YAxis: chart.YAxis{
+			Ticks: ticks,
+		},
+
+		Background: chart.Style{
+			Padding: chart.Box{
+				Top:    60,
+				Bottom: 40,
+			},
+		},
+		Height:   300,
+		Width:    (len(bars) + 2) * 60, // The width grows with the number of bars: one bar width (60) per bar plus two more.
+		BarWidth: 60,
+	}
+
+	return pkgerrors.WithStack(graph.Render(chart.SVG, writer))
+}
+
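-// format formats the markdown values in the template to the given writer.
+// format formats the markdown values in the template to the given writer and, if "SVGPath" is set, writes the SVG chart to that path relative to "markdownFileDirectoryPath".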
-func (m Markdown) format(writer io.Writer) error {
+func (m Markdown) format(writer io.Writer, markdownFileDirectoryPath string) error {
 	templateContext := markdownTemplateContext{
 		Markdown:   m,
 		Categories: metrics.AllAssessmentCategories,
@@ -80,7 +136,24 @@ func (m Markdown) format(writer io.Writer) error {
 		category := assessment.Category(m.TotalScore)
 		templateContext.ModelsPerCategory[category] = append(templateContext.ModelsPerCategory[category], model)
 	}
-	// TODO Generate svg using maybe https://github.com/wcharczuk/go-chart.
+
+	if m.SVGPath == "" {
+		return pkgerrors.WithStack(markdownTemplate.Execute(writer, templateContext))
+
+	}
+
+	svgFile, err := os.Create(filepath.Join(markdownFileDirectoryPath, m.SVGPath))
+	if err != nil {
+		return pkgerrors.WithStack(err)
+	}
+	defer func() {
+		if err := svgFile.Close(); err != nil {
+			panic(err)
+		}
+	}()
+	if err := barChartModelsPerCategoriesSVG(svgFile, metrics.AllAssessmentCategories, templateContext.ModelsPerCategory); err != nil {
+		return pkgerrors.WithStack(err)
+	}
 
 	return pkgerrors.WithStack(markdownTemplate.Execute(writer, templateContext))
 }
@@ -95,5 +168,5 @@ func (t Markdown) WriteToFile(path string) (err error) {
 		return pkgerrors.WithStack(err)
 	}
 
-	return pkgerrors.WithStack(t.format(file))
+	return pkgerrors.WithStack(t.format(file, filepath.Dir(path)))
 }
diff --git a/evaluate/report/markdown_test.go b/evaluate/report/markdown_test.go
@@ -2,33 +2,67 @@ package report
 
 import (
 	"bytes"
+	"os"
+	"path/filepath"
+	"strings"
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
-	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 	"github.com/zimmski/osutil/bytesutil"
+
+	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 )
 
+// validateFileContent asserts that the given actual content equals the content of the file at the expected file path.
+// If the expected file does not exist, it is created with the placeholder content "TODO". If the contents don't match, the actual content is written to disk alongside the expected file with an "_actual" suffix in front of the file extension.
+func validateFileContent(t *testing.T, expectedFilePath string, actualFileContent string) {
+	require.NotEmpty(t, expectedFilePath, "expected file path cannot be empty")
+	expectedContent, err := os.ReadFile(expectedFilePath)
+	if err != nil {
+		// Create the file if it does not exist already to make it easy to add new cases.
+		require.ErrorIs(t, err, os.ErrNotExist) // Any other read error fails the test right away.
+		expectedContent = []byte("TODO")
+		require.NoError(t, os.WriteFile(expectedFilePath, expectedContent, 0644))
+		t.Logf("expected file %q does not exist yet, created it for you", expectedFilePath)
+	}
+
+	if !assert.Equalf(t, string(expectedContent), actualFileContent, "actual content:\n%s", actualFileContent) {
+		extension := filepath.Ext(expectedFilePath)
+		actualFile := strings.TrimSuffix(expectedFilePath, extension) + "_actual" + extension // E.g. "chart.svg" becomes "chart_actual.svg".
+		require.NoError(t, os.WriteFile(actualFile, []byte(actualFileContent), 0644))
+		t.Logf("written actual file content for failing %q to %q", t.Name(), actualFile)
+	}
+}
+
 func TestMarkdownFormat(t *testing.T) {
 	type testCase struct {
 		Name string
 
 		Markdown Markdown
 
-		ExpectedReport string
-		ExpectedError  error
+		ExpectedReport  string
+		ExpectedSVGFile string
+		ExpectedError   error
 	}
 
 	validate := func(t *testing.T, tc *testCase) {
 		t.Run(tc.Name, func(t *testing.T) {
+			temporaryDirectory := t.TempDir()
+
 			var buffer bytes.Buffer
-			actualError := tc.Markdown.format(&buffer)
+			actualError := tc.Markdown.format(&buffer, temporaryDirectory)
 			assert.Equal(t, tc.ExpectedError, actualError)
 			actualReport := buffer.String()
 
 			assert.Equalf(t, bytesutil.StringTrimIndentations(tc.ExpectedReport), actualReport, "Full output:\n%s", actualReport)
+
+			if tc.ExpectedSVGFile != "" {
+				actualSVGContent, err := os.ReadFile(filepath.Join(temporaryDirectory, tc.Markdown.SVGPath))
+				assert.NoError(t, err)
+				validateFileContent(t, tc.ExpectedSVGFile, string(actualSVGContent))
+			}
 		})
 	}
 
@@ -95,6 +129,8 @@ func TestMarkdownFormat(t *testing.T) {
 		ExpectedReport: `
 			# Evaluation from 2000-01-01 00:00:00
 
+			![Bar chart that categorizes all evaluated models.](./file.svg)
+
 			This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
 
 			## Results
@@ -126,5 +162,65 @@ func TestMarkdownFormat(t *testing.T) {
 			- ` + "`ModelNoCode`" + `
 
 		`,
+		ExpectedSVGFile: "testdata/two_models.svg",
+	})
+}
+
+func TestBarChartModelsPerCategoriesSVG(t *testing.T) {
+	type testCase struct {
+		Name string
+
+		Categories        []*metrics.AssessmentCategory
+		ModelsPerCategory map[*metrics.AssessmentCategory]uint // Number of placeholder models per category.
+
+		ExpectedFile  string
+		ExpectedError error
+	}
+
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			var actualSVGContent bytes.Buffer
+			dummyModelsPerCategory := make(map[*metrics.AssessmentCategory][]string)
+			for category, count := range tc.ModelsPerCategory {
+				dummyModelsPerCategory[category] = make([]string, count) // Only the number of models per category is charted, so empty model names suffice.
+			}
+
+			actualError := barChartModelsPerCategoriesSVG(&actualSVGContent, tc.Categories, dummyModelsPerCategory)
+			assert.Equal(t, tc.ExpectedError, actualError)
+
+			validateFileContent(t, tc.ExpectedFile, actualSVGContent.String()) // Compare the rendered SVG with the expected file on disk.
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "Two Categories",
+
+		Categories: []*metrics.AssessmentCategory{
+			metrics.AssessmentCategoryResponseError,
+			metrics.AssessmentCategoryResponseNoCode,
+		},
+		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
+			metrics.AssessmentCategoryResponseError:  1,
+			metrics.AssessmentCategoryResponseNoCode: 3,
+		},
+
+		ExpectedFile: "testdata/two_categories.svg",
+	})
+
+	validate(t, &testCase{
+		Name: "All Categories",
+
+		Categories: metrics.AllAssessmentCategories,
+		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
+			metrics.AssessmentCategoryResponseError:                1,
+			metrics.AssessmentCategoryResponseEmpty:                2,
+			metrics.AssessmentCategoryResponseNoCode:               3,
+			metrics.AssessmentCategoryCodeInvalid:                  4,
+			metrics.AssessmentCategoryCodeExecuted:                 5,
+			metrics.AssessmentCategoryCodeCoverageStatementReached: 6,
+			metrics.AssessmentCategoryCodeNoExcess:                 7,
+		},
+
+		ExpectedFile: "testdata/all_categories.svg",
 	})
 }