WIP

symflower · Apr 19, 2024 · 0c00a61 · 0c00a61
1 parent 14f9b93
commit 0c00a61
Showing 1 changed file with 56 additions and 0 deletions.
diff --git a/evaluate/report/markdown.go b/evaluate/report/markdown.go
@@ -0,0 +1,56 @@
+package report
+
+import (
+	"html/template"
+
+	"github.com/symflower/eval-dev-quality/evaluate/metrics"
+	"github.com/symflower/eval-dev-quality/model"
+	"github.com/zimmski/osutil/bytesutil"
+)
+
+// markdownTemplateContext bundles the values for rendering a Markdown report.
+// NOTE(review): presumably meant as the data passed when executing "markdownTemplate", which does not reference any of these fields yet - confirm.
+type markdownTemplateContext struct {
+	// DateTime holds a date-time string, presumably the one meant for the "$DATETIME" placeholder of the report - TODO confirm.
+	DateTime string
+	// Version holds a version string, presumably the benchmark version meant for the "$VERSION" placeholder - TODO confirm.
+	Version  string
+	// LogPath holds a path string, presumably to the evaluation log meant for the "$LOGPATH" placeholder - TODO confirm.
+	LogPath  string
+	// CVSPath holds a path string, presumably to the CSV file with the detailed scoring.
+	// NOTE(review): the name looks like a typo for "CSVPath" (the report placeholder is "$CSVPATH"); rename together with all users.
+	CVSPath  string
+
+	// Categories holds a list of assessment categories, presumably in the order the report should present them - TODO confirm.
+	Categories        []metrics.AssessmentCategory
+	// ModelsPerCategory maps an assessment category to a model.
+	// NOTE(review): the value is a single model although the name suggests several models per category - confirm whether a slice of models is intended.
+	ModelsPerCategory map[metrics.AssessmentCategory]model.Model
+}
+
+// markdownTemplate holds the template for a Markdown report.
+//
+// The report contains shortcode markers of the form "{{< name >}}" and "{{</ name >}}" (presumably Hugo shortcodes - TODO confirm) that must reach the output verbatim. Their braces are identical to the default template action delimiters, so writing them directly makes "Parse" fail and "template.Must" panic during package initialization. The literal "{{" and "}}" are therefore emitted through string-literal actions, while "<" and ">" stay plain template text so that "html/template" does not escape them.
+//
+// Placeholders such as "$DATETIME" and the "TODO" markers are plain text: executing the template does not substitute them.
+//
+// NOTE(review): "html/template" escapes the output of actions for HTML, which may be unwanted once real values are inserted into Markdown - confirm whether "text/template" is the better fit.
+var markdownTemplate = template.Must(template.New("template-report").Parse(bytesutil.StringTrimIndentations(`
+	# Evaluation from $DATETIME
+
+	{{"{{"}}< img-blog name="assets/$IMAGE.svg" alt="Bar chart that categorizes all evaluated LLMs.">{{"}}"}}
+
+	This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in version $VERSION.
+
+	## Results
+
+	{{"{{"}}< info-block >{{"}}"}}
+	Keep in mind that LLMs are nondeterministic. The following results just reflect a current snapshot.
+	{{"{{"}}</ info-block >{{"}}"}}
+
+	The results of all models have been divided into the following categories:
+
+	TODO
+
+	- **"no result"** e.g. we received an empty answer
+	- **"no code"** i.e. the model simply does not understand how to write Go code
+	- **"insufficient/unusable code"** e.g. a kind of random collection of statements that are maybe trying to test something but are not
+	- **"incorrect testing method"** e.g. multiple compile errors but mostly the testing code itself was wrong
+	- **"mistake (types, imports)"** i.e. simple compile errors (e.g. incorrect imports), additional code that does not align with the signature of the function under test 🧢
+	- **"passes (overengineered)"** i.e. code compiles and reaches 100% coverage but there is code that should not go through a code review 🎩
+	- **"perfect solution"** i.e. if at all, minor objections 👑
+
+	The following sections list all models with their categories. The complete log of the evaluation with all outputs can be found [here]($LOGPATH). Detailed scoring can be found [here]($CSVPATH)
+
+	### Result category: "$CATEGORY_NAME"
+
+	$CATEGORY_EXPLANATION
+
+	TODO
+
+	> -
+`)))