From fc9cbdff12299d900afd9b8649f5fd6d55cb47a9 Mon Sep 17 00:00:00 2001 From: qubixes <44498096+qubixes@users.noreply.github.com> Date: Tue, 3 Oct 2023 15:50:55 +0200 Subject: [PATCH] Fix issue where long strings would result in error (#7) Co-authored-by: Raoul Schram --- regexmodel/regexclass.py | 1 + 1 file changed, 1 insertion(+) diff --git a/regexmodel/regexclass.py b/regexmodel/regexclass.py index f250be5..3f8b0b9 100644 --- a/regexmodel/regexclass.py +++ b/regexmodel/regexclass.py @@ -325,6 +325,7 @@ def score(series: pl.Series, regex: BaseRegex, count_thres: int, fraction_cover = n_unique/regex.n_possible expected_finish = fraction_match**avg_len_next*next_not_null + expected_finish = max(1e-12, expected_finish) split_penalty = 1/(1 + count_thres/expected_finish) cur_score = regex.subrange_penalty*split_penalty*fraction_cover*fraction_match return cur_score, next_series, first_char