diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java index ec021643cf..f802b26f5f 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java @@ -199,6 +199,9 @@ public String getPageSeparator() { * @param pageSeparator */ public void setPageSeparator(String pageSeparator) { + if (pageSeparator.isBlank()) { + return; + } Matcher m = ALLOWABLE_PAGE_SEPARATORS_PATTERN.matcher(pageSeparator); if (!m.find()) { throw new IllegalArgumentException(pageSeparator + " contains illegal characters.\n" +