Skip to content

Commit

Permalink
add regex to paragraph selector
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxDall committed Feb 12, 2024
1 parent 40fd1a8 commit a9c44e2
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion src/fundus/publishers/de/dw.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class V2(BaseParser):
# https://regex101.com/r/uZLwyb/1
_author_regex = r"^([a-z]{2,3}\/|[A-Z]{2,3}\/)*([a-z]{2,3}|[A-Z]{2,3})\s\(([a-z]{2,3}, )*([a-z]{2,3})\)$"
_paragraph_selector = XPath(
- f"//div[contains(@class, 'rich-text')] /p[not(em) or text() and not(re:test(text(), '{_author_regex}'))]",
+ f"//div[contains(@class, 'rich-text')] /p[text() and not(re:test(text(), '{_author_regex}'))]",
namespaces={"re": "http://exslt.org/regular-expressions"},
)
_summary_selector = CSSSelector("header > p")
Expand Down

0 comments on commit a9c44e2

Please sign in to comment.