flairNLP · addie9800 · Feb 24, 2024 · Feb 18, 2024 · Feb 18, 2024 · Feb 18, 2024
diff --git a/docs/attribute_guidelines.md b/docs/attribute_guidelines.md
@@ -58,4 +58,12 @@ Those attributes will be validated with unit tests when used.
         <td><code>List[str]</code></td>
         <td><code>generic_topic_parsing</code></td>
     </tr>
+    <tr>
+        <td>free_access</td>
+        <td>A boolean that is set to <code>False</code> if the article is restricted to users with a subscription. This usually indicates
+        that the article cannot be crawled completely.
+        <i><b>This attribute is implemented by default</b></i></td>
+        <td><code>bool</code></td>
+        <td><code></code></td>
+    </tr>
 </table>
diff --git a/docs/how_to_add_a_publisher.md b/docs/how_to_add_a_publisher.md
@@ -16,6 +16,7 @@
     * [Working with `lxml`](#working-with-lxml)
     * [CSS-Select](#css-select)
     * [XPath](#xpath)
+  * [Checking the free_access attribute](#checking-the-free_access-attribute)
   * [Finishing the Parser](#finishing-the-parser)
 * [6. Generate unit tests](#6-generate-unit-tests)
 * [7. Opening a Pull Request](#7-opening-a-pull-request)
@@ -469,6 +470,23 @@ Instead, we recommend referring to [this](https://devhints.io/xpath) documentati
 Make sure to examine other parsers and consult the [attribute guidelines](attribute_guidelines.md) for specifics on attribute implementation. 
 We strongly encourage utilizing these utility functions, especially when parsing the `ArticleBody`.
 
+### Checking the free_access attribute
+
+In case your new publisher does not have a subscription model, you can go ahead and skip this step.
+If it does, please verify that the `ld+json` elements of the HTML (refer to the section [Extracting attributes from Precomputed](#extracting-attributes-from-precomputed) for details) in the source code of premium articles contain a tag `isAccessibleForFree` that is set to `false` or `False` (and, if the tag is present in freely accessible articles, to `true` or `True` there).
+It doesn't matter if the tag is missing in the freely accessible articles.
+If this is the case, you can continue with the next step. If not, please override the default implementation by adding the following snippet to your parser:
+
+```python
+@attribute
+def free_access(self) -> bool:
+    # Your personalized logic goes here
+    ...
+```
+
+Usually, you can identify a premium article by an indicator within the URL or by using XPath or CSSSelector to select
+the element asking the user to purchase a subscription to view the article.
+
 ### Finishing the Parser
 
 Bringing all the above together, the Los Angeles Times now looks like this.

diff --git a/docs/supported_publishers.md b/docs/supported_publishers.md
@@ -91,9 +91,7 @@
         </a>
       </td>
       <td>&#160;</td>
-      <td>
-        <code>free_access</code>
-      </td>
+      <td>&#160;</td>
     </tr>
     <tr>
       <td>

diff --git a/src/fundus/parser/base_parser.py b/src/fundus/parser/base_parser.py
@@ -233,6 +233,15 @@ def __meta(self) -> Dict[str, Any]:
     def __ld(self) -> Optional[LinkedDataMapping]:
         return self.precomputed.ld
 
+    @attribute
+    def free_access(self) -> bool:
+        if (isAccessibleForFree := self.precomputed.ld.bf_search("isAccessibleForFree")) is None:
+            return True
+        elif not isAccessibleForFree or isAccessibleForFree == "false" or isAccessibleForFree == "False":
+            return False
+        else:
+            return True
+
 
 class _ParserCache:
     def __init__(self, factory: Type[BaseParser]):

diff --git a/src/fundus/publishers/de/bild.py b/src/fundus/publishers/de/bild.py
@@ -1,4 +1,5 @@
 import datetime
+import re
 from typing import List, Optional
 
 from lxml.etree import XPath
@@ -42,3 +43,10 @@ def title(self) -> Optional[str]:
         @attribute
         def topics(self) -> List[str]:
             return generic_topic_parsing(self.precomputed.meta.get("keywords"))
+
+        @attribute
+        def free_access(self) -> bool:
+            if (url := self.precomputed.meta.get("og:url")) is not None:
+                return re.search(r"/bild-plus/", url) is None
+            else:
+                return True
diff --git a/src/fundus/publishers/de/braunschweiger_zeitung.py b/src/fundus/publishers/de/braunschweiger_zeitung.py
@@ -56,7 +56,3 @@ def authors(self) -> List[str]:
         @attribute
         def publishing_date(self) -> Optional[datetime.datetime]:
             return generic_date_parsing(self.precomputed.ld.bf_search("datePublished"))
-
-        @attribute(validate=False)
-        def free_access(self) -> bool:
-            return self.precomputed.ld.bf_search("isAccessibleForFree") == "True"
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -47,10 +47,10 @@ def test_functions_iter(self, parser_with_function_test, parser_with_static_meth
         assert parser_with_function_test.functions().names == ["test"]
 
     def test_attributes_iter(self, parser_with_attr_title, parser_with_static_method):
-        assert len(BaseParser.attributes()) == 0
-        assert len(parser_with_static_method.attributes()) == 0
-        assert len(parser_with_attr_title.attributes()) == 1
-        assert parser_with_attr_title.attributes().names == ["title"]
+        assert len(BaseParser.attributes()) == 1
+        assert len(parser_with_static_method.attributes()) == 1
+        assert len(parser_with_attr_title.attributes()) == 2
+        assert parser_with_attr_title.attributes().names == ["free_access", "title"]
 
     def test_supported_unsupported(self):
         class ParserWithValidatedAndUnvalidated(BaseParser):
@@ -63,12 +63,12 @@ def unvalidated(self) -> str:
                 return "unsupported"
 
         parser = ParserWithValidatedAndUnvalidated()
-        assert len(parser.attributes()) == 2
+        assert len(parser.attributes()) == 3
 
         assert (validated := parser.attributes().validated)
         assert isinstance(validated, AttributeCollection)
         assert (funcs := list(validated)) != [parser.validated]
-        assert funcs[0].__func__ == parser.validated.__func__
+        assert funcs[1].__func__ == parser.validated.__func__
 
         assert (unvalidated := parser.attributes().unvalidated)
         assert isinstance(validated, AttributeCollection)
-Original file line number
+Diff line change
@@ Expand Up / @@ -91,9 +91,7 @@ @@
             </a>
           </td>
           <td>&#160;</td>
-          <td>
-            <code>free_access</code>
-          </td>
+          <td>&#160;</td>
         </tr>
         <tr>
           <td>
@@ Expand Down @@