Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[14.0][BCK][PORT] 898 from 16.0 #925

Open
wants to merge 3 commits into
base: 14.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 15 additions & 28 deletions base_ubl/models/ubl.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@

logger = logging.getLogger(__name__)

try:
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import NameObject
except ImportError:
logger.debug("Cannot import PyPDF2")


class BaseUbl(models.AbstractModel):
_name = "base.ubl"
Expand Down Expand Up @@ -592,40 +586,33 @@ def _ubl_check_xml_schema(self, xml_string, document, version="2.1"):
)
return True

# TODO: move to pdf_helper
@api.model
def _ubl_add_xml_in_pdf_buffer(self, xml_string, xml_filename, buffer):
# Add attachment to PDF content.
reader = PdfFileReader(buffer)
writer = PdfFileWriter()
writer.appendPagesFromReader(reader)
writer.addAttachment(xml_filename, xml_string)
# show attachments when opening PDF
writer._root_object.update(
{NameObject("/PageMode"): NameObject("/UseAttachments")}
logger.warning(
"`_ubl_add_xml_in_pdf_buffer` deprecated: use `pdf.helper.pdf_embed_xml`"
)
new_buffer = BytesIO()
writer.write(new_buffer)
pdf_content = buffer.getvalue()
new_content = self.env["pdf.helper"].pdf_embed_xml(
pdf_content, xml_filename, xml_string
)
new_buffer = BytesIO(new_content)
return new_buffer

# TODO: move to pdf_helper
@api.model
def _embed_ubl_xml_in_pdf_content(self, xml_string, xml_filename, pdf_content):
"""Add the attachments to the PDF content.
Use the pdf_content argument, which has the binary of the PDF
-> it will return the new PDF binary with the embedded XML
(used for qweb-pdf reports)
"""
logger.warning(
"`_embed_ubl_xml_in_pdf_content` deprecated: use `pdf.helper.pdf_embed_xml`"
)
self.ensure_one()
logger.debug("Starting to embed %s in PDF", xml_filename)

with BytesIO(pdf_content) as reader_buffer:
buffer = self._ubl_add_xml_in_pdf_buffer(
xml_string, xml_filename, reader_buffer
)
pdf_content = buffer.getvalue()
buffer.close()

pdf_content = self.env["pdf.helper"].pdf_embed_xml(
pdf_content, xml_filename, xml_string
)
logger.info("%s file added to PDF content", xml_filename)
return pdf_content

Expand All @@ -648,8 +635,8 @@ def embed_xml_in_pdf(
if pdf_file:
with open(pdf_file, "rb") as f:
pdf_content = f.read()
updated_pdf_content = self._embed_ubl_xml_in_pdf_content(
xml_string, xml_filename, pdf_content
updated_pdf_content = self.env["pdf.helper"].pdf_embed_xml(
pdf_content, xml_filename, xml_string
)
if pdf_file:
with open(pdf_file, "wb") as f:
Expand Down
35 changes: 26 additions & 9 deletions pdf_helper/models/helper.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Copyright 2022 Camptocamp SA
# @author: Simone Orsi <simahawk@gmail.com>
# Copyright 2023 Jacques-Etienne Baudoux (BCIM) <je@bcim.be>
# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl).
import logging

from pypdf.errors import PdfReadError
import io
import logging

from odoo import models
from odoo import api, models
from odoo.tools.pdf import NameObject, OdooPdfFileReader, OdooPdfFileWriter

from ..utils import PDFParser

Expand All @@ -18,16 +20,31 @@ class PDFHelper(models.AbstractModel):

_PDF_PARSER_KLASS = PDFParser

@api.model
def pdf_get_xml_files(self, pdf_file):
"""Extract XML attachments from pdf

:param pdf_file: binary PDF file content
:returns: a dict like {$filename: $parsed_xml_file_obj}.
"""
parser = self._PDF_PARSER_KLASS(pdf_file)
try:
return parser.get_xml_files()
except self._pdf_get_xml_files_swallable_exceptions() as err:
# TODO: can't we catch specific exceptions?
# This try/except block was added to reflect what was done
# in base_business_document_import till now.
except parser.get_xml_files_swallable_exceptions() as err:
_logger.error("PDF file parsing failed: %s", str(err))
return {}

def _pdf_get_xml_files_swallable_exceptions(self):
return (KeyError, PdfReadError)
@api.model
def pdf_embed_xml(self, pdf_content, xml_filename, xml_string):
    """Embed an XML file as an attachment inside a PDF.

    :param pdf_content: binary content of the source PDF
    :param xml_filename: name given to the embedded attachment
    :param xml_string: content of the XML attachment
    :returns: binary content of the new PDF holding the attachment
    """
    source_stream = io.BytesIO(pdf_content)
    target_stream = io.BytesIO()
    with source_stream, target_stream:
        pdf_reader = OdooPdfFileReader(source_stream, strict=False)
        pdf_writer = OdooPdfFileWriter()
        pdf_writer.cloneReaderDocumentRoot(pdf_reader)
        pdf_writer.addAttachment(xml_filename, xml_string, subtype="text/xml")
        # Ask PDF viewers to show the attachments when opening the document.
        page_mode = {NameObject("/PageMode"): NameObject("/UseAttachments")}
        pdf_writer._root_object.update(page_mode)
        pdf_writer.write(target_stream)
        # Read the value before the stream is closed on leaving the block.
        return target_stream.getvalue()
1 change: 1 addition & 0 deletions pdf_helper/readme/CONTRIBUTORS.rst
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
* Simone Orsi <simone.orsi@camptocamp.com>
* Alexis de Lattre <alexis.delattre@akretion.com>
* Jacques-Etienne Baudoux (BCIM) <je@bcim.be>
2 changes: 2 additions & 0 deletions pdf_helper/readme/USAGE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ Inside Odoo env::

res = env["pdf.helper"].pdf_get_xml_files(pdf_filecontent)

new_pdf_filecontent = env["pdf.helper"].pdf_embed_xml(pdf_filecontent, filename, xml)

Outside Odoo env::

from odoo.addons.pdf_helper.utils import PDFParser
Expand Down
16 changes: 14 additions & 2 deletions pdf_helper/tests/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ def test_parse_xml(self):


class TestPDFHelper(TransactionCase):
def test_parse_xml(self):
def test_get_xml(self):
pdf_content = read_test_file("pdf_with_xml_test.pdf", mode="rb")
res = self.env["pdf.helper"].pdf_get_xml_files(pdf_content)
fname, xml_root = tuple(res.items())[0]
self.assertEqual(fname, "factur-x.xml")
self.assertTrue(isinstance(xml_root, etree._Element))

def test_parse_xml_fail(self):
def test_get_xml_fail(self):
with self.assertLogs(
"odoo.addons.pdf_helper.models.helper", level="ERROR"
) as log_catcher:
Expand All @@ -44,3 +44,15 @@ def test_parse_xml_fail(self):
"PDF file parsing failed: Cannot read an empty file",
log_catcher.output[0],
)

def test_embed_xml(self):
    """Embed an XML file in the test PDF and read it back.

    The attachment extracted with ``pdf_get_xml_files`` must be stored
    under the filename used for embedding and serialize back to the
    original XML bytes.
    """
    pdf_content = read_test_file("pdf_with_xml_test.pdf", mode="rb")
    filename = "test"
    xml = b"<root>test</root>"
    helper = self.env["pdf.helper"]
    newpdf_content = helper.pdf_embed_xml(pdf_content, filename, xml)
    attachments = helper.pdf_get_xml_files(newpdf_content)
    # assertIn reports the container content on failure, unlike
    # assertTrue(x in y) which only says "False is not true".
    self.assertIn(filename, attachments)
    self.assertEqual(xml, etree.tostring(attachments[filename]))
48 changes: 22 additions & 26 deletions pdf_helper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl).

import logging
import mimetypes
from io import BytesIO
from struct import error as StructError

from lxml import etree

_logger = logging.getLogger(__name__)

try:
import pypdf
from PyPDF2.errors import PdfReadError
except ImportError:
_logger.debug("Cannot import pypdf")
from PyPDF2.utils import PdfReadError

from odoo.tools.pdf import OdooPdfFileReader

_logger = logging.getLogger(__name__)


class PDFParser:
Expand All @@ -35,25 +37,19 @@ def get_xml_files(self):
_logger.debug("Valid XML files found in PDF: %s", list(res.keys()))
return res

def _extract_xml_files(self, fd):
reader = pypdf.PdfReader(fd)
# attachment parsing via pypdf doesn't support /Kids
# cf my bug report https://github.com/py-pdf/pypdf/issues/2087
xmlfiles = {}
for filename, content_list in reader.attachments.items():
_logger.debug("Attachment %s found in PDF", filename)
mime_res = mimetypes.guess_type(filename)
if mime_res and mime_res[0] in ["application/xml", "text/xml"]:
with BytesIO(self.pdf_file) as buffer:
pdf_reader = OdooPdfFileReader(buffer, strict=False)

# Process embedded files.
for xml_name, content in pdf_reader.getAttachments():
try:
_logger.debug("Trying to parse XML attachment %s", filename)
xml_root = etree.fromstring(content_list[0])
if len(xml_root) > 0:
_logger.info("Valid XML file %s found in attachments", filename)
xmlfiles[filename] = xml_root
else:
_logger.warning("XML file %s is empty", filename)
except Exception as err:
_logger.warning(
"Failed to parse XML file %s. Error: %s", filename, str(err)
)
return xmlfiles
res[xml_name] = etree.fromstring(content)
except Exception:
_logger.debug("Non XML file found in PDF")
if res:
_logger.debug("Valid XML files found in PDF: %s", list(res.keys()))
return res

def get_xml_files_swallable_exceptions(self):
    """Return the exception types callers may swallow around ``get_xml_files``.

    :returns: tuple of exception classes, usable in an ``except`` clause
    """
    swallable = (NotImplementedError, StructError, PdfReadError)
    return swallable

Loading