Coverage for adhoc-cicd-odoo-odoo / odoo / tools / pdf / __init__.py: 18%
334 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 18:05 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 18:05 +0000
1# Part of Odoo. See LICENSE file for full copyright and licensing details.
2import base64
3import importlib
4import io
5import re
6import unicodedata
7import sys
8from datetime import datetime
9from hashlib import md5
10from logging import getLogger
11from zlib import compress, decompress, decompressobj
13from PIL import Image, PdfImagePlugin
15from odoo import modules
16from odoo.tools.arabic_reshaper import reshape
17from odoo.tools.parse_version import parse_version
18from odoo.tools.misc import file_open, SENTINEL
20# ----------------------------------------------------------
21# PyPDF2 hack
22# ensure that zlib does not throw error -5 when decompressing
23# because some pdf won't fit into allocated memory
24# https://docs.python.org/3/library/zlib.html#zlib.decompressobj
25# ----------------------------------------------------------
26try:
27 import zlib
29 def _decompress(data):
30 zobj = zlib.decompressobj()
31 return zobj.decompress(data)
33 import PyPDF2.filters # needed after PyPDF2 2.0.0 and before 2.11.0
34 PyPDF2.filters.decompress = _decompress
35except ImportError:
36 pass # no fix required
# might be a good case for exception groups
_first_import_error = None
# keep pypdf2 2.x first so noble uses that rather than pypdf 4.0
for SUBMOD in ('._pypdf2_2', '._pypdf', '._pypdf2_1'):
    try:
        pypdf = importlib.import_module(SUBMOD, __spec__.name)
    except ImportError as exc:
        # only the first failure is kept, it is chained to the final error
        if _first_import_error is None:
            _first_import_error = exc
    else:
        break
else:
    raise ImportError("pypdf implementation not found") from _first_import_error
del _first_import_error
# names re-exported from whichever implementation was selected above
PdfReaderBase = pypdf.PdfReader
PdfWriter = pypdf.PdfWriter
filters = pypdf.filters
generic = pypdf.generic
errors = pypdf.errors
create_string_object = pypdf.create_string_object
# because they got re-exported
ArrayObject = generic.ArrayObject
BooleanObject = generic.BooleanObject
ByteStringObject = generic.ByteStringObject
DecodedStreamObject = generic.DecodedStreamObject
DictionaryObject = generic.DictionaryObject
IndirectObject = generic.IndirectObject
NameObject = generic.NameObject
NumberObject = generic.NumberObject

# compatibility aliases
PdfReadError = errors.PdfReadError  # moved in 2.0
PdfStreamError = errors.PdfStreamError  # moved in 2.0
createStringObject = create_string_object  # deprecated in 2.0, removed in 5.0
# fall back to NotImplementedError when the errors module has no DependencyError
DependencyError = getattr(errors, 'DependencyError', NotImplementedError)
# ----------------------------------------------------------
# PyPDF2 hack
# ensure that zlib does not throw error -5 when decompressing
# because some pdf won't fit into allocated memory
# https://docs.python.org/3/library/zlib.html#zlib.decompressobj
# ----------------------------------------------------------
def _decompress_with_decompressobj(data):
    """Inflate ``data`` through a fresh ``decompressobj``."""
    return decompressobj().decompress(data)


pypdf.filters.decompress = _decompress_with_decompressobj
# monkey patch to discard unused arguments as the old arguments were not discarded in the transitional class
# This keep the old default value of the `strict` argument
# https://github.com/py-pdf/pypdf/blob/1.26.0/PyPDF2/pdf.py#L1061
# https://pypdf2.readthedocs.io/en/2.0.0/_modules/PyPDF2/_reader.html#PdfReader
class PdfReader(PdfReaderBase):
    """Reader whose ``strict`` flag defaults to ``True`` and which ignores extra arguments."""

    def __init__(self, stream, strict=True, *_ignored_args, **_ignored_kwargs):
        # only `stream` and `strict` are forwarded to the underlying reader
        super().__init__(stream, strict)
# Ensure that PdfFileReader and PdfFileWriter are available in case it's still used somewhere
PdfFileReader = PdfReader
pypdf.PdfFileReader = PdfReader
pypdf.PdfFileWriter = PdfWriter

_logger = getLogger(__name__)
DEFAULT_PDF_DATETIME_FORMAT = "D:%Y%m%d%H%M%S+00'00'"
REGEX_SUBTYPE_UNFORMATED = re.compile(r'^\w+/[\w-]+$')
REGEX_SUBTYPE_FORMATED = re.compile(r'^/\w+#2F[\w-]+$')

# Disable linter warning: this import is needed to make sure a PDF stream can be saved in Image.
PdfImagePlugin.__name__
100# make sure values are unwrapped by calling the specialized __getitem__
101def _unwrapping_get(self, key, default=None):
102 try:
103 return self[key]
104 except KeyError:
105 return default
DictionaryObject.get = _unwrapping_get


if hasattr(NameObject, 'renumber_table'):
    # Make sure all the correct delimiters are included
    # We will make this change only if pypdf has the renumber_table attribute
    # https://github.com/py-pdf/pypdf/commit/8c542f331828c5839fda48442d89b8ac5d3984ac
    NameObject.renumber_table.update({
        chr(code): f"#{code:02X}".encode()
        # the delimiter characters first, then every code point below 33
        for code in (*b"#()<>[]{}/%", *range(33))
    })
if hasattr(PdfWriter, 'write_stream'):
    # >= 2.x has a utility `write` which can open a path, so `write_stream` could be called directly
    class BrandedFileWriter(PdfWriter):
        """Writer stamping Odoo as creator and producer before writing."""

        def write_stream(self, *args, **kwargs):
            self.add_metadata(dict.fromkeys(('/Creator', '/Producer'), "Odoo"))
            super().write_stream(*args, **kwargs)
else:
    # 1.x has a monolithic write method
    class BrandedFileWriter(PdfWriter):
        """Writer stamping Odoo as creator and producer before writing."""

        def write(self, *args, **kwargs):
            self.addMetadata(dict.fromkeys(('/Creator', '/Producer'), "Odoo"))
            super().write(*args, **kwargs)


PdfFileWriter = BrandedFileWriter
def merge_pdf(pdf_data):
    """Concatenate the pages of several PDF documents into one document.

    Attachments of the source documents are not merged.

    :param list pdf_data: a list of PDF datastrings
    :return: a unique merged PDF datastring
    """
    merged = PdfFileWriter()
    for raw_document in pdf_data:
        source = PdfFileReader(io.BytesIO(raw_document), strict=False)
        for index in range(source.getNumPages()):
            merged.addPage(source.getPage(index))

    with io.BytesIO() as output:
        merged.write(output)
        return output.getvalue()
def fill_form_fields_pdf(writer, form_fields):
    """Fill in the form fields of every page held by ``writer``.

    The writer is updated in place; nothing is returned.

    :param writer: a PdfFileWriter object
    :param dict form_fields: a dictionary of form fields to update in the PDF
    """
    # This solves a known problem with PyPDF2, where with some pdf software, forms fields aren't
    # correctly filled until the user click on it, see: https://github.com/py-pdf/pypdf/issues/355
    is_upper_version_pypdf2 = hasattr(writer, 'set_need_appearances_writer')
    if is_upper_version_pypdf2:
        writer.set_need_appearances_writer()
        nbr_pages = len(writer.pages)
    else:  # This method was renamed in PyPDF2 2.0
        root = writer._root_object
        # get the AcroForm tree
        if "/AcroForm" not in root:
            root.update({
                NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer),
            })
        root["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)
        nbr_pages = writer.getNumPages()

    for page_id in range(nbr_pages):
        page = writer.getPage(page_id)

        if is_upper_version_pypdf2:
            writer.update_page_form_field_values(page, form_fields)
            continue

        # Known bug on previous versions of PyPDF2, fixed in 2.11
        if not page.get('/Annots'):
            _logger.info("No fields to update in this page")
            continue

        try:
            writer.updatePageFormFieldValues(page, form_fields)
        except ValueError:
            # Known bug on previous versions of PyPDF2 for some PDFs, fixed in 2.4.2
            _logger.info("Fields couldn't be filled in this page.")
def rotate_pdf(pdf):
    """Return a copy of ``pdf`` with every page rotated 90° clockwise.

    Attachments of the source document are not copied.

    :param pdf: a PDF to rotate
    :return: a PDF rotated
    """
    rotated = PdfFileWriter()
    source = PdfFileReader(io.BytesIO(pdf), strict=False)
    for index in range(source.getNumPages()):
        current_page = source.getPage(index)
        current_page.rotateClockwise(90)
        rotated.addPage(current_page)

    with io.BytesIO() as output:
        rotated.write(output)
        return output.getvalue()
220def to_pdf_stream(attachment) -> io.BytesIO | None:
221 """Get the byte stream of the attachment as a PDF."""
222 if not attachment.raw:
223 _logger.warning("%s has no raw data.", attachment)
224 return None
226 if attachment_raw := attachment._get_pdf_raw():
227 return io.BytesIO(attachment_raw)
228 stream = io.BytesIO(attachment.raw)
229 if attachment.mimetype.startswith('image'):
230 output_stream = io.BytesIO()
231 Image.open(stream).convert("RGB").save(output_stream, format="pdf")
232 return output_stream
233 _logger.warning("mimetype (%s) not recognized for %s", attachment.mimetype, attachment)
234 return None
def extract_page(attachment, num_page=0) -> io.BytesIO | None:
    """Extract a specific page from an attachment PDF.

    :param attachment: attachment handed to :func:`to_pdf_stream`
    :param int num_page: index of the page to extract
    :return: a stream holding a PDF made of that single page, or ``None``
        when the attachment could not be turned into a PDF stream
    """
    source_stream = to_pdf_stream(attachment)
    if not source_stream:
        return None

    single_page_writer = PdfFileWriter()
    single_page_writer.addPage(PdfFileReader(source_stream).getPage(num_page))

    output = io.BytesIO()
    single_page_writer.write(output)
    return output
def add_banner(pdf_stream, text=None, logo=False, thickness=SENTINEL):
    """ Add a banner on a PDF in the upper right corner, with Odoo's logo (optionally).

    :param pdf_stream (BytesIO): The PDF stream where the banner will be applied.
    :param text (str): The text to be displayed; nothing is written when empty or ``None``.
    :param logo (bool): Whether to display Odoo's logo in the banner.
    :param thickness (float): The thickness of the banner, in canvas units (default: 2cm).
    :return (BytesIO): The modified PDF stream.
    """
    from reportlab.lib import colors  # noqa: PLC0415
    from reportlab.lib.utils import ImageReader  # noqa: PLC0415
    from reportlab.pdfgen import canvas  # noqa: PLC0415

    if thickness is SENTINEL:
        from reportlab.lib.units import cm  # noqa: PLC0415
        thickness = 2 * cm

    old_pdf = PdfFileReader(pdf_stream, strict=False, overwriteWarnings=False)
    num_pages = old_pdf.getNumPages()
    packet = io.BytesIO()
    can = canvas.Canvas(packet)

    # The logo is only read and decoded when it is actually drawn, and the
    # reader is built once instead of once per page.
    logo_reader = None
    if logo:
        with file_open('base/static/img/main_partner-image.png', mode='rb') as f:
            logo_reader = ImageReader(Image.open(io.BytesIO(f.read())))
    odoo_color = colors.Color(113 / 255, 75 / 255, 103 / 255, 0.8)

    for p in range(num_pages):
        page = old_pdf.getPage(p)
        width = float(abs(page.mediaBox.getWidth()))
        height = float(abs(page.mediaBox.getHeight()))

        # Move the origin to the top-right corner and tilt the drawing so the
        # banner crosses that corner diagonally.
        can.setPageSize((width, height))
        can.translate(width, height)
        can.rotate(-45)

        # Draw banner
        path = can.beginPath()
        path.moveTo(-width, -thickness)
        path.lineTo(-width, -2 * thickness)
        path.lineTo(width, -2 * thickness)
        path.lineTo(width, -thickness)
        can.setFillColor(odoo_color)
        can.drawPath(path, fill=1, stroke=False)

        # Insert text (and logo) inside the banner
        can.setFontSize(10)
        can.setFillColor(colors.white)
        if text:
            # `text` defaults to None, which cannot be rendered as a string
            can.drawRightString(0.75 * thickness, -1.45 * thickness, text)
        if logo_reader is not None:
            can.drawImage(
                logo_reader, 0.25 * thickness, -2.05 * thickness, 40, 40, mask='auto', preserveAspectRatio=True)

        can.showPage()

    can.save()

    # Merge the old pages with the watermark
    watermark_pdf = PdfFileReader(packet, overwriteWarnings=False)
    new_pdf = PdfFileWriter()
    for p in range(num_pages):
        new_page = old_pdf.getPage(p)
        # Remove annotations (if any), to prevent errors in PyPDF2
        if '/Annots' in new_page:
            del new_page['/Annots']
        new_page.mergePage(watermark_pdf.getPage(p))
        new_pdf.addPage(new_page)

    # Write the new pdf into a new output stream
    output = io.BytesIO()
    new_pdf.write(output)

    return output
def reshape_text(text):
    """
    Return ``text`` ready for display, flipped when it is fully right-to-left.

    This is just a hotfix to make things work.
    In the future the clean way be to use arabic-reshaper and python3-bidi libraries.

    ``unicodedata.bidirectional(char)`` gives the bidirectional type of a character
    (see https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types):
        - 'L' for Left-to-Right character
        - 'R' or 'AL' for Right-to-Left character

    The text is reshaped and reversed only when its first non-blank character is of
    type 'R' or 'AL' and no character after the first one is of type 'L'.
    Otherwise it is returned untouched; a falsy ``text`` yields ``''``.
    """
    if not text:
        return ''

    leading_letter = text.lstrip()[:1] or ' '
    if unicodedata.bidirectional(leading_letter) not in ('AL', 'R'):
        return text

    for letter in text[1:]:
        if unicodedata.bidirectional(letter) == 'L':
            return text

    return reshape(text)[::-1]
class OdooPdfFileReader(PdfFileReader):
    """OVERRIDE of PdfFileReader to add the management of multiple embedded files."""

    def getAttachments(self):
        """Yield the files embedded inside the PDF.

        This is a generator: nothing (including the decryption attempt) runs
        before the first item is requested.

        :return: an iterator of ``(filename, content)`` pairs; it is empty when
            the document embeds no file or when reading the embedded files fails
        :raises NotImplementedError: if document is encrypted and uses an unsupported encryption method.
        """
        if self.isEncrypted:
            # If the PDF is owner-encrypted, try to unwrap it by giving it an empty user password.
            self.decrypt('')

        try:
            file_path = self.trailer["/Root"].get("/Names", {}).get("/EmbeddedFiles", {}).get("/Names")

            if not file_path:
                return
            # the array alternates names and file specifications, only the
            # specifications (odd positions) are needed
            for p in file_path[1::2]:
                attachment = p.getObject()
                yield (attachment["/F"], attachment["/EF"]["/F"].getObject().getData())
        except Exception:  # noqa: BLE001
            # malformed pdf (i.e. invalid xref page): deliberately best-effort,
            # stop yielding instead of failing
            return
class OdooPdfFileWriter(PdfFileWriter):
    """Branded PDF writer able to embed attachments and to produce PDF/A documents."""

    def __init__(self, *args, **kwargs):
        """
        Override of the init to initialise additional variables.

        ``_reader`` is filled in by :meth:`cloneReaderDocumentRoot`; ``is_pdfa``
        is switched on when a PDF/A header is detected or after
        :meth:`convert_to_pdfa`.
        """
        super().__init__(*args, **kwargs)
        self._reader = None
        self.is_pdfa = False

    def format_subtype(self, subtype):
        """
        Apply the correct format to the subtype.
        It should take the form of "/xxx#2Fxxx". E.g. for "text/xml": "/text#2Fxml"

        :param subtype: The mime-type of the attachment.
        :return: the formatted subtype, ``subtype`` itself when it is falsy, or
            ``''`` (with a warning) when it cannot be formatted.
        """
        if not subtype:
            return subtype

        adapted_subtype = subtype
        if REGEX_SUBTYPE_UNFORMATED.match(subtype):
            # _pypdf2_2 and _pypdf does the formating when creating a NameObject
            if SUBMOD in ('._pypdf2_2', '._pypdf'):
                return '/' + subtype
            adapted_subtype = '/' + subtype.replace('/', '#2F')

        if not REGEX_SUBTYPE_FORMATED.match(adapted_subtype):
            # The subtype still does not match the correct format, so we will not add it to the document
            _logger.warning("Attempt to add an attachment with the incorrect subtype '%s'. The subtype will be ignored.", subtype)
            adapted_subtype = ''
        return adapted_subtype

    def add_attachment(self, name, data, subtype=None):
        """
        Add an attachment to the pdf. Supports adding multiple attachment, while respecting PDF/A rules.

        :param name: The name of the attachment
        :param data: The data of the attachment
        :param subtype: The mime-type of the attachment. This is required by PDF/A, but not essential otherwise.
        """
        adapted_subtype = self.format_subtype(subtype)

        attachment = self._create_attachment_object({
            'filename': name,
            'content': data,
            'subtype': adapted_subtype,
        })
        filename = attachment.getObject()['/F']

        names = self._root_object.get('/Names')
        if names and names.get('/EmbeddedFiles'):
            names_array = names["/EmbeddedFiles"]["/Names"]
            names_array.extend([filename, attachment])
        else:
            names_array = ArrayObject()
            names_array.extend([filename, attachment])

            embedded_files_names_dictionary = DictionaryObject()
            embedded_files_names_dictionary.update({
                NameObject("/Names"): names_array
            })
            if names:
                # Keep the other entries of an existing name dictionary
                # instead of replacing the whole dictionary.
                names[NameObject("/EmbeddedFiles")] = embedded_files_names_dictionary
            else:
                embedded_files_dictionary = DictionaryObject()
                embedded_files_dictionary.update({
                    NameObject("/EmbeddedFiles"): embedded_files_names_dictionary
                })
                self._root_object.update({
                    NameObject("/Names"): embedded_files_dictionary
                })

        if self._root_object.get('/AF'):
            attachment_array = self._root_object['/AF']
            attachment_array.extend([attachment])
        else:
            # Create a new object containing an array referencing embedded file
            # And reference this array in the root catalogue
            attachment_array = self._addObject(ArrayObject([attachment]))
            self._root_object.update({
                NameObject("/AF"): attachment_array
            })

    addAttachment = add_attachment

    def embed_odoo_attachment(self, attachment, subtype=None):
        """Embed ``attachment`` using its ``name`` and ``raw`` content.

        :param attachment: object exposing ``name``, ``raw`` and ``mimetype``
        :param subtype: mime-type to use; defaults to ``attachment.mimetype``
        """
        assert attachment, "embed_odoo_attachment cannot be called without attachment."
        self.addAttachment(attachment.name, attachment.raw, subtype=subtype or attachment.mimetype)

    def cloneReaderDocumentRoot(self, reader):
        """Clone the document root of ``reader`` and reuse its header and ID.

        :param reader: the reader to clone; its ``stream`` is rewound and read
            to recover the header lines.
        """
        super().cloneReaderDocumentRoot(reader)
        self._reader = reader
        # Try to read the header coming in, and reuse it in our new PDF
        # This is done in order to allows modifying PDF/A files after creating them (as PyPDF does not read it)
        stream = reader.stream
        stream.seek(0)
        header = stream.readlines(9)
        # Should always be true, the first line of a pdf should have 9 bytes (%PDF-1.x plus a newline)
        if len(header) == 1:
            # If we found a header, set it back to the new pdf
            self._header = header[0]
            # Also check the second line. If it is PDF/A, it should be a line starting by % following by four bytes + \n
            following_lines = stream.readlines(1)
            # a stream made of a single line has no second line to inspect
            second_line = following_lines[0] if following_lines else b''
            if second_line[:1] == b'%' and len(second_line) == 6:
                self.is_pdfa = True
                # This is broken in pypdf 3+ and pypdf2 has been automatically
                # writing a binary comment since 1.27
                # py-pdf/pypdf@036789a4664e3f572292bc7dceec10f08b7dbf62 so we
                # only need this if running on 1.x
                #
                # incidentally that means the heuristic above is completely broken
                if SUBMOD == '._pypdf2_1':
                    self._header += second_line
        # clone_reader_document_root clones reader._ID since 3.2 (py-pdf/pypdf#1520)
        if not hasattr(self, '_ID'):
            # Look if we have an ID in the incoming stream and use it.
            self._set_id(reader.trailer.get('/ID', None))

    def _set_id(self, pdf_id):
        """Store ``pdf_id`` as the document ID; a falsy value is ignored."""
        if not pdf_id:
            return

        # property in pypdf
        if hasattr(type(self), '_ID'):
            self.trailers['/ID'] = pdf_id
        else:
            self._ID = pdf_id

    def convert_to_pdfa(self):
        """
        Transform the opened PDF file into a PDF/A compliant file.

        Reads ``self._reader.stream``, so :meth:`cloneReaderDocumentRoot` has
        to be called beforehand.
        """
        # Set the PDF version to 1.7 (as PDF/A-3 is based on version 1.7) and make it PDF/A compliant.
        # See https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Parts-2-and-3-rules#rule-612-1
        self._header = b"%PDF-1.7"

        # " The file header shall begin at byte zero and shall consist of "%PDF-1.n" followed by a single EOL marker,
        # where 'n' is a single digit number between 0 (30h) and 7 (37h) "
        # " The aforementioned EOL marker shall be immediately followed by a % (25h) character followed by at least four
        # bytes, each of whose encoded byte values shall have a decimal value greater than 127 ".
        # PyPDF2 2.X+ already adds these 4 characters by default (so ._pypdf2_2 and ._pypdf don't need it).
        # The injected character `\xc3\xa9` is equivalent to the character `é`.
        # Therefore, on `_pypdf2_1`, the header will look like: `%PDF-1.7\n%éééé`,
        # while on `_pypdf2_2` and `_pypdf`, it will look like: `%PDF-1.7\n%âãÏÓ`.
        if SUBMOD == '._pypdf2_1':
            self._header += b"\n%\xc3\xa9\xc3\xa9\xc3\xa9\xc3\xa9"

        # Add a document ID to the trailer. This is only needed when using encryption with regular PDF, but is required
        # when using PDF/A
        pdf_id = ByteStringObject(md5(self._reader.stream.getvalue()).digest())
        # The first string is based on the content at the time of creating the file, while the second is based on the
        # content of the file when it was last updated. When creating a PDF, both are set to the same value.
        self._set_id(ArrayObject((pdf_id, pdf_id)))

        with file_open('tools/data/files/sRGB2014.icc', mode='rb') as icc_profile:
            icc_profile_file_data = compress(icc_profile.read())

        icc_profile_stream_obj = DecodedStreamObject()
        icc_profile_stream_obj.setData(icc_profile_file_data)
        icc_profile_stream_obj.update({
            NameObject("/Filter"): NameObject("/FlateDecode"),
            NameObject("/N"): NumberObject(3),
            # a length is a number, not a name
            NameObject("/Length"): NumberObject(len(icc_profile_file_data)),
        })

        icc_profile_obj = self._addObject(icc_profile_stream_obj)

        output_intent_dict_obj = DictionaryObject()
        output_intent_dict_obj.update({
            NameObject("/S"): NameObject("/GTS_PDFA1"),
            NameObject("/OutputConditionIdentifier"): createStringObject("sRGB"),
            NameObject("/DestOutputProfile"): icc_profile_obj,
            NameObject("/Type"): NameObject("/OutputIntent"),
        })

        output_intent_obj = self._addObject(output_intent_dict_obj)
        self._root_object.update({
            NameObject("/OutputIntents"): ArrayObject([output_intent_obj]),
        })

        pages = self._root_object['/Pages']['/Kids']

        # PDF/A needs the glyphs width array embedded in the pdf to be consistent with the ones from the font file.
        # But it seems like it is not the case when exporting from wkhtmltopdf.
        try:
            import fontTools.ttLib  # noqa: PLC0415
        except ImportError:
            _logger.warning('The fonttools package is not installed. Generated PDF may not be PDF/A compliant.')
        else:
            fonts = {}
            # First browse through all the pages of the pdf file, to get a reference to all the fonts used in the PDF.
            for page in pages:
                for font in page.getObject()['/Resources']['/Font'].values():
                    for descendant in font.getObject()['/DescendantFonts']:
                        fonts[descendant.idnum] = descendant.getObject()

            # Then for each font, rewrite the width array with the information taken directly from the font file.
            # The new width are calculated such as width = round(1000 * font_glyph_width / font_units_per_em)
            # See: http://martin.hoppenheit.info/blog/2018/pdfa-validation-and-inconsistent-glyph-width-information/
            for font in fonts.values():
                font_file = font['/FontDescriptor']['/FontFile2']
                stream = io.BytesIO(decompress(font_file._data))
                ttfont = fontTools.ttLib.TTFont(stream)
                font_upm = ttfont['head'].unitsPerEm
                if parse_version(fontTools.__version__) < parse_version('4.37.2'):
                    glyphs = ttfont.getGlyphSet()._hmtx.metrics
                else:
                    glyphs = ttfont.getGlyphSet().hMetrics
                glyph_widths = []
                for key, values in glyphs.items():
                    if key[:5] == 'glyph':
                        glyph_widths.append(NumberObject(round(1000.0 * values[0] / font_upm)))

                font[NameObject('/W')] = ArrayObject([NumberObject(1), ArrayObject(glyph_widths)])
                stream.close()

        outlines = self._root_object['/Outlines'].getObject()
        outlines[NameObject('/Count')] = NumberObject(1)

        # [6.7.2.2-1] include a MarkInfo dictionary containing "Marked" with true value
        mark_info = DictionaryObject({NameObject("/Marked"): BooleanObject(True)})
        self._root_object[NameObject("/MarkInfo")] = mark_info

        # [6.7.3.3-1] include minimal document structure in the catalog
        struct_tree_root = DictionaryObject({NameObject("/Type"): NameObject("/StructTreeRoot")})
        self._root_object[NameObject("/StructTreeRoot")] = struct_tree_root

        # Set odoo as producer
        self.addMetadata({
            '/Creator': "Odoo",
            '/Producer': "Odoo",
        })
        self.is_pdfa = True

    def add_file_metadata(self, metadata_content):
        """
        Set the XMP metadata of the pdf, wrapping it with the necessary XMP header/footer.
        These are required for a PDF/A file to be completely compliant. Omitting them would result in validation errors.

        :param metadata_content: bytes of the metadata to add to the pdf.
        """
        # See https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/XMP%20SDK%20Release%20cc-2016-08/XMPSpecificationPart1.pdf
        # Page 10/11
        header = b'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>'
        footer = b'<?xpacket end="w"?>'
        metadata = b'%s%s%s' % (header, metadata_content, footer)
        file_entry = DecodedStreamObject()
        file_entry.setData(metadata)
        file_entry.update({
            NameObject("/Type"): NameObject("/Metadata"),
            NameObject("/Subtype"): NameObject("/XML"),
            # a length is a number, not a name
            NameObject("/Length"): NumberObject(len(metadata)),
        })

        # Add the new metadata to the pdf, then redirect the reference to refer to this new object.
        metadata_object = self._addObject(file_entry)
        self._root_object.update({NameObject("/Metadata"): metadata_object})

    def _create_attachment_object(self, attachment):
        """ Create a PyPdf2.generic object representing an embedded file.

        :param attachment: A dictionary containing:
            * filename: The name of the file to embed (required)
            * content: The bytes of the file to embed (required)
            * subtype: The mime-type of the file to embed (optional)
            * description: A description of the file (optional)
        :return: the value returned by ``_addObject`` for the new file specification
        """
        file_entry = DecodedStreamObject()
        file_entry.setData(attachment['content'])
        file_entry.update({
            NameObject("/Type"): NameObject("/EmbeddedFile"),
            NameObject("/Params"):
                DictionaryObject({
                    NameObject('/CheckSum'): createStringObject(md5(attachment['content']).hexdigest()),
                    # NOTE(review): the format string hard-codes a +00'00' offset while
                    # datetime.now() is naive local time; presumably the process runs in UTC - confirm
                    NameObject('/ModDate'): createStringObject(datetime.now().strftime(DEFAULT_PDF_DATETIME_FORMAT)),
                    NameObject('/Size'): NumberObject(len(attachment['content'])),
                }),
        })
        if attachment.get('subtype'):
            file_entry.update({
                NameObject("/Subtype"): NameObject(attachment['subtype']),
            })
        file_entry_object = self._addObject(file_entry)
        filename_object = createStringObject(attachment['filename'])
        filespec_object = DictionaryObject({
            NameObject("/AFRelationship"): NameObject("/Data"),
            NameObject("/Type"): NameObject("/Filespec"),
            NameObject("/F"): filename_object,
            NameObject("/EF"):
                DictionaryObject({
                    NameObject("/F"): file_entry_object,
                    NameObject('/UF'): file_entry_object,
                }),
            NameObject("/UF"): filename_object,
        })
        if attachment.get('description'):
            filespec_object.update({NameObject("/Desc"): createStringObject(attachment['description'])})
        return self._addObject(filespec_object)