Coverage for adhoc-cicd-odoo-odoo / odoo / tools / pdf / __init__.py: 18%

334 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-09 18:15 +0000

1# Part of Odoo. See LICENSE file for full copyright and licensing details. 

2import base64 

3import importlib 

4import io 

5import re 

6import unicodedata 

7import sys 

8from datetime import datetime 

9from hashlib import md5 

10from logging import getLogger 

11from zlib import compress, decompress, decompressobj 

12 

13from PIL import Image, PdfImagePlugin 

14 

15from odoo import modules 

16from odoo.tools.arabic_reshaper import reshape 

17from odoo.tools.parse_version import parse_version 

18from odoo.tools.misc import file_open, SENTINEL 

19 

# ----------------------------------------------------------
# PyPDF2 hack
# Ensure that zlib does not throw error -5 when decompressing,
# because some PDFs won't fit into the allocated memory.
# https://docs.python.org/3/library/zlib.html#zlib.decompressobj
# ----------------------------------------------------------
try:
    import zlib

    def _decompress(data):
        # Go through a decompression object rather than the one-shot
        # zlib.decompress, see the zlib documentation linked above.
        zobj = zlib.decompressobj()
        return zobj.decompress(data)

    import PyPDF2.filters  # needed after PyPDF2 2.0.0 and before 2.11.0
    # Replace the `decompress` name looked up inside PyPDF2.filters.
    PyPDF2.filters.decompress = _decompress
except ImportError:
    pass  # no fix required

37 

38 

# might be a good case for exception groups
# `error` keeps the first ImportError met while probing, so it can be chained
# to the final failure below.
error = None
# keep pypdf2 2.x first so noble uses that rather than pypdf 4.0
# NOTE: SUBMOD stays bound after the loop to the name of the submodule that was
# imported; code further down in this module compares against it.
for SUBMOD in ['._pypdf2_2', '._pypdf', '._pypdf2_1']:
    try:
        pypdf = importlib.import_module(SUBMOD, __spec__.name)
        break
    except ImportError as e:
        if error is None:
            error = e
else:
    # Only reached when the loop ran to completion, i.e. no candidate imported.
    raise ImportError("pypdf implementation not found") from error
del error

52 

# Shortcuts to the entry points exposed by the selected backend submodule.
PdfReaderBase = pypdf.PdfReader
PdfWriter = pypdf.PdfWriter
filters = pypdf.filters
generic = pypdf.generic
errors = pypdf.errors
create_string_object = pypdf.create_string_object

# because they got re-exported
ArrayObject = generic.ArrayObject
BooleanObject = generic.BooleanObject
ByteStringObject = generic.ByteStringObject
DecodedStreamObject = generic.DecodedStreamObject
DictionaryObject = generic.DictionaryObject
IndirectObject = generic.IndirectObject
NameObject = generic.NameObject
NumberObject = generic.NumberObject

# compatibility aliases
PdfReadError = errors.PdfReadError  # moved in 2.0
PdfStreamError = errors.PdfStreamError  # moved in 2.0
createStringObject = create_string_object  # deprecated in 2.0, removed in 5.0
# Fall back to NotImplementedError when the backend's `errors` module has no DependencyError.
DependencyError = getattr(errors, 'DependencyError', NotImplementedError)

# ----------------------------------------------------------
# PyPDF2 hack
# Ensure that zlib does not throw error -5 when decompressing,
# because some PDFs won't fit into the allocated memory.
# https://docs.python.org/3/library/zlib.html#zlib.decompressobj
# ----------------------------------------------------------
pypdf.filters.decompress = lambda data: decompressobj().decompress(data)

75 

76 

# monkey patch to discard unused arguments as the old arguments were not discarded in the transitional class
# This keeps the old default value of the `strict` argument
# https://github.com/py-pdf/pypdf/blob/1.26.0/PyPDF2/pdf.py#L1061
# https://pypdf2.readthedocs.io/en/2.0.0/_modules/PyPDF2/_reader.html#PdfReader
class PdfReader(PdfReaderBase):
    def __init__(self, stream, strict=True, *args, **kwargs):
        # Only `stream` and `strict` reach the backend reader; every extra
        # positional or keyword argument is accepted and silently dropped.
        super().__init__(stream, strict)

84 

85 

# Ensure that PdfFileReader and PdfFileWriter are available in case it's still used somewhere
PdfFileReader = PdfReader
pypdf.PdfFileReader = PdfReader
pypdf.PdfFileWriter = PdfWriter

_logger = getLogger(__name__)
# Timestamp layout used for PDF date strings; the offset part is hard-coded to +00'00'.
DEFAULT_PDF_DATETIME_FORMAT = "D:%Y%m%d%H%M%S+00'00'"
# Plain mime-type form, e.g. "text/xml".
REGEX_SUBTYPE_UNFORMATED = re.compile(r'^\w+/[\w-]+$')
# PDF name form with the slash escaped, e.g. "/text#2Fxml".
REGEX_SUBTYPE_FORMATED = re.compile(r'^/\w+#2F[\w-]+$')


# Disable linter warning: this import is needed to make sure a PDF stream can be saved in Image.
PdfImagePlugin.__name__

98 

99 

100# make sure values are unwrapped by calling the specialized __getitem__ 

101def _unwrapping_get(self, key, default=None): 

102 try: 

103 return self[key] 

104 except KeyError: 

105 return default 

106 

107 

DictionaryObject.get = _unwrapping_get


if hasattr(NameObject, 'renumber_table'):
    # Make sure all the correct delimiters are included
    # We will make this change only if pypdf has the renumber_table attribute
    # https://github.com/py-pdf/pypdf/commit/8c542f331828c5839fda48442d89b8ac5d3984ac
    # Delimiter characters first, then code points 0..32; each maps to its "#XX" escape.
    NameObject.renumber_table.update({
        chr(code): f"#{code:02X}".encode()
        for code in (*b"#()<>[]{}/%", *range(33))
    })

119 

120 

class BrandedFileWriter(PdfWriter):
    """Writer that stamps Odoo as creator and producer right before the document is written."""

    if hasattr(PdfWriter, 'write_stream'):
        # >= 2.x has a utility `write` which can open a path, so `write_stream` could be called directly
        def write_stream(self, *args, **kwargs):
            branding = {
                '/Creator': "Odoo",
                '/Producer': "Odoo",
            }
            self.add_metadata(branding)
            super().write_stream(*args, **kwargs)
    else:
        # 1.x has a monolithic write method
        def write(self, *args, **kwargs):
            branding = {
                '/Creator': "Odoo",
                '/Producer': "Odoo",
            }
            self.addMetadata(branding)
            super().write(*args, **kwargs)


PdfFileWriter = BrandedFileWriter

142 

143 

def merge_pdf(pdf_data):
    """Concatenate a collection of PDF documents into a single one.

    Note that the attachments are not merged.

    :param list pdf_data: a list of PDF datastrings
    :return: a unique merged PDF datastring
    """
    merged = PdfFileWriter()
    # Readers are created lazily, one document at a time, in input order.
    readers = (PdfFileReader(io.BytesIO(content), strict=False) for content in pdf_data)
    for reader in readers:
        for index in range(reader.getNumPages()):
            merged.addPage(reader.getPage(index))

    output = io.BytesIO()
    with output:
        merged.write(output)
        return output.getvalue()

159 

160 

def fill_form_fields_pdf(writer, form_fields):
    """Fill in the form fields of every page held by ``writer``.

    The writer is updated in place; nothing is returned.

    :param writer: a PdfFileWriter object
    :param dict form_fields: a dictionary of form fields to update in the PDF
    """
    # This solves a known problem with PyPDF2, where with some pdf software, forms fields aren't
    # correctly filled until the user click on it, see: https://github.com/py-pdf/pypdf/issues/355
    is_upper_version_pypdf2 = hasattr(writer, 'set_need_appearances_writer')
    if is_upper_version_pypdf2:
        writer.set_need_appearances_writer()
    else:  # This method was renamed in PyPDF2 2.0
        root = writer._root_object
        # get the AcroForm tree
        if "/AcroForm" not in root:
            root.update({
                NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer)
            })
        root["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)

    if is_upper_version_pypdf2:
        page_count = len(writer.pages)
    else:
        page_count = writer.getNumPages()

    for page_index in range(page_count):
        page = writer.getPage(page_index)

        if is_upper_version_pypdf2:
            writer.update_page_form_field_values(page, form_fields)
            continue

        # Known bug on previous versions of PyPDF2, fixed in 2.11
        if not page.get('/Annots'):
            _logger.info("No fields to update in this page")
            continue

        try:
            writer.updatePageFormFieldValues(page, form_fields)
        except ValueError:
            # Known bug on previous versions of PyPDF2 for some PDFs, fixed in 2.4.2
            _logger.info("Fields couldn't be filled in this page.")

201 

202 

def rotate_pdf(pdf):
    """Build a new PDF in which every page is rotated clockwise by 90°.

    Note that the attachments are not copied.

    :param pdf: a PDF to rotate
    :return: a PDF rotated
    """
    source = PdfFileReader(io.BytesIO(pdf), strict=False)
    rotated_writer = PdfFileWriter()
    for index in range(source.getNumPages()):
        rotated_page = source.getPage(index)
        rotated_page.rotateClockwise(90)
        rotated_writer.addPage(rotated_page)

    output = io.BytesIO()
    with output:
        rotated_writer.write(output)
        return output.getvalue()

218 

219 

def to_pdf_stream(attachment) -> io.BytesIO | None:
    """Get the byte stream of the attachment as a PDF.

    Returns None (after logging a warning) when the attachment has no raw
    data, or when it is neither already a PDF nor an image.
    """
    if not attachment.raw:
        _logger.warning("%s has no raw data.", attachment)
        return None

    pdf_raw = attachment._get_pdf_raw()
    if pdf_raw:
        return io.BytesIO(pdf_raw)

    source = io.BytesIO(attachment.raw)
    if not attachment.mimetype.startswith('image'):
        _logger.warning("mimetype (%s) not recognized for %s", attachment.mimetype, attachment)
        return None

    # Images are converted to RGB and saved as a one-image PDF.
    converted = io.BytesIO()
    Image.open(source).convert("RGB").save(converted, format="pdf")
    return converted

235 

236 

def extract_page(attachment, num_page=0) -> io.BytesIO | None:
    """Extract a specific page from an attachment PDF.

    :param attachment: the attachment to read, converted through ``to_pdf_stream``
    :param num_page: index of the page handed to ``getPage``
    :return: a stream holding a PDF made of that single page, or None when
        the attachment could not be turned into a PDF stream
    """
    source = to_pdf_stream(attachment)
    if not source:
        return None

    single_page_writer = PdfFileWriter()
    single_page_writer.addPage(PdfFileReader(source).getPage(num_page))

    output = io.BytesIO()
    single_page_writer.write(output)
    return output

249 

250 

def add_banner(pdf_stream, text=None, logo=False, thickness=SENTINEL):
    """ Add a banner on a PDF in the upper right corner, with Odoo's logo (optionally).

    A one-page-per-page overlay PDF is drawn first, then merged on top of each
    original page. Annotations of the original pages are dropped in the process.

    :param pdf_stream (BytesIO):    The PDF stream where the banner will be applied.
    :param text (str):              The text to be displayed.
    :param logo (bool):             Whether to display Odoo's logo in the banner.
    :param thickness (float):       The thickness of the banner in pixels (default: 2cm).
    :return (BytesIO):              The modified PDF stream.
    """
    from reportlab.lib import colors  # noqa: PLC0415
    from reportlab.lib.utils import ImageReader  # noqa: PLC0415
    from reportlab.pdfgen import canvas  # noqa: PLC0415

    if thickness is SENTINEL:
        from reportlab.lib.units import cm  # noqa: PLC0415
        thickness = 2 * cm

    source_pdf = PdfFileReader(pdf_stream, strict=False, overwriteWarnings=False)
    overlay_buffer = io.BytesIO()
    overlay = canvas.Canvas(overlay_buffer)
    with file_open('base/static/img/main_partner-image.png', mode='rb') as logo_file:
        logo_bytes = io.BytesIO(logo_file.read())
    logo_image = Image.open(logo_bytes)
    banner_color = colors.Color(113 / 255, 75 / 255, 103 / 255, 0.8)

    for index in range(source_pdf.getNumPages()):
        source_page = source_pdf.getPage(index)
        width = float(abs(source_page.mediaBox.getWidth()))
        height = float(abs(source_page.mediaBox.getHeight()))

        # Move the origin to the top right corner and tilt the drawing axis.
        overlay.setPageSize((width, height))
        overlay.translate(width, height)
        overlay.rotate(-45)

        # Draw banner
        band = overlay.beginPath()
        band.moveTo(-width, -thickness)
        band.lineTo(-width, -2 * thickness)
        band.lineTo(width, -2 * thickness)
        band.lineTo(width, -thickness)
        overlay.setFillColor(banner_color)
        overlay.drawPath(band, fill=1, stroke=False)

        # Insert text (and logo) inside the banner
        overlay.setFontSize(10)
        overlay.setFillColor(colors.white)
        overlay.drawRightString(0.75 * thickness, -1.45 * thickness, text)
        if logo:
            overlay.drawImage(
                ImageReader(logo_image), 0.25 * thickness, -2.05 * thickness, 40, 40,
                mask='auto', preserveAspectRatio=True)

        overlay.showPage()

    overlay.save()

    # Merge the old pages with the watermark
    watermark_pdf = PdfFileReader(overlay_buffer, overwriteWarnings=False)
    stamped_pdf = PdfFileWriter()
    for index in range(source_pdf.getNumPages()):
        stamped_page = source_pdf.getPage(index)
        # Remove annotations (if any), to prevent errors in PyPDF2
        if '/Annots' in stamped_page:
            del stamped_page['/Annots']
        stamped_page.mergePage(watermark_pdf.getPage(index))
        stamped_pdf.addPage(stamped_page)

    # Write the new pdf into a new output stream
    output = io.BytesIO()
    stamped_pdf.write(output)

    return output

321 

322 

def reshape_text(text):
    """
    Flip the text when it is detected as fully right-to-left, otherwise return it unchanged.

    This is just a hotfix to make things work.
    In the future the clean way would be to use the arabic-reshaper and python3-bidi libraries.

    The detection relies on ``unicodedata.bidirectional(char)``, which gives the
    bidirectional type of a character
    (see https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types):
        - 'L' for a Left-to-Right character
        - 'R' or 'AL' for a Right-to-Left character

    The text is reshaped and reversed only when its first non-blank character is
    of type 'R' or 'AL' and no character after the first one is of type 'L'.
    A falsy ``text`` yields an empty string.
    """
    if not text:
        return ''

    # An all-blank text falls back to a space, which is neither 'R' nor 'AL'.
    leading_letter = text.lstrip()[:1] or ' '
    if unicodedata.bidirectional(leading_letter) not in ('AL', 'R'):
        return text

    for letter in text[1:]:
        if unicodedata.bidirectional(letter) == 'L':
            return text

    return reshape(text)[::-1]

353 

354 

class OdooPdfFileReader(PdfFileReader):
    """OVERRIDE of PdfFileReader to add the management of multiple embedded files."""

    def getAttachments(self):
        """Yield the files embedded inside the PDF as ``(name, content)`` pairs.

        This is a generator: nothing is read before the first iteration. Any
        error met while walking the embedded files (e.g. a malformed PDF with
        an invalid xref page) ends the iteration silently.

        :raises NotImplementedError: if document is encrypted and uses an unsupported encryption method.
        """
        if self.isEncrypted:
            # If the PDF is owner-encrypted, try to unwrap it by giving it an empty user password.
            self.decrypt('')

        try:
            names_tree = self.trailer["/Root"].get("/Names", {})
            embedded_files = names_tree.get("/EmbeddedFiles", {})
            entries = embedded_files.get("/Names")

            if not entries:
                return []
            # Entries alternate between a name and a file specification; keep the latter.
            for reference in entries[1::2]:
                filespec = reference.getObject()
                yield (filespec["/F"], filespec["/EF"]["/F"].getObject().getData())
        except Exception:  # noqa: BLE001
            # malformed pdf (i.e. invalid xref page)
            return []

377 

378 

class OdooPdfFileWriter(PdfFileWriter):
    """PDF writer able to embed several attachments and to produce PDF/A flavoured documents."""

    def __init__(self, *args, **kwargs):
        """
        Override of the init to initialise additional variables.

        All arguments are forwarded untouched to the parent writer.
        """
        super().__init__(*args, **kwargs)
        # Reader the document root was cloned from, set by `cloneReaderDocumentRoot`.
        self._reader = None
        # Turned on once the document is detected as, or converted to, PDF/A.
        self.is_pdfa = False

    def format_subtype(self, subtype):
        """
        Apply the correct format to the subtype.
        It should take the form of "/xxx#2Fxxx". E.g. for "text/xml": "/text#2Fxml"

        :param subtype: The mime-type of the attachment.
        :return: the formatted subtype, the falsy input unchanged, or an empty
            string (with a warning logged) when it cannot be formatted.
        """
        if not subtype:
            return subtype

        adapted_subtype = subtype
        if REGEX_SUBTYPE_UNFORMATED.match(subtype):
            # _pypdf2_2 and _pypdf does the formating when creating a NameObject
            if SUBMOD in ('._pypdf2_2', '._pypdf'):
                return '/' + subtype
            adapted_subtype = '/' + subtype.replace('/', '#2F')

        if not REGEX_SUBTYPE_FORMATED.match(adapted_subtype):
            # The subtype still does not match the correct format, so we will not add it to the document
            _logger.warning("Attempt to add an attachment with the incorrect subtype '%s'. The subtype will be ignored.", subtype)
            adapted_subtype = ''
        return adapted_subtype

    def add_attachment(self, name, data, subtype=None):
        """
        Add an attachment to the pdf. Supports adding multiple attachment, while respecting PDF/A rules.

        :param name: The name of the attachment
        :param data: The data of the attachment
        :param subtype: The mime-type of the attachment. This is required by PDF/A, but not essential otherwise.
        """
        adapted_subtype = self.format_subtype(subtype)

        attachment = self._create_attachment_object({
            'filename': name,
            'content': data,
            'subtype': adapted_subtype,
        })
        if self._root_object.get('/Names') and self._root_object['/Names'].get('/EmbeddedFiles'):
            # An embedded files name tree already exists: append to it.
            names_array = self._root_object["/Names"]["/EmbeddedFiles"]["/Names"]
            names_array.extend([attachment.getObject()['/F'], attachment])
        else:
            # Build the /Names -> /EmbeddedFiles -> /Names chain from scratch.
            names_array = ArrayObject()
            names_array.extend([attachment.getObject()['/F'], attachment])

            embedded_files_names_dictionary = DictionaryObject()
            embedded_files_names_dictionary.update({
                NameObject("/Names"): names_array
            })
            embedded_files_dictionary = DictionaryObject()
            embedded_files_dictionary.update({
                NameObject("/EmbeddedFiles"): embedded_files_names_dictionary
            })
            self._root_object.update({
                NameObject("/Names"): embedded_files_dictionary
            })

        if self._root_object.get('/AF'):
            attachment_array = self._root_object['/AF']
            attachment_array.extend([attachment])
        else:
            # Create a new object containing an array referencing embedded file
            # And reference this array in the root catalogue
            attachment_array = self._addObject(ArrayObject([attachment]))
            self._root_object.update({
                NameObject("/AF"): attachment_array
            })
    addAttachment = add_attachment

    def embed_odoo_attachment(self, attachment, subtype=None):
        """Embed ``attachment`` using its name and raw content.

        :param attachment: object exposing ``name``, ``raw`` and ``mimetype``
        :param subtype: mime-type to use instead of the attachment's own one
        """
        assert attachment, "embed_odoo_attachment cannot be called without attachment."
        self.addAttachment(attachment.name, attachment.raw, subtype=subtype or attachment.mimetype)

    def cloneReaderDocumentRoot(self, reader):
        """Clone the document root of ``reader`` and reuse its header and ID when possible.

        :param reader: the reader to clone; it is kept on the writer as ``_reader``
        """
        super().cloneReaderDocumentRoot(reader)
        self._reader = reader
        # Try to read the header coming in, and reuse it in our new PDF
        # This is done in order to allows modifying PDF/A files after creating them (as PyPDF does not read it)
        stream = reader.stream
        stream.seek(0)
        header = stream.readlines(9)
        # Should always be true, the first line of a pdf should have 9 bytes (%PDF-1.x plus a newline)
        if len(header) == 1:
            # If we found a header, set it back to the new pdf
            self._header = header[0]
            # Also check the second line. If it is PDF/A, it should be a line starting by % following by four bytes + \n
            # A stream ending right after the header gives no line at all: treat it as "not PDF/A"
            # instead of failing on the index access.
            following_lines = stream.readlines(1)
            second_line = following_lines[0] if following_lines else b''
            if second_line[:1] == b'%' and len(second_line) == 6:
                self.is_pdfa = True
                # This is broken in pypdf 3+ and pypdf2 has been automatically
                # writing a binary comment since 1.27
                # py-pdf/pypdf@036789a4664e3f572292bc7dceec10f08b7dbf62 so we
                # only need this if running on 1.x
                #
                # incidentally that means the heuristic above is completely broken
                if SUBMOD == '._pypdf2_1':
                    self._header += second_line
        # clone_reader_document_root clones reader._ID since 3.2 (py-pdf/pypdf#1520)
        if not hasattr(self, '_ID'):
            # Look if we have an ID in the incoming stream and use it.
            self._set_id(reader.trailer.get('/ID', None))

    def _set_id(self, pdf_id):
        """Store ``pdf_id`` as the document ID; a falsy value is ignored."""
        if not pdf_id:
            return

        # property in pypdf
        if hasattr(type(self), '_ID'):
            self.trailers['/ID'] = pdf_id
        else:
            self._ID = pdf_id

    def convert_to_pdfa(self):
        """
        Transform the opened PDF file into a PDF/A compliant file

        Reads ``self._reader``, so `cloneReaderDocumentRoot` has to be called
        first. The document root is also expected to hold an ``/Outlines`` entry.
        """
        # Set the PDF version to 1.7 (as PDF/A-3 is based on version 1.7) and make it PDF/A compliant.
        # See https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Parts-2-and-3-rules#rule-612-1
        self._header = b"%PDF-1.7"

        # " The file header shall begin at byte zero and shall consist of "%PDF-1.n" followed by a single EOL marker,
        # where 'n' is a single digit number between 0 (30h) and 7 (37h) "
        # " The aforementioned EOL marker shall be immediately followed by a % (25h) character followed by at least four
        # bytes, each of whose encoded byte values shall have a decimal value greater than 127 ".
        # PyPDF2 2.X+ already adds these 4 characters by default (so ._pypdf2_2 and ._pypdf don't need it).
        # The injected character `\xc3\xa9` is equivalent to the character `é`.
        # Therefore, on `_pypdf2_1`, the header will look like: `%PDF-1.7\n%éééé`,
        # while on `_pypdf2_2` and `_pypdf`, it will look like: `%PDF-1.7\n%âãÏÓ`.
        if SUBMOD == '._pypdf2_1':
            self._header += b"\n%\xc3\xa9\xc3\xa9\xc3\xa9\xc3\xa9"

        # Add a document ID to the trailer. This is only needed when using encryption with regular PDF, but is required
        # when using PDF/A
        pdf_id = ByteStringObject(md5(self._reader.stream.getvalue()).digest())
        # The first string is based on the content at the time of creating the file, while the second is based on the
        # content of the file when it was last updated. When creating a PDF, both are set to the same value.
        self._set_id(ArrayObject((pdf_id, pdf_id)))

        with file_open('tools/data/files/sRGB2014.icc', mode='rb') as icc_profile:
            icc_profile_file_data = compress(icc_profile.read())

        icc_profile_stream_obj = DecodedStreamObject()
        icc_profile_stream_obj.setData(icc_profile_file_data)
        icc_profile_stream_obj.update({
            NameObject("/Filter"): NameObject("/FlateDecode"),
            NameObject("/N"): NumberObject(3),
            # /Length is an integer entry, so it is stored as a number rather than a name.
            NameObject("/Length"): NumberObject(len(icc_profile_file_data)),
        })

        icc_profile_obj = self._addObject(icc_profile_stream_obj)

        output_intent_dict_obj = DictionaryObject()
        output_intent_dict_obj.update({
            NameObject("/S"): NameObject("/GTS_PDFA1"),
            NameObject("/OutputConditionIdentifier"): createStringObject("sRGB"),
            NameObject("/DestOutputProfile"): icc_profile_obj,
            NameObject("/Type"): NameObject("/OutputIntent"),
        })

        output_intent_obj = self._addObject(output_intent_dict_obj)
        self._root_object.update({
            NameObject("/OutputIntents"): ArrayObject([output_intent_obj]),
        })

        pages = self._root_object['/Pages']['/Kids']

        # PDF/A needs the glyphs width array embedded in the pdf to be consistent with the ones from the font file.
        # But it seems like it is not the case when exporting from wkhtmltopdf.
        try:
            import fontTools.ttLib  # noqa: PLC0415
        except ImportError:
            _logger.warning('The fonttools package is not installed. Generated PDF may not be PDF/A compliant.')
        else:
            fonts = {}
            # First browse through all the pages of the pdf file, to get a reference to all the fonts used in the PDF.
            for page in pages:
                for font in page.getObject()['/Resources']['/Font'].values():
                    for descendant in font.getObject()['/DescendantFonts']:
                        fonts[descendant.idnum] = descendant.getObject()

            # Then for each font, rewrite the width array with the information taken directly from the font file.
            # The new width are calculated such as width = round(1000 * font_glyph_width / font_units_per_em)
            # See: http://martin.hoppenheit.info/blog/2018/pdfa-validation-and-inconsistent-glyph-width-information/
            for font in fonts.values():
                font_file = font['/FontDescriptor']['/FontFile2']
                # The font data stream is closed even when reading the font fails.
                with io.BytesIO(decompress(font_file._data)) as stream:
                    ttfont = fontTools.ttLib.TTFont(stream)
                    font_upm = ttfont['head'].unitsPerEm
                    if parse_version(fontTools.__version__) < parse_version('4.37.2'):
                        glyphs = ttfont.getGlyphSet()._hmtx.metrics
                    else:
                        glyphs = ttfont.getGlyphSet().hMetrics
                    glyph_widths = [
                        NumberObject(round(1000.0 * values[0] / font_upm))
                        for key, values in glyphs.items()
                        if key[:5] == 'glyph'
                    ]

                    font[NameObject('/W')] = ArrayObject([NumberObject(1), ArrayObject(glyph_widths)])

        outlines = self._root_object['/Outlines'].getObject()
        outlines[NameObject('/Count')] = NumberObject(1)

        # [6.7.2.2-1] include a MarkInfo dictionary containing "Marked" with true value
        mark_info = DictionaryObject({NameObject("/Marked"): BooleanObject(True)})
        self._root_object[NameObject("/MarkInfo")] = mark_info

        # [6.7.3.3-1] include minimal document structure in the catalog
        struct_tree_root = DictionaryObject({NameObject("/Type"): NameObject("/StructTreeRoot")})
        self._root_object[NameObject("/StructTreeRoot")] = struct_tree_root

        # Set odoo as producer
        self.addMetadata({
            '/Creator': "Odoo",
            '/Producer': "Odoo",
        })
        self.is_pdfa = True

    def add_file_metadata(self, metadata_content):
        """
        Set the XMP metadata of the pdf, wrapping it with the necessary XMP header/footer.
        These are required for a PDF/A file to be completely compliant. Omitting them would result in validation errors.

        :param metadata_content: bytes of the metadata to add to the pdf.
        """
        # See https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/XMP%20SDK%20Release%20cc-2016-08/XMPSpecificationPart1.pdf
        # Page 10/11
        header = b'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>'
        footer = b'<?xpacket end="w"?>'
        metadata = b'%s%s%s' % (header, metadata_content, footer)
        file_entry = DecodedStreamObject()
        file_entry.setData(metadata)
        file_entry.update({
            NameObject("/Type"): NameObject("/Metadata"),
            NameObject("/Subtype"): NameObject("/XML"),
            # /Length is an integer entry, so it is stored as a number rather than a name.
            NameObject("/Length"): NumberObject(len(metadata)),
        })

        # Add the new metadata to the pdf, then redirect the reference to refer to this new object.
        metadata_object = self._addObject(file_entry)
        self._root_object.update({NameObject("/Metadata"): metadata_object})

    def _create_attachment_object(self, attachment):
        """ Create a PyPdf2.generic object representing an embedded file.

        :param attachment: A dictionary containing:
            * filename: The name of the file to embed (required)
            * content: The bytes of the file to embed (required)
            * subtype: The mime-type of the file to embed (optional)
            * description: A description stored on the file specification (optional)
        :return: the reference returned by ``_addObject`` for the file specification
        """
        from datetime import timezone  # noqa: PLC0415

        content = attachment['content']
        # DEFAULT_PDF_DATETIME_FORMAT hard-codes a +00'00' offset, so the timestamp has to
        # be taken in UTC; a naive local time would be off on servers not running in UTC.
        modification_date = datetime.now(timezone.utc).strftime(DEFAULT_PDF_DATETIME_FORMAT)

        file_entry = DecodedStreamObject()
        file_entry.setData(content)
        file_entry.update({
            NameObject("/Type"): NameObject("/EmbeddedFile"),
            NameObject("/Params"):
                DictionaryObject({
                    NameObject('/CheckSum'): createStringObject(md5(content).hexdigest()),
                    NameObject('/ModDate'): createStringObject(modification_date),
                    NameObject('/Size'): NumberObject(len(content)),
                }),
        })
        if attachment.get('subtype'):
            file_entry.update({
                NameObject("/Subtype"): NameObject(attachment['subtype']),
            })
        file_entry_object = self._addObject(file_entry)
        filename_object = createStringObject(attachment['filename'])
        filespec_object = DictionaryObject({
            NameObject("/AFRelationship"): NameObject("/Data"),
            NameObject("/Type"): NameObject("/Filespec"),
            NameObject("/F"): filename_object,
            NameObject("/EF"):
                DictionaryObject({
                    NameObject("/F"): file_entry_object,
                    NameObject('/UF'): file_entry_object,
                }),
            NameObject("/UF"): filename_object,
        })
        if attachment.get('description'):
            filespec_object.update({NameObject("/Desc"): createStringObject(attachment['description'])})
        return self._addObject(filespec_object)