From adee9f0de841355fed41697d0947adcce9c1c814 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 22 May 2022 08:32:00 +0200 Subject: [PATCH] ENH: Add Page.add_transformation (#883) --- PyPDF2/_page.py | 63 +++++++++++++++++++++++++- PyPDF2/generic.py | 8 ++++ docs/user/cropping-and-transforming.md | 15 +++--- tests/test_generic.py | 12 +++++ tests/test_page.py | 45 +++++++++++++++++- 5 files changed, 133 insertions(+), 10 deletions(-) diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py index 8a14a3084..f53ac3183 100644 --- a/PyPDF2/_page.py +++ b/PyPDF2/_page.py @@ -320,6 +320,65 @@ def _pushPopGS(contents: Any, pdf: Any) -> ContentStream: # PdfFileReader stream.operations.append(([], "Q")) return stream + def add_transformation( + self, + ctm: Union[Transformation, CompressedTransformationMatrix], + expand: bool = False, + ) -> None: + if isinstance(ctm, Transformation): + ctm = ctm.ctm + + transformation_func = lambda content: PageObject._addTransformationMatrix( + content, self.pdf, ctm # type: ignore[arg-type] + ) + + new_content_array = ArrayObject() + + content = self.getContents() + if content is not None: + content = ContentStream(content, self.pdf) + content.operations.insert(1, ([], "W")) + content.operations.insert(2, ([], "n")) + if transformation_func is not None: + content = transformation_func(content) + content = PageObject._pushPopGS(content, self.pdf) + new_content_array.append(content) + + # if expanding the page to fit a new page, calculate the new media box size + if expand: + corners = [ + self.mediaBox.getLowerLeft_x().as_numeric(), + self.mediaBox.getLowerLeft_y().as_numeric(), + self.mediaBox.getUpperLeft_x().as_numeric(), + self.mediaBox.getUpperLeft_y().as_numeric(), + self.mediaBox.getUpperRight_x().as_numeric(), + self.mediaBox.getUpperRight_y().as_numeric(), + self.mediaBox.getLowerRight_x().as_numeric(), + self.mediaBox.getLowerRight_y().as_numeric(), + ] + + ctm = tuple(float(x) for x in ctm) # type: ignore[assignment] + new_x = [ + 
ctm[0] * corners[i] + ctm[2] * corners[i + 1] + ctm[4] + for i in range(0, 8, 2) + ] + new_y = [ + ctm[1] * corners[i] + ctm[3] * corners[i + 1] + ctm[5] + for i in range(0, 8, 2) + ] + + lowerleft = [min(new_x), min(new_y)] + upperright = [max(new_x), max(new_y)] + lowerleft = [min(corners[0], lowerleft[0]), min(corners[1], lowerleft[1])] + upperright = [ + max(corners[2], upperright[0]), + max(corners[3], upperright[1]), + ] + + self.mediaBox.setLowerLeft(lowerleft) + self.mediaBox.setUpperRight(upperright) + self[NameObject(PG.CONTENTS)] = ContentStream(new_content_array, self.pdf) + @staticmethod def _addTransformationMatrix( contents: Any, pdf: Any, ctm: CompressedTransformationMatrix @@ -356,7 +415,7 @@ def getContents(self) -> Optional[ContentStream]: else: return None - def mergePage(self, page2: "PageObject") -> None: + def mergePage(self, page2: "PageObject", expand: bool = False) -> None: """ Merge the content streams of two pages into one. @@ -369,7 +428,7 @@ def mergePage(self, page2: "PageObject") -> None: :param PageObject page2: The page to be merged into this one. Should be an instance of :class:`PageObject`. 
""" - self._mergePage(page2) + self._mergePage(page2, expand=expand) def _mergePage( self, diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 60c842862..98359c87f 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -100,6 +100,14 @@ class BooleanObject(PdfObject): def __init__(self, value: Any) -> None: self.value = value + def __eq__(self, __o: object) -> bool: + if isinstance(__o, BooleanObject): + return self.value == __o.value + elif isinstance(__o, bool): + return self.value == __o + else: + return False + def writeToStream( self, stream: StreamType, encryption_key: Union[None, str, bytes] ) -> None: diff --git a/docs/user/cropping-and-transforming.md b/docs/user/cropping-and-transforming.md index 6f3f60186..1cabff21a 100644 --- a/docs/user/cropping-and-transforming.md +++ b/docs/user/cropping-and-transforming.md @@ -46,8 +46,7 @@ page_base = reader_base.pages[0] reader = PdfFileReader("box.pdf") page_box = reader.pages[0] -# Apply the transformation: Be aware, that this is an in-place operation -page_base.mergeTransformedPage(page_box, Transformation()) +page_base.mergePage(page_box) # Write the result back writer = PdfFileWriter() @@ -70,9 +69,10 @@ page_base = reader_base.pages[0] reader = PdfFileReader("box.pdf") page_box = reader.pages[0] -# Apply the transformation: Be aware, that this is an in-place operation -op = Transformation().rotate(45) -page_base.mergeTransformedPage(page_box, op) +# Apply the transformation +transformation = Transformation().rotate(45) +page_box.add_transformation(transformation) +page_base.mergePage(page_box) # Write the result back writer = PdfFileWriter() @@ -84,8 +84,9 @@ with open("merged-foo.pdf", "wb") as fp: If you add the expand parameter: ```python -op = Transformation().rotate(45) -page_base.mergeTransformedPage(page_box, op, expand=True) +transformation = Transformation().rotate(45) +page_box.add_transformation(transformation) +page_base.mergePage(page_box) ``` you get: diff --git a/tests/test_generic.py 
b/tests/test_generic.py index 0124084af..48f7b396c 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -65,6 +65,18 @@ def test_boolean_object_write(): assert stream.read() == b"false" +def test_boolean_eq(): + boolobj = BooleanObject(True) + assert (boolobj == True) is True + assert (boolobj == False) is False + assert (boolobj == "True") is False + + boolobj = BooleanObject(False) + assert (boolobj == True) is False + assert (boolobj == False) is True + assert (boolobj == "True") is False + + def test_boolean_object_exception(): stream = BytesIO(b"False") with pytest.raises(PdfReadError) as exc: diff --git a/tests/test_page.py b/tests/test_page.py index a9b4f3c82..1f93bbf0a 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -1,11 +1,13 @@ import json import os +from copy import deepcopy import pytest from PyPDF2 import PdfFileReader, Transformation from PyPDF2._page import PageObject -from PyPDF2.generic import RectangleObject +from PyPDF2.constants import PageAttributes as PG +from PyPDF2.generic import DictionaryObject, NameObject, RectangleObject TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) PROJECT_ROOT = os.path.dirname(TESTS_ROOT) @@ -74,6 +76,47 @@ def test_page_operations(pdf_path, password): page.extractText() +def test_transformation_equivalence(): + pdf_path = os.path.join(RESOURCE_ROOT, "labeled-edges-center-image.pdf") + reader_base = PdfFileReader(pdf_path) + page_base = reader_base.pages[0] + + pdf_path = os.path.join(RESOURCE_ROOT, "box.pdf") + reader_add = PdfFileReader(pdf_path) + page_box = reader_add.pages[0] + + op = Transformation().scale(2).rotate(45) + + # Option 1: The new way + page_box1 = deepcopy(page_box) + page_base1 = deepcopy(page_base) + page_box1.add_transformation(op, expand=True) + page_base1.mergePage(page_box1, expand=False) + + # Option 2: The old way + page_box2 = deepcopy(page_box) + page_base2 = deepcopy(page_base) + page_base2.mergeTransformedPage(page_box2, op, expand=False) + + # Should 
be the same + assert page_base1[NameObject(PG.CONTENTS)] == page_base2[NameObject(PG.CONTENTS)] + assert page_base1.mediaBox == page_base2.mediaBox + assert page_base1.trimBox == page_base2.trimBox + assert page_base1[NameObject(PG.ANNOTS)] == page_base2[NameObject(PG.ANNOTS)] + compare_dict_objects( + page_base1[NameObject(PG.RESOURCES)], page_base2[NameObject(PG.RESOURCES)] + ) + + +def compare_dict_objects(d1, d2): + assert sorted(d1.keys()) == sorted(d2.keys()) + for k in d1.keys(): + if isinstance(d1[k], DictionaryObject): + compare_dict_objects(d1[k], d2[k]) + else: + assert d1[k] == d2[k] + + def test_page_transformations(): pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") reader = PdfFileReader(pdf_path)