Skip to content

Commit

Permalink
ENH: Add Page.add_transformation (#883)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma authored May 22, 2022
1 parent 5703b61 commit adee9f0
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 10 deletions.
63 changes: 61 additions & 2 deletions PyPDF2/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,65 @@ def _pushPopGS(contents: Any, pdf: Any) -> ContentStream: # PdfFileReader
stream.operations.append(([], "Q"))
return stream

def add_transformation(
    self,
    ctm: Union[Transformation, CompressedTransformationMatrix],
    expand: bool = False,
) -> None:
    """
    Apply a transformation matrix to this page, in place.

    The page's current content is rebuilt as a content stream, passed
    through ``_addTransformationMatrix`` and ``_pushPopGS``, and the result
    replaces the page's contents entry. The contents entry is replaced
    whether or not ``expand`` is set.

    :param ctm: Either a :class:`Transformation` (its ``ctm`` attribute is
        used) or the six matrix operands ``(a, b, c, d, e, f)`` directly.
    :param bool expand: If true, the media box is enlarged to the smallest
        axis-aligned box containing both the original media box and the
        transformed one.
    """
    if isinstance(ctm, Transformation):
        ctm = ctm.ctm

    new_content_array = ArrayObject()

    content = self.getContents()
    if content is not None:
        content = ContentStream(content, self.pdf)
        # "W" and "n" are the PDF clip / end-path operators. They are
        # inserted at fixed positions 1 and 2 of the operation list.
        # NOTE(review): presumably mirrors the merge code path - confirm.
        content.operations.insert(1, ([], "W"))
        content.operations.insert(2, ([], "n"))
        content = PageObject._addTransformationMatrix(
            content, self.pdf, ctm  # type: ignore[arg-type]
        )
        content = PageObject._pushPopGS(content, self.pdf)
        new_content_array.append(content)

    # if expanding the page to fit a new page, calculate the new media box size
    if expand:
        # Flat (x, y) pairs in the order:
        # lower-left, upper-left, upper-right, lower-right.
        corners = [
            self.mediaBox.getLowerLeft_x().as_numeric(),
            self.mediaBox.getLowerLeft_y().as_numeric(),
            self.mediaBox.getUpperLeft_x().as_numeric(),
            self.mediaBox.getUpperLeft_y().as_numeric(),
            self.mediaBox.getUpperRight_x().as_numeric(),
            self.mediaBox.getUpperRight_y().as_numeric(),
            self.mediaBox.getLowerRight_x().as_numeric(),
            self.mediaBox.getLowerRight_y().as_numeric(),
        ]

        ctm = tuple(float(x) for x in ctm)  # type: ignore[assignment]
        # Map every corner through the matrix: x' = a*x + c*y + e and
        # y' = b*x + d*y + f.
        new_x = [
            ctm[0] * corners[i] + ctm[2] * corners[i + 1] + ctm[4]
            for i in range(0, 8, 2)
        ]
        new_y = [
            ctm[1] * corners[i] + ctm[3] * corners[i + 1] + ctm[5]
            for i in range(0, 8, 2)
        ]

        # Union of the original box and the transformed box. The original
        # upper-right corner lives at indices 4 and 5 of ``corners``;
        # indices 2 and 3 are the upper-LEFT corner and would let the
        # result lose the original right edge.
        lowerleft = [min(corners[0], min(new_x)), min(corners[1], min(new_y))]
        upperright = [max(corners[4], max(new_x)), max(corners[5], max(new_y))]

        self.mediaBox.setLowerLeft(lowerleft)
        self.mediaBox.setUpperRight(upperright)
    self[NameObject(PG.CONTENTS)] = ContentStream(new_content_array, self.pdf)

@staticmethod
def _addTransformationMatrix(
contents: Any, pdf: Any, ctm: CompressedTransformationMatrix
Expand Down Expand Up @@ -356,7 +415,7 @@ def getContents(self) -> Optional[ContentStream]:
else:
return None

def mergePage(self, page2: "PageObject") -> None:
def mergePage(self, page2: "PageObject", expand: bool = False) -> None:
"""
Merge the content streams of two pages into one.
Expand All @@ -369,7 +428,7 @@ def mergePage(self, page2: "PageObject") -> None:
:param PageObject page2: The page to be merged into this one. Should be
an instance of :class:`PageObject<PageObject>`.
"""
self._mergePage(page2)
self._mergePage(page2, expand=expand)

def _mergePage(
self,
Expand Down
8 changes: 8 additions & 0 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ class BooleanObject(PdfObject):
def __init__(self, value: Any) -> None:
    """Wrap ``value``, storing it unchanged on ``self.value``."""
    self.value = value

def __eq__(self, __o: object) -> bool:
    """
    Compare the wrapped value with another ``BooleanObject`` or a ``bool``.

    Any other type of operand compares unequal.
    """
    if isinstance(__o, BooleanObject):
        other_value = __o.value
    elif isinstance(__o, bool):
        other_value = __o
    else:
        # Neither a BooleanObject nor a plain bool: never equal.
        return False
    return self.value == other_value

def writeToStream(
self, stream: StreamType, encryption_key: Union[None, str, bytes]
) -> None:
Expand Down
15 changes: 8 additions & 7 deletions docs/user/cropping-and-transforming.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ page_base = reader_base.pages[0]
reader = PdfFileReader("box.pdf")
page_box = reader.pages[0]

# Apply the transformation: Be aware, that this is an in-place operation
page_base.mergeTransformedPage(page_box, Transformation())
page_base.mergePage(page_box)

# Write the result back
writer = PdfFileWriter()
Expand All @@ -70,9 +69,10 @@ page_base = reader_base.pages[0]
reader = PdfFileReader("box.pdf")
page_box = reader.pages[0]

# Apply the transformation: Be aware, that this is an in-place operation
op = Transformation().rotate(45)
page_base.mergeTransformedPage(page_box, op)
# Apply the transformation
transformation = Transformation().rotate(45)
page_box.add_transformation(transformation)
page_base.mergePage(page_box)

# Write the result back
writer = PdfFileWriter()
Expand All @@ -84,8 +84,9 @@ with open("merged-foo.pdf", "wb") as fp:
If you add the expand parameter:

```python
op = Transformation().rotate(45)
page_base.mergeTransformedPage(page_box, op, expand=True)
transformation = Transformation().rotate(45)
page_box.add_transformation(transformation)
page_base.mergePage(page_box)
```

you get:
Expand Down
12 changes: 12 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ def test_boolean_object_write():
assert stream.read() == b"false"


def test_boolean_eq():
    """BooleanObject equals a matching bool and nothing else."""
    for wrapped in (True, False):
        boolobj = BooleanObject(wrapped)
        # ``== True`` / ``== False`` are deliberate: they exercise __eq__.
        assert (boolobj == True) is wrapped
        assert (boolobj == False) is (not wrapped)
        # A string is never equal, even when it spells out the value.
        assert (boolobj == "True") is False


def test_boolean_object_exception():
stream = BytesIO(b"False")
with pytest.raises(PdfReadError) as exc:
Expand Down
45 changes: 44 additions & 1 deletion tests/test_page.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import json
import os
from copy import deepcopy

import pytest

from PyPDF2 import PdfFileReader, Transformation
from PyPDF2._page import PageObject
from PyPDF2.generic import RectangleObject
from PyPDF2.constants import PageAttributes as PG
from PyPDF2.generic import DictionaryObject, NameObject, RectangleObject

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
Expand Down Expand Up @@ -74,6 +76,47 @@ def test_page_operations(pdf_path, password):
page.extractText()


def test_transformation_equivalence():
    """
    ``add_transformation`` followed by ``mergePage`` must produce the same
    page as the older ``mergeTransformedPage`` call.
    """
    base_pdf = os.path.join(RESOURCE_ROOT, "labeled-edges-center-image.pdf")
    reader_base = PdfFileReader(base_pdf)
    page_base = reader_base.pages[0]

    box_pdf = os.path.join(RESOURCE_ROOT, "box.pdf")
    reader_add = PdfFileReader(box_pdf)
    page_box = reader_add.pages[0]

    op = Transformation().scale(2).rotate(45)

    # New way: transform the overlay page first, then do a plain merge.
    overlay_new = deepcopy(page_box)
    merged_new = deepcopy(page_base)
    overlay_new.add_transformation(op, expand=True)
    merged_new.mergePage(overlay_new, expand=False)

    # Old way: transform and merge in a single call.
    overlay_old = deepcopy(page_box)
    merged_old = deepcopy(page_base)
    merged_old.mergeTransformedPage(overlay_old, op, expand=False)

    # Both approaches should be the same.
    contents_new = merged_new[NameObject(PG.CONTENTS)]
    contents_old = merged_old[NameObject(PG.CONTENTS)]
    assert contents_new == contents_old
    assert merged_new.mediaBox == merged_old.mediaBox
    assert merged_new.trimBox == merged_old.trimBox
    annots_new = merged_new[NameObject(PG.ANNOTS)]
    annots_old = merged_old[NameObject(PG.ANNOTS)]
    assert annots_new == annots_old
    resources_new = merged_new[NameObject(PG.RESOURCES)]
    resources_old = merged_old[NameObject(PG.RESOURCES)]
    compare_dict_objects(resources_new, resources_old)


def compare_dict_objects(d1, d2):
    """
    Assert that two dictionary objects hold the same keys and equal values.

    Values that are ``DictionaryObject`` instances in ``d1`` are compared
    recursively; everything else is compared with ``==``.
    """
    assert sorted(d1.keys()) == sorted(d2.keys())
    for key in d1.keys():
        # Index both sides instead of using ``.items()`` so that lookups go
        # through ``__getitem__`` exactly as before.
        left, right = d1[key], d2[key]
        if not isinstance(left, DictionaryObject):
            assert left == right
            continue
        compare_dict_objects(left, right)


def test_page_transformations():
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
reader = PdfFileReader(pdf_path)
Expand Down

0 comments on commit adee9f0

Please sign in to comment.