Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix fields update where annotations are kids of field #2570

Merged
merged 10 commits into from
Apr 2, 2024
137 changes: 70 additions & 67 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,13 +747,24 @@
if callable(after_page_append):
after_page_append(writer_page)

def _update_text_field(self, field: DictionaryObject) -> None:
def _update_field_annotation(
self, field: DictionaryObject, anno: DictionaryObject
) -> None:
# Calculate rectangle dimensions
_rct = cast(RectangleObject, field[AA.Rect])
_rct = cast(RectangleObject, anno[AA.Rect])
rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1]))

# Extract font information
da = cast(str, field[AA.DA])
da = anno.get_herited(
pubpub-zz marked this conversation as resolved.
Show resolved Hide resolved
AA.DA,
cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get(
AA.DA, None
),
)
if da is None:
da = TextStringObject("/Helv 0 Tf 0 g")

Check warning on line 765 in pypdf/_writer.py

View check run for this annotation

Codecov / codecov/patch

pypdf/_writer.py#L765

Added line #L765 was not covered by tests
else:
da = da.get_object()
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
font_properties = [x for x in font_properties if x != ""]
font_name = font_properties[font_properties.index("Tf") - 2]
Expand All @@ -767,19 +778,27 @@
# Retrieve font information from local DR ...
dr: Any = cast(
DictionaryObject,
cast(DictionaryObject, field.get("/DR", DictionaryObject())).get_object(),
cast(
DictionaryObject,
anno.get_herited(
"/DR",
cast(
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
).get("/DR", DictionaryObject()),
),
).get_object(),
)
dr = dr.get("/Font", DictionaryObject()).get_object()
if font_name not in dr:
# ...or AcroForm dictionary
dr = cast(
Dict[Any, Any],
cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}),
cast(
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
).get("/DR", {}),
)
if isinstance(dr, IndirectObject): # pragma: no cover
dr = dr.get_object()
dr = dr.get("/Font", DictionaryObject()).get_object()
font_res = dr.get(font_name)
dr = dr.get_object().get("/Font", DictionaryObject()).get_object()

Check warning on line 800 in pypdf/_writer.py

View check run for this annotation

Codecov / codecov/patch

pypdf/_writer.py#L800

Added line #L800 was not covered by tests
font_res = dr.get(font_name, None)
if font_res is not None:
font_res = cast(DictionaryObject, font_res.get_object())
font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
Expand All @@ -806,7 +825,7 @@
# Retrieve field text and selected values
field_flags = field.get(FA.Ff, 0)
if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
txt = "\n".join(field.get(FA.Opt, {}))
txt = "\n".join(anno.get_herited(FA.Opt, {}))
pubpub-zz marked this conversation as resolved.
Show resolved Hide resolved
sel = field.get("/V", [])
if not isinstance(sel, list):
sel = [sel]
Expand All @@ -822,7 +841,7 @@
# This could be improved, but no way was found to get a filled rectangle working => replaced with an outlined box
ap_stream += (
f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
f"0.5 0.5 0.5 rg s\n{field[AA.DA]}\n"
f"0.5 0.5 0.5 rg s\n{da}\n"
).encode()
if line_number == 0:
ap_stream += f"2 {y_offset} Td\n".encode()
Expand Down Expand Up @@ -862,16 +881,16 @@
)
}
)
if AA.AP not in field:
field[NameObject(AA.AP)] = DictionaryObject(
if AA.AP not in anno:
anno[NameObject(AA.AP)] = DictionaryObject(
{NameObject("/N"): self._add_object(dct)}
)
elif "/N" not in cast(DictionaryObject, field[AA.AP]):
cast(DictionaryObject, field[NameObject(AA.AP)])[
elif "/N" not in cast(DictionaryObject, anno[AA.AP]):
cast(DictionaryObject, anno[NameObject(AA.AP)])[
NameObject("/N")
] = self._add_object(dct)
else: # [/AP][/N] exists
n = field[AA.AP]["/N"].indirect_reference.idnum # type: ignore
n = anno[AA.AP]["/N"].indirect_reference.idnum # type: ignore
self._objects[n - 1] = dct
dct.indirect_reference = IndirectObject(n, 0, self)

Expand Down Expand Up @@ -906,65 +925,49 @@
raise PyPdfError("No /Fields dictionary in Pdf in PdfWriter Object")
if isinstance(auto_regenerate, bool):
self.set_need_appearances_writer(auto_regenerate)
# Iterate through pages, update field values
if PG.ANNOTS not in page:
logger_warning("No fields to update on this page", __name__)
return
# /Helvetica is just in case of but this is normally insufficient as we miss the font resource
default_da = af.get(
InteractiveFormDictEntries.DA, TextStringObject("/Helvetica 0 Tf 0 g")
)
for writer_annot in page[PG.ANNOTS]: # type: ignore
writer_annot = cast(DictionaryObject, writer_annot.get_object())
# retrieve parent field values, if present
writer_parent_annot = writer_annot.get(
PG.PARENT, DictionaryObject()
).get_object()
if writer_annot.get("/Subtype", "") != "/Widget":
continue

Check warning on line 934 in pypdf/_writer.py

View check run for this annotation

Codecov / codecov/patch

pypdf/_writer.py#L934

Added line #L934 was not covered by tests
if "/FT" in writer_annot and "/T" in writer_annot:
writer_parent_annot = writer_annot
else:
writer_parent_annot = writer_annot.get(
PG.PARENT, DictionaryObject()
).get_object()

for field, value in fields.items():
if (
writer_annot.get(FA.T) == field
or self._get_qualified_field_name(writer_annot) == field
if not (
self._get_qualified_field_name(writer_parent_annot) == field
or writer_parent_annot.get("/T", None) == field
):
if isinstance(value, list):
lst = ArrayObject(TextStringObject(v) for v in value)
writer_annot[NameObject(FA.V)] = lst
else:
writer_annot[NameObject(FA.V)] = TextStringObject(value)
if writer_annot.get(FA.FT) in ("/Btn"):
# case of Checkbox button (no /FT found in Radio widgets
writer_annot[NameObject(AA.AS)] = NameObject(value)
elif (
writer_annot.get(FA.FT) == "/Tx"
or writer_annot.get(FA.FT) == "/Ch"
):
# textbox
if AA.DA not in writer_annot:
f = writer_annot
da = default_da
while AA.DA not in f:
f = f.get("/Parent")
if f is None:
break
f = f.get_object()
if AA.DA in f:
da = f[AA.DA]
writer_annot[NameObject(AA.DA)] = da
self._update_text_field(writer_annot)
elif writer_annot.get(FA.FT) == "/Sig":
# signature
logger_warning("Signature forms not implemented yet", __name__)
if flags:
writer_annot[NameObject(FA.Ff)] = NumberObject(flags)
continue
if flags:
writer_annot[NameObject(FA.Ff)] = NumberObject(flags)
if isinstance(value, list):
lst = ArrayObject(TextStringObject(v) for v in value)
writer_parent_annot[NameObject(FA.V)] = lst
else:
writer_parent_annot[NameObject(FA.V)] = TextStringObject(value)
if writer_parent_annot.get(FA.FT) in ("/Btn"):
# case of a Checkbox button (no /FT is found in Radio widgets)
v = NameObject(value)
if v not in writer_annot[NameObject(AA.AP)][NameObject("/N")]:
v = NameObject("/Off")
# other cases will be updated through the for loop
writer_annot[NameObject(AA.AS)] = v
elif (
writer_parent_annot.get(FA.T) == field
or self._get_qualified_field_name(writer_parent_annot) == field
writer_parent_annot.get(FA.FT) == "/Tx"
or writer_parent_annot.get(FA.FT) == "/Ch"
):
writer_parent_annot[NameObject(FA.V)] = TextStringObject(value)
for k in writer_parent_annot[NameObject(FA.Kids)]:
k = k.get_object()
k[NameObject(AA.AS)] = NameObject(
value if value in k[AA.AP]["/N"] else "/Off"
)
# textbox
self._update_field_annotation(writer_parent_annot, writer_annot)
elif writer_annot.get(FA.FT) == "/Sig":
# signature
logger_warning("Signature forms not implemented yet", __name__)

Check warning on line 970 in pypdf/_writer.py

View check run for this annotation

Codecov / codecov/patch

pypdf/_writer.py#L970

Added line #L970 was not covered by tests

def reattach_fields(
self, page: Optional[PageObject] = None
Expand Down Expand Up @@ -2328,7 +2331,7 @@
Raises:
TypeError: The pages attribute is not configured properly
"""
if isinstance(fileobj, PdfReader):
if isinstance(fileobj, PdfDocCommon):
reader = fileobj
else:
stream, encryption_obj = self._create_stream(fileobj)
Expand Down
24 changes: 24 additions & 0 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,30 @@ def _clone(
def raw_get(self, key: Any) -> Any:
return dict.__getitem__(self, key)

def get_herited(self, key: str, default: Any = None) -> Any:
    """
    Return the value of ``key``, inherited through ``/Parent`` if needed.

    The key is looked up in this dictionary first. If it is absent, each
    ancestor reached through the ``/Parent`` entry is searched in turn, and
    the first value found is returned.

    Args:
        key: string identifying the entry to return.
        default: value returned when neither this dictionary nor any of
            its ancestors holds ``key``.

    Returns:
        The value stored in this dictionary, else the nearest inherited
        value, else ``default``.
    """
    node = self
    visited = set()
    # Walk up the /Parent chain iteratively: no exception is needed to
    # steer the control flow, and deep hierarchies cannot exhaust the
    # interpreter's recursion limit.
    while id(node) not in visited:
        visited.add(id(node))
        if key in node:
            return node[key]
        if "/Parent" not in node:
            return default
        node = node["/Parent"].get_object()
    # A dictionary was reached twice: the /Parent chain loops back on itself
    # (malformed document), so there is nothing left to inherit from.
    return default

def __setitem__(self, key: Any, value: Any) -> Any:
if not isinstance(key, PdfObject):
raise ValueError("key must be PdfObject")
Expand Down
82 changes: 82 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,88 @@ def test_update_form_fields(tmp_path):
Path(write_data_here).unlink()


@pytest.mark.enable_socket()
def test_update_form_fields2():
    """
    Fill the text fields of two downloaded forms and merge the results.

    Each form gets a distinct top-level name before being updated, so the
    merged document is expected to report every field under a qualified
    ``<topname>.<field>`` key, including "Initial", which exists in both.
    """
    my_files = {
        "test1": {
            "name": "Test1 Form",
            "url": "https://github.com/py-pdf/pypdf/files/14817365/test1.pdf",
            "path": "iss2234a.pdf",
            "usage": {
                # "I DO NOT Agree" and "Last Name" are deliberately left
                # unset; they are expected to be reported as None below.
                "fields": {
                    "First Name": "Reed",
                    "Middle Name": "R",
                    "MM": "04",
                    "DD": "21",
                    "YY": "24",
                    "Initial": "RRG",
                },
            },
        },
        "test2": {
            "name": "Test2 Form",
            "url": "https://github.com/py-pdf/pypdf/files/14817366/test2.pdf",
            "path": "iss2234b.pdf",
            "usage": {
                # "p2 I DO NOT Agree" is deliberately left unset; it is
                # expected to be reported as None below.
                "fields": {
                    "p2 First Name": "Joe",
                    "p2 Middle Name": "S",
                    "p2 MM": "03",
                    "p2 DD": "31",
                    "p2 YY": "24",
                    "Initial": "JSS",
                    "p2 Last Name": "Smith",
                    "p3 First Name": "John",
                    "p3 Middle Name": "R",
                    "p3 MM": "01",
                    "p3 DD": "25",
                    "p3 YY": "21",
                },
            },
        },
    }
    merger = PdfWriter()

    for topname, spec in my_files.items():
        pdf_data = get_data_from_url(spec["url"], name=spec["path"])
        reader = PdfReader(BytesIO(pdf_data))
        reader.add_form_topname(topname)
        writer = PdfWriter(clone_from=reader)

        field_values = spec["usage"]["fields"]
        for page in writer.pages:
            writer.update_page_form_field_values(
                page, field_values, auto_regenerate=False
            )
        merger.append(writer)

    assert merger.get_form_text_fields(True) == {
        "test1.First Name": "Reed",
        "test1.Middle Name": "R",
        "test1.MM": "04",
        "test1.DD": "21",
        "test1.YY": "24",
        "test1.Initial": "RRG",
        "test1.I DO NOT Agree": None,
        "test1.Last Name": None,
        "test2.p2 First Name": "Joe",
        "test2.p2 Middle Name": "S",
        "test2.p2 MM": "03",
        "test2.p2 DD": "31",
        "test2.p2 YY": "24",
        "test2.Initial": "JSS",
        "test2.p2 I DO NOT Agree": None,
        "test2.p2 Last Name": "Smith",
        "test2.p3 First Name": "John",
        "test2.p3 Middle Name": "R",
        "test2.p3 MM": "01",
        "test2.p3 DD": "25",
        "test2.p3 YY": "21",
    }


@pytest.mark.enable_socket()
def test_iss1862():
# The file here has "/B" entry to define the font in a object below the page
Expand Down
Loading