py-pdf · stefan6419846 · Apr 2, 2024 · Apr 2, 2024 · Apr 2, 2024 · Apr 2, 2024
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -747,13 +747,24 @@
  if callable(after_page_append):
  after_page_append(writer_page)
 
- def _update_text_field(self, field: DictionaryObject) -> None:
+ def _update_field_annotation(
+ self, field: DictionaryObject, anno: DictionaryObject
+ ) -> None:
  # Calculate rectangle dimensions
- _rct = cast(RectangleObject, field[AA.Rect])
+ _rct = cast(RectangleObject, anno[AA.Rect])
  rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1]))
 
  # Extract font information
- da = cast(str, field[AA.DA])
+ da = anno.get_herited(
+ AA.DA,
+ cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get(
+ AA.DA, None
+ ),
+ )
+ if da is None:
+ da = TextStringObject("/Helv 0 Tf 0 g")
+ else:
+ da = da.get_object()
  font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
  font_properties = [x for x in font_properties if x != ""]
  font_name = font_properties[font_properties.index("Tf") - 2]
@@ -767,19 +778,27 @@
  # Retrieve font information from local DR ...
  dr: Any = cast(
  DictionaryObject,
- cast(DictionaryObject, field.get("/DR", DictionaryObject())).get_object(),
+ cast(
+ DictionaryObject,
+ anno.get_herited(
+ "/DR",
+ cast(
+ DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
+ ).get("/DR", DictionaryObject()),
+ ),
+ ).get_object(),
  )
  dr = dr.get("/Font", DictionaryObject()).get_object()
  if font_name not in dr:
  # ...or AcroForm dictionary
  dr = cast(
  Dict[Any, Any],
- cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}),
+ cast(
+ DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
+ ).get("/DR", {}),
  )
- if isinstance(dr, IndirectObject): # pragma: no cover
- dr = dr.get_object()
- dr = dr.get("/Font", DictionaryObject()).get_object()
- font_res = dr.get(font_name)
+ dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
+ font_res = dr.get(font_name, None)
  if font_res is not None:
  font_res = cast(DictionaryObject, font_res.get_object())
  font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
@@ -806,7 +825,7 @@
  # Retrieve field text and selected values
  field_flags = field.get(FA.Ff, 0)
  if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
- txt = "\n".join(field.get(FA.Opt, {}))
+ txt = "\n".join(anno.get_herited(FA.Opt, {}))
  sel = field.get("/V", [])
  if not isinstance(sel, list):
  sel = [sel]
@@ -822,7 +841,7 @@
  # may be improved but can not find how get fill working => replaced with lined box
  ap_stream += (
  f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
- f"0.5 0.5 0.5 rg s\n{field[AA.DA]}\n"
+ f"0.5 0.5 0.5 rg s\n{da}\n"
  ).encode()
  if line_number == 0:
  ap_stream += f"2 {y_offset} Td\n".encode()
@@ -862,16 +881,16 @@
  )
  }
  )
- if AA.AP not in field:
- field[NameObject(AA.AP)] = DictionaryObject(
+ if AA.AP not in anno:
+ anno[NameObject(AA.AP)] = DictionaryObject(
  {NameObject("/N"): self._add_object(dct)}
  )
- elif "/N" not in cast(DictionaryObject, field[AA.AP]):
- cast(DictionaryObject, field[NameObject(AA.AP)])[
+ elif "/N" not in cast(DictionaryObject, anno[AA.AP]):
+ cast(DictionaryObject, anno[NameObject(AA.AP)])[
  NameObject("/N")
  ] = self._add_object(dct)
  else: # [/AP][/N] exists
- n = field[AA.AP]["/N"].indirect_reference.idnum # type: ignore
+ n = anno[AA.AP]["/N"].indirect_reference.idnum # type: ignore
  self._objects[n - 1] = dct
  dct.indirect_reference = IndirectObject(n, 0, self)
 
@@ -906,65 +925,49 @@
  raise PyPdfError("No /Fields dictionary in Pdf in PdfWriter Object")
  if isinstance(auto_regenerate, bool):
  self.set_need_appearances_writer(auto_regenerate)
- # Iterate through pages, update field values
  if PG.ANNOTS not in page:
  logger_warning("No fields to update on this page", __name__)
  return
- # /Helvetica is just in case of but this is normally insufficient as we miss the font resource
- default_da = af.get(
- InteractiveFormDictEntries.DA, TextStringObject("/Helvetica 0 Tf 0 g")
- )
  for writer_annot in page[PG.ANNOTS]: # type: ignore
  writer_annot = cast(DictionaryObject, writer_annot.get_object())
- # retrieve parent field values, if present
- writer_parent_annot = writer_annot.get(
- PG.PARENT, DictionaryObject()
- ).get_object()
+ if writer_annot.get("/Subtype", "") != "/Widget":
+ continue
+ if "/FT" in writer_annot and "/T" in writer_annot:
+ writer_parent_annot = writer_annot
+ else:
+ writer_parent_annot = writer_annot.get(
+ PG.PARENT, DictionaryObject()
+ ).get_object()
+
  for field, value in fields.items():
- if (
- writer_annot.get(FA.T) == field
- or self._get_qualified_field_name(writer_annot) == field
+ if not (
+ self._get_qualified_field_name(writer_parent_annot) == field
+ or writer_parent_annot.get("/T", None) == field
  ):
- if isinstance(value, list):
- lst = ArrayObject(TextStringObject(v) for v in value)
- writer_annot[NameObject(FA.V)] = lst
- else:
- writer_annot[NameObject(FA.V)] = TextStringObject(value)
- if writer_annot.get(FA.FT) in ("/Btn"):
- # case of Checkbox button (no /FT found in Radio widgets
- writer_annot[NameObject(AA.AS)] = NameObject(value)
- elif (
- writer_annot.get(FA.FT) == "/Tx"
- or writer_annot.get(FA.FT) == "/Ch"
- ):
- # textbox
- if AA.DA not in writer_annot:
- f = writer_annot
- da = default_da
- while AA.DA not in f:
- f = f.get("/Parent")
- if f is None:
- break
- f = f.get_object()
- if AA.DA in f:
- da = f[AA.DA]
- writer_annot[NameObject(AA.DA)] = da
- self._update_text_field(writer_annot)
- elif writer_annot.get(FA.FT) == "/Sig":
- # signature
- logger_warning("Signature forms not implemented yet", __name__)
- if flags:
- writer_annot[NameObject(FA.Ff)] = NumberObject(flags)
+ continue
+ if flags:
+ writer_annot[NameObject(FA.Ff)] = NumberObject(flags)
+ if isinstance(value, list):
+ lst = ArrayObject(TextStringObject(v) for v in value)
+ writer_parent_annot[NameObject(FA.V)] = lst
+ else:
+ writer_parent_annot[NameObject(FA.V)] = TextStringObject(value)
+ if writer_parent_annot.get(FA.FT) in ("/Btn"):
+ # case of Checkbox button (no /FT found in Radio widgets
+ v = NameObject(value)
+ if v not in writer_annot[NameObject(AA.AP)][NameObject("/N")]:
+ v = NameObject("/Off")
+ # other cases will be updated through the for loop
+ writer_annot[NameObject(AA.AS)] = v
  elif (
- writer_parent_annot.get(FA.T) == field
- or self._get_qualified_field_name(writer_parent_annot) == field
+ writer_parent_annot.get(FA.FT) == "/Tx"
+ or writer_parent_annot.get(FA.FT) == "/Ch"
  ):
- writer_parent_annot[NameObject(FA.V)] = TextStringObject(value)
- for k in writer_parent_annot[NameObject(FA.Kids)]:
- k = k.get_object()
- k[NameObject(AA.AS)] = NameObject(
- value if value in k[AA.AP]["/N"] else "/Off"
- )
+ # textbox
+ self._update_field_annotation(writer_parent_annot, writer_annot)
+ elif writer_annot.get(FA.FT) == "/Sig":
+ # signature
+ logger_warning("Signature forms not implemented yet", __name__)
 
  def reattach_fields(
  self, page: Optional[PageObject] = None
@@ -2328,7 +2331,7 @@
  Raises:
  TypeError: The pages attribute is not configured properly
  """
- if isinstance(fileobj, PdfReader):
+ if isinstance(fileobj, PdfDocCommon):
  reader = fileobj
  else:
  stream, encryption_obj = self._create_stream(fileobj)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
@@ -367,6 +367,30 @@ def _clone(
  def raw_get(self, key: Any) -> Any:
  return dict.__getitem__(self, key)
 
+    def get_herited(self, key: str, default: Any = None) -> Any:
+        """
+        Return the value of ``key``, looking it up in the ancestors if needed.
+
+        The lookup starts on this dictionary and follows the ``/Parent``
+        chain until an object holding ``key`` is found.
+
+        Args:
+            key: name of the entry to return
+            default: value returned when neither this dictionary nor any of
+                its ancestors holds ``key``
+
+        Returns:
+            The value found on this dictionary or inherited from the closest
+            ancestor defining ``key``; ``default`` otherwise.
+        """
+        if key in self:
+            return self[key]
+        if "/Parent" not in self:
+            # top of the hierarchy reached without finding the key
+            return default
+        parent = cast("DictionaryObject", self["/Parent"].get_object())
+        return parent.get_herited(key, default)
+
  def __setitem__(self, key: Any, value: Any) -> Any:
  if not isinstance(key, PdfObject):
  raise ValueError("key must be PdfObject")

diff --git a/tests/test_writer.py b/tests/test_writer.py
@@ -1502,6 +1502,88 @@ def test_update_form_fields(tmp_path):
  Path(write_data_here).unlink()
 
 
+@pytest.mark.enable_socket()
+def test_update_form_fields2():
+    """Check field values of top-named forms after update and merge."""
+    my_files = {
+        "test1": {
+            "name": "Test1 Form",
+            "url": "https://github.com/py-pdf/pypdf/files/14817365/test1.pdf",
+            "path": "iss2234a.pdf",
+            "usage": {
+                "fields": {
+                    "First Name": "Reed",
+                    "Middle Name": "R",
+                    "MM": "04",
+                    "DD": "21",
+                    "YY": "24",
+                    "Initial": "RRG",
+                    # "I DO NOT Agree" and "Last Name" are intentionally left
+                    # unset: the final assertion expects None for both
+                },
+            },
+        },
+        "test2": {
+            "name": "Test2 Form",
+            "url": "https://github.com/py-pdf/pypdf/files/14817366/test2.pdf",
+            "path": "iss2234b.pdf",
+            "usage": {
+                "fields": {
+                    "p2 First Name": "Joe",
+                    "p2 Middle Name": "S",
+                    "p2 MM": "03",
+                    "p2 DD": "31",
+                    "p2 YY": "24",
+                    "Initial": "JSS",
+                    # "p2 I DO NOT Agree" is intentionally left unset (None below)
+                    "p2 Last Name": "Smith",
+                    "p3 First Name": "John",
+                    "p3 Middle Name": "R",
+                    "p3 MM": "01",
+                    "p3 DD": "25",
+                    "p3 YY": "21",
+                },
+            },
+        },
+    }
+    merger = PdfWriter()
+
+    for top_name, spec in my_files.items():
+        data = get_data_from_url(spec["url"], name=spec["path"])
+        reader = PdfReader(BytesIO(data))
+        reader.add_form_topname(top_name)
+        writer = PdfWriter(clone_from=reader)
+
+        for page in writer.pages:
+            writer.update_page_form_field_values(
+                page, spec["usage"]["fields"], auto_regenerate=False
+            )
+        merger.append(writer)
+    assert merger.get_form_text_fields(True) == {
+        "test1.First Name": "Reed",
+        "test1.Middle Name": "R",
+        "test1.MM": "04",
+        "test1.DD": "21",
+        "test1.YY": "24",
+        "test1.Initial": "RRG",
+        "test1.I DO NOT Agree": None,
+        "test1.Last Name": None,
+        "test2.p2 First Name": "Joe",
+        "test2.p2 Middle Name": "S",
+        "test2.p2 MM": "03",
+        "test2.p2 DD": "31",
+        "test2.p2 YY": "24",
+        "test2.Initial": "JSS",
+        "test2.p2 I DO NOT Agree": None,
+        "test2.p2 Last Name": "Smith",
+        "test2.p3 First Name": "John",
+        "test2.p3 Middle Name": "R",
+        "test2.p3 MM": "01",
+        "test2.p3 DD": "25",
+        "test2.p3 YY": "21",
+    }
+
+
 @pytest.mark.enable_socket()
 def test_iss1862():
  # The file here has "/B" entry to define the font in a object below the page