Merge branch 'main' into forms_dropdown_options

py-pdf · Jul 21, 2022 · 6eeba04 · 6eeba04
2 parents 13907dd + e1f9772
commit 6eeba04
Show file tree

Hide file tree

Showing 11 changed files with 100 additions and 10 deletions.
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
@@ -0,0 +1,15 @@
+# This file helps us to ignore style / formatting / doc changes
+# in git blame. That is useful when we're trying to find the root cause of an
+# error.
+
+# Docstring formatting
+a89ff74d8c0203278a039d9496a3d8df4d134f84
+
+# STY: Apply pre-commit (black, isort) + use snake_case variables (#832)
+eef03d935dfeacaa75848b39082cf94d833d3174
+
+# STY: Apply black and isort
+baeb7d23278de0f8d00ca9f2b656bf0674f08937
+
+# STY: Documentation, Variable names (#839)
+444fca22836df061d9d23e71ffb7d68edcdfa766
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
@@ -0,0 +1,49 @@
+# Contributors
+
+PyPDF2 has had a lot of contributors since it started with pyPdf in 2005. We are
+a free software project without any company affiliation. We cannot pay
+contributors, but we do value their contributions. A lot of time, effort, and
+expertise went into this project. With this list, we recognize those awesome
+people 🤗
+
+The list is definitely not complete. You can find more contributors via the git
+history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/PyPDF2/graphs/contributors).
+
+## Contributors to the pyPdf / PyPDF2 project
+
+* [Karvonen, Harry](https://github.com/Hatell/)
+* [Lightup1](https://github.com/Lightup1)
+* [Pinheiro, Arthur](https://github.com/xilopaint)
+* [pubpub-zz](https://github.com/pubpub-zz): involved in community development
+* [Thoma, Martin](https://github.com/MartinThoma): Maintainer of PyPDF2 since April 2022. I hope to build a great community with many awesome contributors. [LinkedIn](https://www.linkedin.com/in/martin-thoma/) | [StackOverflow](https://stackoverflow.com/users/562769/martin-thoma) | [Blog](https://martin-thoma.com/)
+* ztravis
+
+## Adding a new contributor
+
+Contributors are:
+
+* Anybody who has a commit in main - no matter how big/small or how many. This also applies to commits credited via co-authored-by.
+* People who opened helpful issues:
+ (1) Bugs: with complete MCVE
+ (2) Well-described feature requests
+ (3) Potentially some more.
+ The maintainers of PyPDF2 have the last call on that one.
+* Community work: This is exceptional. If the maintainers of PyPDF2 see people
+ being super helpful in answering issues / discussions or being very active on
+ Stack Overflow, we also consider them contributors to PyPDF2.
+
+Contributors can add themselves or ask via a GitHub issue to be added.
+
+Please use the following format:
+
+```
+* Last name, First name: 140-characters of text; links to linkedin / github / other profiles and personal pages are ok
+
+OR
+
+* GitHub Username: 140-characters of text; links to linkedin / github / other profiles and personal pages are ok
+```
+
+and add the entry in alphabetical order. The 140 characters are everything visible after the `Name:`.
+
+Please don't use images.
diff --git a/PyPDF2/_cmap.py b/PyPDF2/_cmap.py
@@ -284,7 +284,10 @@ def compute_space_width(
  w1[-1] = cast(float, ft1["/DW"])
  except Exception:
  w1[-1] = 1000.0
- w = list(ft1["/W"]) # type: ignore
+ if "/W" in ft1:
+ w = list(ft1["/W"]) # type: ignore
+ else:
+ w = []
  while len(w) > 0:
  st = w[0]
  second = w[1]

diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py
@@ -1326,6 +1326,15 @@ def read(self, stream: StreamType) -> None:
  if found:
  continue
  # no xref table found at specified location
+ if "/Root" in self.trailer and not self.strict:
+ # if Root has been already found, just raise warning
+ warnings.warn("Invalid parent xref., rebuild xref", PdfReadWarning)
+ try:
+ self._rebuild_xref_table(stream)
+ break
+ except Exception:
+ raise PdfReadError("can not rebuild xref")
+ break
  raise PdfReadError("Could not find xref table at specified location")
  # if not zero-indexed, verify that the table is correct; change it if necessary
  if self.xref_index and not self.strict:

diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py
@@ -1162,9 +1162,17 @@ def get_data(self) -> Union[None, str, bytes]:
  self.decoded_self = decoded
  return decoded._data
 
+ def getData(self) -> Union[None, str, bytes]: # pragma: no cover
+ deprecate_with_replacement("getData", "get_data")
+ return self.get_data()
+
  def set_data(self, data: Any) -> None:
  raise PdfReadError("Creating EncodedStreamObject is not currently supported")
 
+ def setData(self, data: Any) -> None: # pragma: no cover
+ deprecate_with_replacement("setData", "set_data")
+ return self.set_data(data)
+
 
 class ContentStream(DecodedStreamObject):
  def __init__(

diff --git a/docs/conf.py b/docs/conf.py
@@ -17,6 +17,7 @@
 sys.path.insert(0, os.path.abspath("../"))
 
 shutil.copyfile("../CHANGELOG.md", "meta/CHANGELOG.md")
+shutil.copyfile("../CONTRIBUTORS.md", "meta/CONTRIBUTORS.md")
 
 # -- Project information -----------------------------------------------------
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -67,6 +67,7 @@ You can contribute to `PyPDF2 on Github <https:/py-pdf/PyPDF2>`_.
  meta/CHANGELOG
  meta/project-governance
  meta/history
+ meta/CONTRIBUTORS
  meta/comparisons
  meta/faq
 

diff --git a/docs/meta/project-governance.md b/docs/meta/project-governance.md
@@ -71,7 +71,7 @@ as their mother tongue. We try our best to understand others -
 The community can expect the following:
 
 * The **benevolent dictator** tries their best to make decisions from which the overall
- community profits. The benevolent dictator is aware that his/her decisons can shape the
+ community profits. The benevolent dictator is aware that his/her decisions can shape the
  overall community. Once the benevolent dictator notices that she/he doesn't have the time
  to advance PyPDF2, he/she looks for a new benevolent dictator. As it is expected
  that the benevolent dictator will step down at some point of their choice

diff --git a/tests/test_page.py b/tests/test_page.py
@@ -243,6 +243,11 @@ def test_extract_text_single_quote_op():
  "https://corpora.tika.apache.org/base/docs/govdocs1/932/932446.pdf",
  "tika-932446.pdf",
  ),
+ # iss 1134:
+ (
+ "https:/py-pdf/PyPDF2/files/9150656/ST.2019.PDF",
+ "iss_1134.pdf",
+ ),
  ],
 )
 def test_extract_text_page_pdf(url, name):

diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -238,7 +238,7 @@ def test_get_images_raw(strict, with_prev_0, startx_correction, should_fail):
  pdf_data.find(b"4 0 obj"),
  pdf_data.find(b"5 0 obj"),
  b"/Prev 0 " if with_prev_0 else b"",
- # startx_correction should be -1 due to double % at the beginning indiducing an error on startxref computation
+ # startx_correction should be -1 due to double % at the beginning inducing an error on startxref computation
  pdf_data.find(b"xref") + startx_correction,
  )
  pdf_stream = io.BytesIO(pdf_data)
@@ -770,12 +770,12 @@ def test_get_fields():
  assert dict(fields["c1-1"]) == ({"/FT": "/Btn", "/T": "c1-1"})
 
 
+# covers also issue 1089
+@pytest.mark.filterwarnings("ignore::PyPDF2.errors.PdfReadWarning")
 def test_get_fields_read_else_block():
  url = "https://corpora.tika.apache.org/base/docs/govdocs1/934/934771.pdf"
  name = "tika-934771.pdf"
- with pytest.raises(PdfReadError) as exc:
- PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
- assert exc.value.args[0] == "Could not find xref table at specified location"
+ PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
 
 
 def test_get_fields_read_else_block2():
@@ -786,12 +786,11 @@ def test_get_fields_read_else_block2():
  assert fields is None
 
 
+@pytest.mark.filterwarnings("ignore::PyPDF2.errors.PdfReadWarning")
 def test_get_fields_read_else_block3():
  url = "https://corpora.tika.apache.org/base/docs/govdocs1/957/957721.pdf"
  name = "tika-957721.pdf"
- with pytest.raises(PdfReadError) as exc:
- PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
- assert exc.value.args[0] == "Could not find xref table at specified location"
+ PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
 
 
 def test_metadata_is_none():

diff --git a/tests/test_writer.py b/tests/test_writer.py
@@ -203,7 +203,7 @@ def test_remove_text_all_operators(ignore_byte_string_object):
  pdf_data.find(b"4 0 obj") + startx_correction,
  pdf_data.find(b"5 0 obj") + startx_correction,
  pdf_data.find(b"6 0 obj") + startx_correction,
- # startx_correction should be -1 due to double % at the beginning indiducing an error on startxref computation
+ # startx_correction should be -1 due to double % at the beginning inducing an error on startxref computation
  pdf_data.find(b"xref"),
  )
  print(pdf_data.decode())