Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Mark read_next_end_line as deprecated #965

Merged
merged 1 commit into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import os
import re
import struct
import warnings
from hashlib import md5
import os
from io import BytesIO
from typing import (
Any,
Expand All @@ -55,8 +55,8 @@
deprecate_with_replacement,
ord_,
read_non_whitespace,
read_until_whitespace,
read_previous_line,
read_until_whitespace,
skip_over_comment,
skip_over_whitespace,
)
Expand Down Expand Up @@ -1554,6 +1554,44 @@ def _pairs(self, array: List[int]) -> Iterable[Tuple[int, int]]:
if (i + 1) >= len(array):
break

def read_next_end_line(self, stream: StreamType, limit_offset: int = 0) -> bytes:
    """
    Collect one line from ``stream`` while walking backwards byte by byte.

    Each iteration reads a single byte at the current position and then
    seeks two bytes back, so successive reads return earlier and earlier
    bytes. Bytes are collected until a CR or LF is met; the collected bytes
    are then reversed so the result is in forward (file) order.

    Every call emits a deprecation warning via ``deprecate_no_replacement``.

    .. deprecated:: 2.1.0

    :param stream: seekable stream; scanning starts at its current position.
    :param limit_offset: stream position at which scanning is aborted.
    :return: the collected bytes, excluding the end-of-line bytes.
    :raises PdfReadError: if the position is 0 or ``limit_offset`` at the
        top of an iteration, or if the position is below 2 right after a
        read.
    """
    deprecate_no_replacement("read_next_end_line", removed_in="4.0.0")
    line_parts = []
    while True:
        # Prevent infinite loops in malformed PDFs
        if stream.tell() == 0 or stream.tell() == limit_offset:
            raise PdfReadError("Could not read malformed PDF file")
        x = stream.read(1)
        if stream.tell() < 2:
            raise PdfReadError("EOL marker not found")
        # Undo the read and step one byte further back, so the next read(1)
        # returns the byte preceding ``x``.
        stream.seek(-2, 1)
        if x == b_("\n") or x == b_("\r"):  # \n = LF; \r = CR
            crlf = False
            # Consume any further EOL bytes that precede the one just found.
            while x == b_("\n") or x == b_("\r"):
                x = stream.read(1)
                if x == b_("\n") or x == b_("\r"):  # account for CR+LF
                    stream.seek(-1, 1)
                    crlf = True
                # NOTE(review): nesting of this check (inside the ``while``,
                # outside the ``if`` above) was reconstructed from an
                # unindented rendering -- confirm against PyPDF2/_reader.py.
                if stream.tell() < 2:
                    raise PdfReadError("EOL marker not found")
                stream.seek(-2, 1)
            stream.seek(
                2 if crlf else 1, 1
            )  # positive offset with whence=1 moves forward: 2 bytes if a second EOL byte was seen, else 1
            break
        else:
            line_parts.append(x)
    # Bytes were gathered last-to-first; restore file order before joining.
    line_parts.reverse()
    return b"".join(line_parts)

def readNextEndLine(
    self, stream: StreamType, limit_offset: int = 0
) -> bytes:  # pragma: no cover
    """
    Legacy camelCase spelling of :meth:`read_next_end_line`.

    Emits its own deprecation warning and then delegates, passing both
    arguments through unchanged.

    .. deprecated:: 1.28.0
    """
    deprecate_no_replacement("readNextEndLine")
    line = self.read_next_end_line(stream, limit_offset)
    return line

def decrypt(self, password: Union[str, bytes]) -> int:
"""
When using an encrypted / secured PDF file with the PDF Standard
Expand Down
32 changes: 20 additions & 12 deletions PyPDF2/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,16 @@
__author__ = "Mathieu Fenniak"
__author_email__ = "[email protected]"

import os
import warnings
from codecs import getencoder
from io import BufferedReader, BufferedWriter, BytesIO, FileIO, DEFAULT_BUFFER_SIZE
import os
from io import (
DEFAULT_BUFFER_SIZE,
BufferedReader,
BufferedWriter,
BytesIO,
FileIO,
)
from typing import Any, Dict, Optional, Tuple, Union, overload

try:
Expand All @@ -56,7 +62,7 @@
StreamType = Union[BytesIO, BufferedReader, BufferedWriter, FileIO]
StrByteType = Union[str, StreamType]

# Templates for deprecation warnings, filled in with ``str.format``.
# The first assignment is superseded by the one directly below it.
DEPR_MSG_NO_REPLACEMENT = "{} is deprecated and will be removed in PyPDF2 3.0.0."
# Placeholders: deprecated name, then the release in which it is removed.
DEPR_MSG_NO_REPLACEMENT = "{} is deprecated and will be removed in PyPDF2 {}."
# Placeholders: deprecated name, then the name of its replacement.
# NOTE(review): the removal version is hard-coded in this template, and
# ``str.format`` silently ignores surplus positional arguments, so a third
# argument passed to ``DEPR_MSG.format`` has no effect on the message.
DEPR_MSG = "{} is deprecated and will be removed in PyPDF2 3.0.0. Use {} instead."


Expand Down Expand Up @@ -132,7 +138,7 @@ def read_until_regex(stream: StreamType, regex: Any, ignore_eof: bool = False) -
return name


CRLF = b'\r\n'
# The CR and LF bytes. Used for per-byte membership tests
# (``block[idx] in CRLF``), so either byte on its own counts as a match.
CRLF = b"\r\n"


def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
    """
    Read the ``to_read`` bytes that end at the stream's current position.

    On return the stream is positioned at the *start* of the block that was
    read, i.e. ``to_read`` bytes before where it was on entry. Repeated calls
    therefore walk backwards through the stream.

    :param stream: seekable binary stream.
    :param to_read: number of bytes to read.
    :return: the bytes read, in file order.
    :raises PdfStreamError: if fewer than ``to_read`` bytes precede the
        current position, or if the read returns fewer bytes than requested.
    """
    if stream.tell() < to_read:
        raise PdfStreamError("Could not read malformed PDF file")
    # Seek to the start of the block we want to read.
    stream.seek(-to_read, os.SEEK_CUR)
    block = stream.read(to_read)
    # Reading advanced the position again; return to the start of the block
    # so the caller can continue backwards from there.
    stream.seek(-to_read, os.SEEK_CUR)
    if len(block) != to_read:
        raise PdfStreamError(f"EOF: read {len(block)}, expected {to_read}?")
    return block


Expand Down Expand Up @@ -184,7 +190,7 @@ def read_previous_line(stream: StreamType) -> bytes:
# a previous one).
# Our combined line is the remainder of the block
# plus any previously read blocks.
line_content.append(block[idx + 1:])
line_content.append(block[idx + 1 :])
# Continue to read off any more CRLF characters.
while idx >= 0 and block[idx] in CRLF:
idx -= 1
Expand All @@ -198,7 +204,7 @@ def read_previous_line(stream: StreamType) -> bytes:
stream.seek(idx + 1, os.SEEK_CUR)
break
# Join all the blocks in the line (which are in reverse order)
return b''.join(line_content[::-1])
return b"".join(line_content[::-1])


def matrix_multiply(
Expand Down Expand Up @@ -315,9 +321,11 @@ def deprecate(msg: str, stacklevel: int = 3) -> None:
warnings.warn(msg, PendingDeprecationWarning, stacklevel=stacklevel)


def deprecate_with_replacement(
    old_name: str, new_name: str, removed_in: str = "3.0.0"
) -> None:
    """
    Warn that ``old_name`` is deprecated in favour of ``new_name``.

    :param old_name: name of the deprecated function, method or argument.
    :param new_name: name callers should use instead.
    :param removed_in: PyPDF2 release in which ``old_name`` will be removed.
    """
    # The message is built here rather than with ``DEPR_MSG.format(...)``:
    # that template hard-codes "3.0.0" and has only two placeholders, so a
    # third positional argument would be silently ignored by ``str.format``.
    # With the default ``removed_in`` the text is identical to the template's.
    message = (
        f"{old_name} is deprecated and will be removed in PyPDF2 {removed_in}. "
        f"Use {new_name} instead."
    )
    # The second argument is forwarded by ``deprecate`` as the warning's stacklevel.
    deprecate(message, 4)


def deprecate_no_replacement(name: str, removed_in: str = "3.0.0") -> None:
    """
    Warn that ``name`` is deprecated and has no replacement.

    :param name: name of the deprecated function, method or argument.
    :param removed_in: PyPDF2 release in which ``name`` will be removed.
    """
    message = DEPR_MSG_NO_REPLACEMENT.format(name, removed_in)
    # The second argument is forwarded by ``deprecate`` as the warning's stacklevel.
    deprecate(message, 4)
66 changes: 41 additions & 25 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from PyPDF2._utils import (
mark_location,
matrix_multiply,
read_block_backwards,
read_previous_line,
read_until_regex,
read_until_whitespace,
skip_over_comment,
skip_over_whitespace,
read_block_backwards,
read_previous_line
)
from PyPDF2.errors import PdfStreamError

Expand Down Expand Up @@ -128,9 +128,9 @@ def test_paeth_predictor(left, up, upleft, expected):
@pytest.mark.parametrize(
("dat", "pos", "to_read"),
[
(b'', 0, 1),
(b'a', 0, 1),
(b'abc', 0, 10),
(b"", 0, 1),
(b"a", 0, 1),
(b"abc", 0, 10),
],
)
def test_read_block_backwards_errs(dat, pos, to_read):
Expand All @@ -143,13 +143,13 @@ def test_read_block_backwards_errs(dat, pos, to_read):
@pytest.mark.parametrize(
("dat", "pos", "to_read", "expected", "expected_pos"),
[
(b'abc', 1, 0, b'', 1),
(b'abc', 1, 1, b'a', 0),
(b'abc', 2, 1, b'b', 1),
(b'abc', 2, 2, b'ab', 0),
(b'abc', 3, 1, b'c', 2),
(b'abc', 3, 2, b'bc', 1),
(b'abc', 3, 3, b'abc', 0),
(b"abc", 1, 0, b"", 1),
(b"abc", 1, 1, b"a", 0),
(b"abc", 2, 1, b"b", 1),
(b"abc", 2, 2, b"ab", 0),
(b"abc", 3, 1, b"c", 2),
(b"abc", 3, 2, b"bc", 1),
(b"abc", 3, 3, b"abc", 0),
],
)
def test_read_block_backwards(dat, pos, to_read, expected, expected_pos):
Expand All @@ -160,30 +160,46 @@ def test_read_block_backwards(dat, pos, to_read, expected, expected_pos):


def test_read_block_backwards_at_start():
    """``read_previous_line`` raises when the stream is still at position 0."""
    stream = io.BytesIO(b"abc")
    with pytest.raises(PdfStreamError):
        read_previous_line(stream)


@pytest.mark.parametrize(
("dat", "pos", "expected", "expected_pos"),
[
(b'abc', 1, b'a', 0),
(b'abc', 2, b'ab', 0),
(b'abc', 3, b'abc', 0),
(b'abc\n', 3, b'abc', 0),
(b'abc\n', 4, b'', 3),
(b'abc\n\r', 4, b'', 3),
(b'abc\nd', 5, b'd', 3),
(b"abc", 1, b"a", 0),
(b"abc", 2, b"ab", 0),
(b"abc", 3, b"abc", 0),
(b"abc\n", 3, b"abc", 0),
(b"abc\n", 4, b"", 3),
(b"abc\n\r", 4, b"", 3),
(b"abc\nd", 5, b"d", 3),
# Skip over multiple CR/LF bytes
(b'abc\n\r\ndef', 9, b'def', 3),
(b"abc\n\r\ndef", 9, b"def", 3),
# Include a block full of newlines...
(b'abc' + b'\n' * (2 * io.DEFAULT_BUFFER_SIZE) + b'd', 2 * io.DEFAULT_BUFFER_SIZE + 4, b'd', 3),
(
b"abc" + b"\n" * (2 * io.DEFAULT_BUFFER_SIZE) + b"d",
2 * io.DEFAULT_BUFFER_SIZE + 4,
b"d",
3,
),
# Include a block full of non-newline characters
(b'abc\n' + b'd' * (2 * io.DEFAULT_BUFFER_SIZE), 2 * io.DEFAULT_BUFFER_SIZE + 4, b'd' * (2 * io.DEFAULT_BUFFER_SIZE), 3),
(
b"abc\n" + b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
2 * io.DEFAULT_BUFFER_SIZE + 4,
b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
3,
),
# Both
(b'abcxyz' + b'\n' * (2 * io.DEFAULT_BUFFER_SIZE) + b'd' * (2 * io.DEFAULT_BUFFER_SIZE),\
4 * io.DEFAULT_BUFFER_SIZE + 6, b'd' * (2 * io.DEFAULT_BUFFER_SIZE), 6),
(
b"abcxyz"
+ b"\n" * (2 * io.DEFAULT_BUFFER_SIZE)
+ b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
4 * io.DEFAULT_BUFFER_SIZE + 6,
b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
6,
),
],
)
def test_read_previous_line(dat, pos, expected, expected_pos):
Expand Down