Skip to content

Commit

Permalink
Increase pdfminer's bufsiz to mitigate token splitting issue
Browse files Browse the repository at this point in the history
Fixes #1361
  • Loading branch information
jbarlow83 committed Jul 31, 2024
1 parent f5662d5 commit d35d008
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/ocrmypdf/pdfinfo/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import pdfminer.encodingdb
import pdfminer.pdfdevice
import pdfminer.pdfinterp
import pdfminer.psparser
from pdfminer.converter import PDFLayoutAnalyzer
from pdfminer.layout import LAParams, LTChar, LTPage, LTTextBox
from pdfminer.pdfcolor import PDFColorSpace
Expand Down Expand Up @@ -58,9 +59,10 @@ def pdfsimplefont__init__(

setattr(PDFSimpleFont, '__init__', pdfsimplefont__init__)

#
# pdfminer patches when creator is PScript5.dll
#
# Patch pdfminer.six buffer size
# The parser doesn't properly handle keyword tokens that are split across the end of
# the buffer, so increase the buffer size to something far larger than will ever be
# seen.
pdfminer.psparser.PSBaseParser.BUFSIZ = 256 * 1024 * 1024


def pdftype3font__pscript5_get_height(self):
Expand Down

0 comments on commit d35d008

Please sign in to comment.