From 90d838e7237729e74acb1325a0684fc4b0dc7b4f Mon Sep 17 00:00:00 2001 From: Magnus Rud Date: Tue, 14 Nov 2023 14:50:15 +0100 Subject: [PATCH 1/2] add -f and -l to pdfinfo --- pdf2image/pdf2image.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pdf2image/pdf2image.py b/pdf2image/pdf2image.py index 076eac3..a16bf79 100644 --- a/pdf2image/pdf2image.py +++ b/pdf2image/pdf2image.py @@ -528,6 +528,8 @@ def pdfinfo_from_path( poppler_path: str = None, rawdates: bool = False, timeout: int = None, + first_page: int = None, + last_page: int = None, ) -> Dict: """Function wrapping poppler's pdfinfo utility and returns the result as a dictionary. @@ -543,6 +545,10 @@ def pdfinfo_from_path( :type rawdates: bool, optional :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None :type timeout: int, optional + :param first_page: First page to process, defaults to None + :type first_page: int, optional + :param last_page: Last page to process before stopping, defaults to None + :type last_page: int, optional :raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded :raises PDFInfoNotInstalledError: Raised if pdfinfo is not installed :raises PDFPageCountError: Raised if the output could not be parsed @@ -561,6 +567,12 @@ def pdfinfo_from_path( if rawdates: command.extend(["-rawdates"]) + if first_page: + command.extend(["-f", first_page]) + + if last_page: + command.extend(["-l", last_page]) + # Add poppler path to LD_LIBRARY_PATH env = os.environ.copy() if poppler_path is not None: @@ -607,6 +619,8 @@ def pdfinfo_from_bytes( poppler_path: str = None, rawdates: bool = False, timeout: int = None, + first_page: int = None, + last_page: int = None, ) -> Dict: """Function wrapping poppler's pdfinfo utility and returns the result as a dictionary. @@ -622,6 +636,10 @@ def pdfinfo_from_bytes( :type rawdates: bool, optional :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None :type timeout: int, optional + :param first_page: First page to process, defaults to None + :type first_page: int, optional + :param last_page: Last page to process before stopping, defaults to None + :type last_page: int, optional :return: Dictionary containing various information on the PDF :rtype: Dict """ @@ -637,6 +655,8 @@ def pdfinfo_from_bytes( poppler_path=poppler_path, rawdates=rawdates, timeout=timeout, + first_page=first_page, + last_page=last_page, ) finally: os.close(fh) From 0ede1d9fc7cd0c00931773edc62afde9c5b9868e Mon Sep 17 00:00:00 2001 From: Magnus Date: Tue, 14 Nov 2023 14:59:51 +0100 Subject: [PATCH 2/2] Cast to string --- pdf2image/pdf2image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdf2image/pdf2image.py b/pdf2image/pdf2image.py index a16bf79..ae393fd 100644 --- a/pdf2image/pdf2image.py +++ b/pdf2image/pdf2image.py @@ -568,10 +568,10 @@ def pdfinfo_from_path( command.extend(["-rawdates"]) if first_page: - command.extend(["-f", first_page]) + command.extend(["-f", str(first_page)]) if last_page: - command.extend(["-l", last_page]) + command.extend(["-l", str(last_page)]) # Add poppler path to LD_LIBRARY_PATH env = os.environ.copy()