1
1
#!/usr/bin/env python
2
2
# -*- coding: utf-8 -*-
3
- from PyPDF2 import PdfFileReader , PdfFileWriter
4
- from PyPDF2 . pdf import ContentStream
3
+ # PyPDF2 - a Python library that allows reading, writing, and manipulating PDF files
4
+ from PyPDF2 import PdfReader , PdfWriter
5
5
from PyPDF2 .generic import NumberObject , TextStringObject , NameObject
6
- from PyPDF2 .utils import b_
7
6
7
+ from PyPDF2 .generic import ContentStream
8
+
9
+ # tkinter - used for building the GUI (Graphical User Interface) application
8
10
from tkinter import Tk , Label , Button , StringVar
9
11
from tkinter .filedialog import askopenfilename , asksaveasfilename , askdirectory
10
12
from tkinter .constants import N ,S ,W ,E , LEFT , TOP , RIGHT , BOTTOM
11
13
import sys , os
12
14
import tkinter .font as font
15
+
16
+ # reportlab - a library for generating PDFs programmatically
13
17
from reportlab .pdfgen import canvas
14
18
15
19
def resource_path (relative_path ):
@@ -18,7 +22,7 @@ def resource_path(relative_path):
18
22
return os .path .join (os .path .abspath ("." ), relative_path )
19
23
20
24
21
- class PdfEnhancedFileWriter (PdfFileWriter ):
25
+ class PdfEnhancedFileWriter (PdfWriter ):
22
26
23
27
colors_operands = {
24
28
'rgb' : {
@@ -37,23 +41,20 @@ class PdfEnhancedFileWriter(PdfFileWriter):
37
41
38
42
def _getOperatorType (self , operator ):
39
43
operator_types = {
40
- b_ ('Tj' ): 'text' ,
41
- b_ ("'" ): 'text' ,
42
- b_ ('"' ): 'text' ,
43
- b_ ("TJ" ): 'text' ,
44
-
45
- b_ ('rg' ): 'rgb' , # color
46
- b_ ('RG' ): 'rgb' , # color
47
- b_ ('k' ): 'cmyk' , # color
48
- b_ ('K' ): 'cmyk' , # color
49
- b_ ('g' ): 'grayscale' , # color
50
- b_ ('G' ): 'grayscale' , # color
51
-
52
- b_ ('re' ): 'rectangle' ,
53
-
54
- b_ ('l' ): 'line' , # line
55
- b_ ('m' ): 'line' , # start line
56
- b_ ('S' ): 'line' , # stroke(paint) line
44
+ (b"Tj" ): "text" ,
45
+ (b"'" ): "text" ,
46
+ (b'"' ): "text" ,
47
+ (b"TJ" ): "text" ,
48
+ (b"rg" ): "rgb" , # color
49
+ (b"RG" ): "rgb" , # color
50
+ (b"k" ): "cmyk" , # color
51
+ (b"K" ): "cmyk" , # color
52
+ (b"g" ): "grayscale" , # color
53
+ (b"G" ): "grayscale" , # color
54
+ (b"re" ): "rectangle" ,
55
+ (b"l" ): "line" , # line
56
+ (b"m" ): "line" , # start line
57
+ (b"S" ): "line" , # stroke(paint) line
57
58
}
58
59
59
60
if operator in operator_types :
@@ -86,11 +87,10 @@ def removeWordStyle(self, ignoreByteStringObject=False):
86
87
to ignore ByteString Objects.
87
88
"""
88
89
89
- pages = self .getObject (self ._pages )['/Kids' ]
90
- for j in range (len (pages )):
91
- page = pages [j ]
92
- pageRef = self .getObject (page )
93
- content = pageRef ['/Contents' ].getObject ()
90
+ pages = self .get_object (self ._pages )['/Kids' ]
91
+ for page in pages :
92
+ pageRef = self .get_object (page )
93
+ content = pageRef ["/Contents" ].get_object ()
94
94
95
95
if not isinstance (content , ContentStream ):
96
96
content = ContentStream (content , pageRef )
@@ -100,25 +100,25 @@ def removeWordStyle(self, ignoreByteStringObject=False):
100
100
101
101
for operator_index , (operands , operator ) in enumerate (content .operations ):
102
102
103
- if operator == b_ ( 'Tf' ) and operands [0 ][:2 ] == '/F' :
103
+ if operator == ( b 'Tf' ) and operands [0 ][:2 ] == '/F' :
104
104
last_font_size = operands [1 ].as_numeric ()
105
105
106
- if operator == b_ ( 'Tj' ):
106
+ if operator == ( b 'Tj' ):
107
107
text = operands [0 ]
108
108
if ignoreByteStringObject :
109
109
if not isinstance (text , TextStringObject ):
110
110
operands [0 ] = TextStringObject ()
111
- elif operator == b_ ( "'" ):
111
+ elif operator == ( b "'" ):
112
112
text = operands [0 ]
113
113
if ignoreByteStringObject :
114
114
if not isinstance (text , TextStringObject ):
115
115
operands [0 ] = TextStringObject ()
116
- elif operator == b_ ( '"' ):
116
+ elif operator == ( b '"' ):
117
117
text = operands [2 ]
118
118
if ignoreByteStringObject :
119
119
if not isinstance (text , TextStringObject ):
120
120
operands [2 ] = TextStringObject ()
121
- elif operator == b_ ( "TJ" ):
121
+ elif operator == ( b "TJ" ):
122
122
for i in range (len (operands [0 ])):
123
123
if ignoreByteStringObject :
124
124
if not isinstance (operands [0 ][i ], TextStringObject ):
@@ -149,7 +149,7 @@ def removeWordStyle(self, ignoreByteStringObject=False):
149
149
# remove styled rectangles (highlights, lines, etc.)
150
150
# the 're' operator is a Path Construction operator, creates a rectangle()
151
151
# presumably, that's the way Word embeds all of its graphics into a PDF when creating one
152
- if operator == b_ ( 're' ):
152
+ if operator == ( b 're' ):
153
153
154
154
rectangle_width = operands [- 2 ].as_numeric ()
155
155
rectangle_height = operands [- 1 ].as_numeric ()
@@ -192,7 +192,7 @@ def createMultiPage(file_path):
192
192
193
193
def load_pdf(filename):
    """Open *filename* in binary mode and wrap it in a ``PdfReader``.

    Args:
        filename: Path of the PDF file to open.

    Returns:
        The ``PdfReader`` built on the opened file object.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever ``PdfReader`` raises for an unreadable file is
            re-raised after the file handle has been closed.
    """
    f = open(filename, 'rb')
    try:
        # On success the handle is deliberately left open and handed to the
        # reader -- presumably the reader keeps reading from it later;
        # verify against the PyPDF2 version in use before closing it here.
        return PdfReader(f)
    except Exception:
        # Do not leak the descriptor when the reader rejects the file.
        f.close()
        raise
196
196
197
197
def load1 ():
198
198
f = askopenfilename (multiple = True , filetypes = (('PDF File' , '*.pdf' ), ('All Files' , '*.*' )))
@@ -214,7 +214,7 @@ def load1():
214
214
#print(pdf_list)
215
215
216
216
def add_to_writer(pdfsrc, writer):
    """Append every page of *pdfsrc* to *writer*, then strip Word styling.

    Args:
        pdfsrc: Source reader exposing an iterable ``pages`` sequence.
        writer: Destination writer providing ``add_page(page)`` and
            ``removeWordStyle()``.

    Returns:
        None. *writer* is modified in place.
    """
    # A plain loop instead of a comprehension: the calls are made purely for
    # their side effects, so there is no point in building a list of results.
    for page in pdfsrc.pages:
        writer.add_page(page)
    # Run once after all pages are in the writer, as the original did.
    writer.removeWordStyle()
219
219
220
220
def remove_images ():
0 commit comments