-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sel fontinfields #2064
base: main
Are you sure you want to change the base?
Sel fontinfields #2064
Changes from all commits
8df2dfa
21af042
1b78427
5969c3f
284da98
807212b
3ef7e33
4fc16b6
56240f6
883f439
1df2408
c0fd10c
2b2b1cd
532f015
7306998
6e23da5
9953a5e
defdcd4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,7 @@ | |
cast, | ||
) | ||
|
||
from ._cmap import build_char_map_from_dict | ||
from ._encryption import EncryptAlgorithm, Encryption | ||
from ._page import PageObject, _VirtualList | ||
from ._page_labels import nums_clear_range, nums_insert, nums_next | ||
|
@@ -831,22 +832,72 @@ def _get_qualified_field_name(self, parent: DictionaryObject) -> Optional[str]: | |
return qualified_parent + "." + cast(str, parent["/T"]) | ||
return cast(str, parent["/T"]) | ||
|
||
def _update_text_field(self, field: DictionaryObject) -> None: | ||
def _update_text_field( | ||
self, field: DictionaryObject, fontname: str = "", fontsize: float = -1 | ||
) -> None: | ||
# Calculate rectangle dimensions | ||
_rct = cast(RectangleObject, field[AA.Rect]) | ||
rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1])) | ||
|
||
# Extract font information | ||
da = cast(str, field[AA.DA]) | ||
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ") | ||
font_name = font_properties[font_properties.index("Tf") - 2] | ||
font_height = float(font_properties[font_properties.index("Tf") - 1]) | ||
if font_height == 0: | ||
font_height = rct.height - 2 | ||
font_name = ( | ||
fontname if fontname else font_properties[font_properties.index("Tf") - 2] | ||
) | ||
font_height = ( | ||
fontsize | ||
if fontsize >= 0 | ||
else float(font_properties[font_properties.index("Tf") - 1]) | ||
) | ||
if fontname or fontsize >= 0 or font_height == 0: | ||
if fontname: | ||
font_properties[font_properties.index("Tf") - 2] = fontname | ||
if font_height == 0: | ||
font_height = rct.height - 2 | ||
font_properties[font_properties.index("Tf") - 1] = str(font_height) | ||
da = " ".join(font_properties) | ||
y_offset = rct.height - 1 - font_height | ||
|
||
# Retrieve font information from local DR ... | ||
dr: Any = cast( | ||
DictionaryObject, | ||
cast(DictionaryObject, field.get("/DR", DictionaryObject())).get_object(), | ||
) | ||
dr = dr.get("/Font", DictionaryObject()).get_object() | ||
if font_name not in dr: | ||
# ...or AcroForm dictionary | ||
dr = cast( | ||
dict, | ||
cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}), | ||
) | ||
if isinstance(dr, IndirectObject): # pragma: no cover | ||
dr = dr.get_object() | ||
dr = dr.get("/Font", DictionaryObject()).get_object() | ||
font_res = dr.get(font_name) | ||
if font_res is not None: | ||
font_res = cast(DictionaryObject, font_res.get_object()) | ||
font_subtype, _, font_encoding, font_map = build_char_map_from_dict( | ||
200, font_res | ||
) | ||
try: # get rid of width stored in -1 key | ||
del font_map[-1] | ||
except KeyError: | ||
pass | ||
font_full_rev: Dict[str, bytes] | ||
if isinstance(font_encoding, str): | ||
font_full_rev = { | ||
v: k.encode(font_encoding) for k, v in font_map.items() | ||
} | ||
else: | ||
font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()} | ||
font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()} | ||
for kk, v in font_map.items(): | ||
font_full_rev[v] = font_encoding_rev.get(kk, kk) | ||
else: | ||
logger_warning(f"can not find font dictionary for {font_name}", __name__) | ||
font_full_rev = {} | ||
|
||
# Retrieve field text and selected values | ||
field_flags = field.get(FA.Ff, 0) | ||
if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0: | ||
|
@@ -872,7 +923,16 @@ def _update_text_field(self, field: DictionaryObject) -> None: | |
else: | ||
# Td is a relative translation | ||
ap_stream += f"0 {- font_height * 1.4} Td\n".encode() | ||
ap_stream += b"(" + str(line).encode("UTF-8") + b") Tj\n" | ||
enc_line: List[bytes] = [ | ||
font_full_rev.get(c, c.encode("utf-16-be")) for c in line | ||
] | ||
if any(len(c) >= 2 for c in enc_line): | ||
ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n" | ||
else: | ||
enc = b"".join(enc_line) | ||
# for x in range(32): | ||
# enc = enc.replace(bytes((x,)),b"\%03o"%x) | ||
Comment on lines +933 to +934
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe those debug lines should be removed before merging this PR? |
||
ap_stream += b"(" + enc + b") Tj\n" | ||
ap_stream += b"ET\nQ\nEMC\nQ\n" | ||
|
||
# Create appearance dictionary | ||
|
@@ -886,22 +946,16 @@ def _update_text_field(self, field: DictionaryObject) -> None: | |
} | ||
) | ||
|
||
# Retrieve font information from AcroForm dictionary | ||
dr: Any = cast( | ||
dict, cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}) | ||
) | ||
if isinstance(dr, IndirectObject): | ||
dr = dr.get_object() | ||
dr = dr.get("/Font", {}) | ||
if isinstance(dr, IndirectObject): | ||
dr = dr.get_object() | ||
|
||
# Update Resources with font information if necessary | ||
if font_name in dr: | ||
if font_res is not None: | ||
dct[NameObject("/Resources")] = DictionaryObject( | ||
{ | ||
NameObject("/Font"): DictionaryObject( | ||
{NameObject(font_name): dr[font_name].indirect_reference} | ||
{ | ||
NameObject(font_name): getattr( | ||
font_res, "indirect_reference", font_res | ||
) | ||
} | ||
) | ||
} | ||
) | ||
|
@@ -934,8 +988,14 @@ def update_page_form_field_values( | |
Args: | ||
page: Page reference from PDF writer where the | ||
annotations and field data will be updated. | ||
fields: a Python dictionary of field names (/T) and text | ||
values (/V) | ||
fields: a Python dictionary of : | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe the type of the |
||
a) field names (/T) as keys and text values (/V) as value | ||
b) field names (/T) as keys and list of text values (/V) | ||
for multiple choice list | ||
c) field names (/T) as keys and tuple of : | ||
* text values (/V) | ||
* font name (must exist) | ||
* font size (0 for autosize) | ||
flags: An integer (0 to 7). The first bit sets ReadOnly, the | ||
second bit sets Required, the third bit sets NoExport. See | ||
PDF Reference Table 8.70 for details. | ||
|
@@ -971,6 +1031,10 @@ def update_page_form_field_values( | |
if isinstance(value, list): | ||
lst = ArrayObject(TextStringObject(v) for v in value) | ||
writer_annot[NameObject(FA.V)] = lst | ||
elif isinstance(value, tuple): | ||
writer_annot[NameObject(FA.V)] = TextStringObject( | ||
value[0], | ||
) | ||
else: | ||
writer_annot[NameObject(FA.V)] = TextStringObject(value) | ||
if writer_annot.get(FA.FT) in ("/Btn"): | ||
|
@@ -992,7 +1056,10 @@ def update_page_form_field_values( | |
if AA.DA in f: | ||
da = f[AA.DA] | ||
writer_annot[NameObject(AA.DA)] = da | ||
self._update_text_field(writer_annot) | ||
if isinstance(value, tuple): | ||
self._update_text_field(writer_annot, value[1], value[2]) | ||
else: | ||
self._update_text_field(writer_annot) | ||
elif writer_annot.get(FA.FT) == "/Sig": | ||
# signature | ||
logger_warning("Signature forms not implemented yet", __name__) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1589,3 +1589,43 @@ def test_missing_info(): | |
|
||
writer = PdfWriter(clone_from=reader) | ||
assert len(writer.pages) == len(reader.pages) | ||
|
||
|
||
@pytest.mark.enable_socket() | ||
def test_germanfields(): | ||
"""Cf #2035""" | ||
url = "https://github.com/py-pdf/pypdf/files/12194195/test.pdf" | ||
name = "germanfields.pdf" | ||
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) | ||
writer = PdfWriter(clone_from=reader) | ||
form_fields = {"Text Box 1": "test æ ø å"} | ||
writer.update_page_form_field_values( | ||
writer.pages[0], form_fields, auto_regenerate=False | ||
) | ||
bytes_stream = BytesIO() | ||
writer.write(bytes_stream) | ||
bytes_stream.seek(0) | ||
reader2 = PdfReader(bytes_stream) | ||
assert ( | ||
b"test \xe6 \xf8 \xe5" | ||
in reader2.get_fields()["Text Box 1"] | ||
.indirect_reference.get_object()["/AP"]["/N"] | ||
.get_data() | ||
) | ||
|
||
|
||
def test_selfont(): | ||
writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf") | ||
writer.update_page_form_field_values( | ||
writer.pages[0], | ||
{"Text1": ("Text", "", 5), "Text2": ("Text", "/F1", 15)}, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
auto_regenerate=False, | ||
) | ||
assert ( | ||
b"/F3 5 Tf" | ||
in writer.pages[0]["/Annots"][1].get_object()["/AP"]["/N"].get_data() | ||
) | ||
assert ( | ||
b"/F1 15 Tf" | ||
in writer.pages[0]["/Annots"][2].get_object()["/AP"]["/N"].get_data() | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Codecov reports that this line is never evaluated in the pypdf test suite: do you think a unit test could be added to cover this case?