py-pdf · stefan6419846 · Nov 5, 2025 · Sep 14, 2025 · Sep 14, 2025 · Sep 15, 2025
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -48,7 +48,6 @@
     cast,
 )
 
-from ._cmap import _default_fonts_space_width, build_char_map_from_dict
 from ._doc_common import DocumentInformation, PdfDocCommon
 from ._encryption import EncryptAlgorithm, Encryption
 from ._page import PageObject, Transformation
@@ -85,7 +84,6 @@
     BooleanObject,
     ByteStringObject,
     ContentStream,
-    DecodedStreamObject,
     Destination,
     DictionaryObject,
     EmbeddedFile,
@@ -107,6 +105,7 @@
     hex_to_rgb,
     is_null_or_none,
 )
+from .generic._appearance_stream import TextStreamAppearance
 from .pagerange import PageRange, PageRangeSpec
 from .types import (
     AnnotationSubtype,
@@ -119,7 +118,6 @@
 from .xmp import XmpInformation
 
 ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all()
-DEFAULT_FONT_HEIGHT_IN_MULTILINE = 12
 
 
 class ObjectDeletionFlag(enum.IntFlag):
@@ -874,7 +872,6 @@ def _add_apstream_object(
             object_name: str,
             x_offset: float,
             y_offset: float,
-            font_res: Optional[DictionaryObject] = None
         ) -> None:
         """
         Adds an appearance stream to the page content in the form of
@@ -886,17 +883,25 @@ def _add_apstream_object(
             object_name: The name of the appearance stream.
             x_offset: The horizontal offset for the appearance stream.
             y_offset: The vertical offset for the appearance stream.
-            font_res: The appearance stream's font resource (if given).
         """
-        # Prepare XObject resource dictionary on the page
+        # Prepare XObject resource dictionary on the page. This currently
+        # only deals with font resources, but can easily be adapted to also
+        # include other resources.
         pg_res = cast(DictionaryObject, page[PG.RESOURCES])
-        if font_res is not None:
-            font_name = font_res["/BaseFont"]  # [/"Name"] often also exists, but is deprecated
+        if "/Resources" in appearance_stream_obj:
+            ap_stream_res = cast(DictionaryObject, appearance_stream_obj["/Resources"])
+            # No need to check "if "/Font" in ap_stream_res", because the only reason this
+            # code runs would be if we are flattening form fields, and the associated code
+            # either adds a Font resource or no resource at all. This probably needs to
+            # change if we want to use this method to flatten markup annotations.
+            ap_stream_font_dict = cast(DictionaryObject, ap_stream_res["/Font"])
             if "/Font" not in pg_res:
                 pg_res[NameObject("/Font")] = DictionaryObject()
-            pg_ft_res = cast(DictionaryObject, pg_res[NameObject("/Font")])
-            if font_name not in pg_ft_res:
-                pg_ft_res[NameObject(font_name)] = font_res
+            pg_font_res = cast(DictionaryObject, pg_res["/Font"])
+            # Merge fonts from the appearance stream into the page's font resources
+            for font_name, font_ref in ap_stream_font_dict.items():
+                if font_name not in pg_font_res:
+                    pg_font_res[font_name] = font_ref
         # Always add the resolved stream object to the writer to get a new IndirectObject.
         # This ensures we have a valid IndirectObject managed by *this* writer.
         xobject_ref = self._add_object(appearance_stream_obj)
@@ -915,160 +920,6 @@ def _add_apstream_object(
         xobject_drawing_commands = f"q\n{xobject_cm._to_cm()}\n{xobject_name} Do\nQ".encode()
         self._merge_content_stream_to_page(page, xobject_drawing_commands)
 
-    def _update_field_annotation(
-        self,
-        page: PageObject,
-        field: DictionaryObject,
-        annotation: DictionaryObject,
-        font_name: str = "",
-        font_size: float = -1,
-        flatten: bool = False,
-    ) -> None:
-        # Calculate rectangle dimensions
-        _rct = cast(RectangleObject, annotation[AA.Rect])
-        rct = RectangleObject((0, 0, abs(_rct[2] - _rct[0]), abs(_rct[3] - _rct[1])))
-
-        # Extract font information
-        da = annotation.get_inherited(
-            AA.DA,
-            cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get(
-                AA.DA, None
-            ),
-        )
-        if da is None:
-            da = TextStringObject("/Helv 0 Tf 0 g")
-        else:
-            da = da.get_object()
-        font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
-        font_properties = [x for x in font_properties if x != ""]
-        if font_name:
-            font_properties[font_properties.index("Tf") - 2] = font_name
-        else:
-            font_name = font_properties[font_properties.index("Tf") - 2]
-        font_height = (
-            font_size
-            if font_size >= 0
-            else float(font_properties[font_properties.index("Tf") - 1])
-        )
-        if font_height == 0:
-            if field.get(FA.Ff, 0) & FA.FfBits.Multiline:
-                font_height = DEFAULT_FONT_HEIGHT_IN_MULTILINE
-            else:
-                font_height = rct.height - 2
-        font_properties[font_properties.index("Tf") - 1] = str(font_height)
-        da = " ".join(font_properties)
-        y_offset = rct.height - 1 - font_height
-
-        # Retrieve font information from local DR ...
-        dr: Any = cast(
-            DictionaryObject,
-            cast(
-                DictionaryObject,
-                annotation.get_inherited(
-                    "/DR",
-                    cast(
-                        DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
-                    ).get("/DR", DictionaryObject()),
-                ),
-            ).get_object(),
-        )
-        dr = dr.get("/Font", DictionaryObject()).get_object()
-        # _default_fonts_space_width keys is the list of Standard fonts
-        if font_name not in dr and font_name not in _default_fonts_space_width:
-            # ...or AcroForm dictionary
-            dr = cast(
-                dict[Any, Any],
-                cast(
-                    DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
-                ).get("/DR", {}),
-            )
-            dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
-        font_res = dr.get(font_name, None)
-        if not is_null_or_none(font_res):
-            font_res = cast(DictionaryObject, font_res.get_object())
-            _font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
-                200, font_res
-            )
-            try:  # remove width stored in -1 key
-                del font_map[-1]
-            except KeyError:
-                pass
-            font_full_rev: dict[str, bytes]
-            if isinstance(font_encoding, str):
-                font_full_rev = {
-                    v: k.encode(font_encoding) for k, v in font_map.items()
-                }
-            else:
-                font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
-                font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
-                for key, value in font_map.items():
-                    font_full_rev[value] = font_encoding_rev.get(key, key)
-        else:
-            logger_warning(f"Font dictionary for {font_name} not found.", __name__)
-            font_full_rev = {}
-
-        # Retrieve field text and selected values
-        field_flags = field.get(FA.Ff, 0)
-        if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
-            txt = "\n".join(annotation.get_inherited(FA.Opt, []))
-            sel = field.get("/V", [])
-            if not isinstance(sel, list):
-                sel = [sel]
-        else:  # /Tx
-            txt = field.get("/V", "")
-            sel = []
-        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
-        txt = txt.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
-        # Generate appearance stream
-        ap_stream = generate_appearance_stream(
-            txt, sel, da, font_full_rev, rct, font_height, y_offset
-        )
-
-        # Create appearance dictionary
-        dct = DecodedStreamObject.initialize_from_dictionary(
-            {
-                NameObject("/Type"): NameObject("/XObject"),
-                NameObject("/Subtype"): NameObject("/Form"),
-                NameObject("/BBox"): rct,
-                "__streamdata__": ByteStringObject(ap_stream),
-                "/Length": 0,
-            }
-        )
-        if AA.AP in annotation:
-            for k, v in cast(DictionaryObject, annotation[AA.AP]).get("/N", {}).items():
-                if k not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
-                    dct[k] = v
-
-        # Update Resources with font information if necessary
-        if font_res is not None:
-            dct[NameObject("/Resources")] = DictionaryObject(
-                {
-                    NameObject("/Font"): DictionaryObject(
-                        {
-                            NameObject(font_name): getattr(
-                                font_res, "indirect_reference", font_res
-                            )
-                        }
-                    )
-                }
-            )
-        if AA.AP not in annotation:
-            annotation[NameObject(AA.AP)] = DictionaryObject(
-                {NameObject("/N"): self._add_object(dct)}
-            )
-        elif "/N" not in cast(DictionaryObject, annotation[AA.AP]):
-            cast(DictionaryObject, annotation[NameObject(AA.AP)])[
-                NameObject("/N")
-            ] = self._add_object(dct)
-        else:  # [/AP][/N] exists
-            n = annotation[AA.AP]["/N"].indirect_reference.idnum  # type: ignore
-            self._objects[n - 1] = dct
-            dct.indirect_reference = IndirectObject(n, 0, self)
-
-        if flatten:
-            field_name = self._get_qualified_field_name(annotation)
-            self._add_apstream_object(page, dct, field_name, _rct[0], _rct[1], font_res)
-
     FFBITS_NUL = FA.FfBits(0)
 
     def update_page_form_field_values(
@@ -1111,8 +962,8 @@ def update_page_form_field_values(
         """
         if CatalogDictionary.ACRO_FORM not in self._root_object:
             raise PyPdfError("No /AcroForm dictionary in PDF of PdfWriter Object")
-        af = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
-        if InteractiveFormDictEntries.Fields not in af:
+        acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
+        if InteractiveFormDictEntries.Fields not in acro_form:
             raise PyPdfError("No /Fields dictionary in PDF of PdfWriter Object")
         if isinstance(auto_regenerate, bool):
             self.set_need_appearances_writer(auto_regenerate)
@@ -1139,6 +990,7 @@ def update_page_form_field_values(
                 ).get_object()
 
             for field, value in fields.items():
+                rectangle = cast(RectangleObject, annotation[AA.Rect])
                 if not (
                     self._get_qualified_field_name(parent_annotation) == field
                     or parent_annotation.get("/T", None) == field
@@ -1151,6 +1003,7 @@ def update_page_form_field_values(
                     del parent_annotation["/I"]
                 if flags:
                     annotation[NameObject(FA.Ff)] = NumberObject(flags)
+                # Set the field value
                 if not (value is None and flatten):  # Only change values if given by user and not flattening.
                     if isinstance(value, list):
                         lst = ArrayObject(TextStringObject(v) for v in value)
@@ -1161,37 +1014,52 @@ def update_page_form_field_values(
                         )
                     else:
                         parent_annotation[NameObject(FA.V)] = TextStringObject(value)
+                # Get or create the field's appearance stream object
                 if parent_annotation.get(FA.FT) == "/Btn":
-                    # Checkbox button (no /FT found in Radio widgets)
+                    # Checkbox button (no /FT found in Radio widgets);
+                    # We can find the associated appearance stream object
+                    # within the annotation.
                     v = NameObject(value)
                     ap = cast(DictionaryObject, annotation[NameObject(AA.AP)])
                     normal_ap = cast(DictionaryObject, ap["/N"])
                     if v not in normal_ap:
                         v = NameObject("/Off")
                     appearance_stream_obj = normal_ap.get(v)
-                    # other cases will be updated through the for loop
+                    # Other cases will be updated through the for loop
                     annotation[NameObject(AA.AS)] = v
                     annotation[NameObject(FA.V)] = v
-                    if flatten and appearance_stream_obj is not None:
-                        # We basically copy the entire appearance stream, which should be an XObject that
-                        # is already registered. No need to add font resources.
-                        rct = cast(RectangleObject, annotation[AA.Rect])
-                        self._add_apstream_object(page, appearance_stream_obj, field, rct[0], rct[1])
                 elif (
                     parent_annotation.get(FA.FT) == "/Tx"
                     or parent_annotation.get(FA.FT) == "/Ch"
                 ):
-                    # textbox
+                    # Textbox; we need to generate the appearance stream object
                     if isinstance(value, tuple):
-                        self._update_field_annotation(
-                            page, parent_annotation, annotation, value[1], value[2], flatten=flatten
+                        appearance_stream_obj = TextStreamAppearance.from_text_annotation(
+                            acro_form, parent_annotation, annotation, value[1], value[2]
                         )
                     else:
-                        self._update_field_annotation(page, parent_annotation, annotation, flatten=flatten)
+                        appearance_stream_obj = TextStreamAppearance.from_text_annotation(
+                            acro_form, parent_annotation, annotation
+                        )
+                    # Add the appearance stream object
+                    if AA.AP not in annotation:
+                        annotation[NameObject(AA.AP)] = DictionaryObject(
+                            {NameObject("/N"): self._add_object(appearance_stream_obj)}
+                        )
+                    elif "/N" not in (ap:= cast(DictionaryObject, annotation[AA.AP])):
+                        cast(DictionaryObject, annotation[NameObject(AA.AP)])[
+                            NameObject("/N")
+                        ] = self._add_object(appearance_stream_obj)
+                    else:  # [/AP][/N] exists
+                        n = annotation[AA.AP]["/N"].indirect_reference.idnum  # type: ignore
+                        self._objects[n - 1] = appearance_stream_obj
+                        appearance_stream_obj.indirect_reference = IndirectObject(n, 0, self)
                 elif (
                     annotation.get(FA.FT) == "/Sig"
                 ):  # deprecated  # not implemented yet
                     logger_warning("Signature forms not implemented yet", __name__)
+                if flatten and appearance_stream_obj is not None:
+                    self._add_apstream_object(page, appearance_stream_obj, field, rectangle[0], rectangle[1])
 
     def reattach_fields(
         self, page: Optional[PageObject] = None
@@ -3435,36 +3303,3 @@ def _create_outline_item(
             format_flag += OutlineFontFlag.bold
         outline_item.update({NameObject("/F"): NumberObject(format_flag)})
     return outline_item
-
-
-def generate_appearance_stream(
-    txt: str,
-    sel: list[str],
-    da: str,
-    font_full_rev: dict[str, bytes],
-    rct: RectangleObject,
-    font_height: float,
-    y_offset: float,
-) -> bytes:
-    ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
-    for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
-        if line in sel:
-            # may be improved but cannot find how to get fill working => replaced with lined box
-            ap_stream += (
-                f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
-                f"0.5 0.5 0.5 rg s\n{da}\n"
-            ).encode()
-        if line_number == 0:
-            ap_stream += f"2 {y_offset} Td\n".encode()
-        else:
-            # Td is a relative translation
-            ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
-        enc_line: list[bytes] = [
-            font_full_rev.get(c, c.encode("utf-16-be")) for c in line
-        ]
-        if any(len(c) >= 2 for c in enc_line):
-            ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
-        else:
-            ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
-    ap_stream += b"ET\nQ\nEMC\nQ\n"
-    return ap_stream