-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathcustom-html-handling.py
executable file
·43 lines (30 loc) · 1.06 KB
/
custom-html-handling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python3
"""
Custom HTML tag handling example.
Add a custom HTML handler for the bold <b> tag which encloses
bold text with "**".
Example:
"Welcome to <b>Chur</b>" is rendered as "Welcome to **Chur**".
"""
from typing import Dict
from inscriptis import ParserConfig
from inscriptis.html_engine import Inscriptis
from inscriptis.model.html_document_state import HtmlDocumentState
from inscriptis.model.tag import CustomHtmlTagHandlerMapping
from lxml.html import fromstring
def my_handle_start_b(state: HtmlDocumentState, _: Dict) -> None:
    """Handle the opening <b> tag.

    Writes the marker "**" to the last entry of ``state.tags`` so that the
    bold text is preceded by "**" in the rendered output.

    Args:
        state: The document state; only ``state.tags[-1].write`` is used.
        _: Unused by this handler (presumably the attributes of the tag;
            verify against the inscriptis handler signature).
    """
    state.tags[-1].write("**")
def my_handle_end_b(state: HtmlDocumentState) -> None:
    """Handle the closing </b> tag.

    Writes the marker "**" to the last entry of ``state.tags`` so that the
    bold text is followed by "**" in the rendered output.

    Args:
        state: The document state; only ``state.tags[-1].write`` is used.
    """
    state.tags[-1].write("**")
# Register the custom handlers for the opening and closing <b> tag.
MY_MAPPING = CustomHtmlTagHandlerMapping(
    start_tag_mapping=dict(b=my_handle_start_b),
    end_tag_mapping=dict(b=my_handle_end_b),
)
# Example input containing a single bold element.
HTML = "Welcome to <b>Chur</b>"

# Parse the markup with lxml and convert it using the custom tag mapping.
html_tree = fromstring(HTML)
inscriptis = Inscriptis(
    html_tree,
    ParserConfig(custom_html_tag_handler_mapping=MY_MAPPING),
)

# Per the module docstring, this renders as "Welcome to **Chur**".
print(inscriptis.get_text())