matthewwithanm
diff --git a/‎.github/workflows/python-app.yml‎
Lines changed: 20 additions & 1 deletion b/‎.github/workflows/python-app.yml‎
Lines changed: 20 additions & 1 deletion
diff --git a/‎.github/workflows/python-publish.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/python-publish.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.rst‎
Lines changed: 18 additions & 1 deletion b/‎README.rst‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎markdownify/__init__.py‎
Lines changed: 76 additions & 18 deletions b/‎markdownify/__init__.py‎
Lines changed: 76 additions & 18 deletions
diff --git a/‎markdownify/__init__.pyi‎
Lines changed: 77 additions & 0 deletions b/‎markdownify/__init__.pyi‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎markdownify/main.py‎
100644 → 100755
Lines changed: 8 additions & 1 deletion b/‎markdownify/main.py‎
100644 → 100755
Lines changed: 8 additions & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python 3.8
       uses: actions/setup-python@v2
       with:
@@ -30,3 +30,22 @@ jobs:
     - name: Build
       run: |
         python -m build -nwsx .
+
+  # Type-check job: runs mypy over the repository, then a strict pass on tests/types.py.
+  types:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.8
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install --upgrade setuptools setuptools_scm wheel build tox mypy types-beautifulsoup4
+    - name: Check types
+      run: |
+        mypy .
+        mypy --strict tests/types.py
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
 
@@ -110,7 +110,7 @@ code_language_callback
   When the HTML code contains ``pre`` tags that in some way provide the code
   language, for example as class, this callback can be used to extract the
   language from the tag and prefix it to the converted ``pre`` tag.
-  The callback gets one single argument, an BeautifylSoup object, and returns
+  The callback gets one single argument, a BeautifulSoup object, and returns
   a string containing the code language, or ``None``.
   An example to use the class name as code language could be::
 
@@ -157,6 +157,23 @@ strip_document
   within the document are unaffected.
   Defaults to ``STRIP``.
 
+strip_pre
+  Controls whether leading/trailing blank lines are removed from ``<pre>``
+  tags. Supported values are ``STRIP`` (all leading/trailing blank lines),
+  ``STRIP_ONE`` (one leading/trailing blank line), and ``None`` (neither).
+  Defaults to ``STRIP``.
+
+bs4_options
+  Specify additional configuration options for the ``BeautifulSoup`` object
+  used to interpret the HTML markup. String and list values (such as ``lxml``
+  or ``html5lib``) are treated as ``features`` arguments to control parser
+  selection. Dictionary values (such as ``{"from_encoding": "iso-8859-8"}``)
+  are treated as full kwargs to be used for the BeautifulSoup constructor,
+  allowing specification of any parameter. For parameter details, see the
+  BeautifulSoup_ documentation.
+
+.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
+
 Options may be specified as kwargs to the ``markdownify`` function, or as a
 nested ``Options`` class in ``MarkdownConverter`` subclasses.
 
 
@@ -11,6 +11,10 @@
 re_all_whitespace = re.compile(r'[\t \r\n]+')
 re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
 re_html_heading = re.compile(r'h(\d+)')
+re_pre_lstrip1 = re.compile(r'^ *\n')
+re_pre_rstrip1 = re.compile(r'\n *$')
+re_pre_lstrip = re.compile(r'^[ \n]*\n')
+re_pre_rstrip = re.compile(r'[ \n]*$')
 
 # Pattern for creating convert_<tag> function names from tag names
 re_make_convert_fn_name = re.compile(r'[\[\]:-]')
@@ -37,6 +41,9 @@
 # confused with a list item
 re_escape_misc_list_items = re.compile(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))')
 
+# Find consecutive backtick sequences in a string
+re_backtick_runs = re.compile(r'`+')
+
 # Heading styles
 ATX = 'atx'
 ATX_CLOSED = 'atx_closed'
@@ -51,10 +58,25 @@
 ASTERISK = '*'
 UNDERSCORE = '_'
 
-# Document strip styles
+# Document/pre strip styles
 LSTRIP = 'lstrip'
 RSTRIP = 'rstrip'
 STRIP = 'strip'
+STRIP_ONE = 'strip_one'
+
+
+def strip1_pre(text):
+    """Strip one leading and one trailing blank line from a <pre> string.
+
+    At most one leading line made up only of spaces, and at most one
+    trailing newline (together with any spaces that follow it), are removed.
+    Returns the resulting string.
+    """
+    text = re_pre_lstrip1.sub('', text)
+    # '$' also matches just before a final newline, so on text ending in two
+    # newlines the pattern would match (and be removed) twice. Limiting the
+    # substitution to one replacement keeps this to a single newline.
+    text = re_pre_rstrip1.sub('', text, count=1)
+    return text
+
+
+def strip_pre(text):
+    """Strip all leading and trailing newlines from a <pre> string."""
+    text = re_pre_lstrip.sub('', text)
+    text = re_pre_rstrip.sub('', text)
+    return text
 
 
 def chomp(text):
@@ -154,6 +176,7 @@ def _next_block_content_sibling(el):
 class MarkdownConverter(object):
     class DefaultOptions:
         autolinks = True
+        bs4_options = 'html.parser'
         bullets = '*+-'  # An iterable of bullet types.
         code_language = ''
         code_language_callback = None
@@ -167,6 +190,7 @@ class DefaultOptions:
         newline_style = SPACES
         strip = None
         strip_document = STRIP
+        strip_pre = STRIP
         strong_em_symbol = ASTERISK
         sub_symbol = ''
         sup_symbol = ''
@@ -187,11 +211,15 @@ def __init__(self, **options):
             raise ValueError('You may specify either tags to strip or tags to'
                              ' convert, but not both.')
 
+        # If a string or list is passed to bs4_options, assume it is a 'features' specification
+        if not isinstance(self.options['bs4_options'], dict):
+            self.options['bs4_options'] = {'features': self.options['bs4_options']}
+
         # Initialize the conversion function cache
         self.convert_fn_cache = {}
 
     def convert(self, html):
-        soup = BeautifulSoup(html, 'html.parser')
+        soup = BeautifulSoup(html, **self.options['bs4_options'])
         return self.convert_soup(soup)
 
     def convert_soup(self, soup):
@@ -362,16 +390,20 @@ def get_conv_fn(self, tag_name):
         if not self.should_convert_tag(tag_name):
             return None
 
-        # Handle headings with _convert_hn() function
+        # Look for an explicitly defined conversion function by tag name first
+        convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub("_", tag_name)
+        convert_fn = getattr(self, convert_fn_name, None)
+        if convert_fn:
+            return convert_fn
+
+        # If tag is any heading, handle with convert_hN() function
         match = re_html_heading.match(tag_name)
         if match:
-            n = int(match.group(1))
-            return lambda el, text, parent_tags: self._convert_hn(n, el, text, parent_tags)
+            n = int(match.group(1))  # get value of N from <hN>
+            return lambda el, text, parent_tags: self.convert_hN(n, el, text, parent_tags)
 
-        # For other tags, look up their conversion function by tag name
-        convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub('_', tag_name)
-        convert_fn = getattr(self, convert_fn_name, None)
-        return convert_fn
+        # No conversion function was found
+        return None
 
     def should_convert_tag(self, tag):
         """Given a tag name, return whether to convert based on strip/convert options."""
@@ -451,10 +483,24 @@ def convert_br(self, el, text, parent_tags):
             return '  \n'
 
     def convert_code(self, el, text, parent_tags):
-        if 'pre' in parent_tags:
+        if '_noformat' in parent_tags:
             return text
-        converter = abstract_inline_conversion(lambda self: '`')
-        return converter(self, el, text, parent_tags)
+
+        prefix, suffix, text = chomp(text)
+        if not text:
+            return ''
+
+        # Find the maximum number of consecutive backticks in the text, then
+        # delimit the code span with one more backtick than that
+        max_backticks = max((len(match) for match in re.findall(re_backtick_runs, text)), default=0)
+        markup_delimiter = '`' * (max_backticks + 1)
+
+        # If the maximum number of backticks is greater than zero, add a space
+        # to avoid interpretation of inside backticks as literals
+        if max_backticks > 0:
+            text = " " + text + " "
+
+        return '%s%s%s%s%s' % (prefix, markup_delimiter, text, markup_delimiter, suffix)
 
     convert_del = abstract_inline_conversion(lambda self: '~~')
 
@@ -509,12 +555,12 @@ def convert_dt(self, el, text, parent_tags):
 
         return '\n\n%s\n' % text
 
-    def _convert_hn(self, n, el, text, parent_tags):
-        """ Method name prefixed with _ to prevent <hn> to call this """
+    def convert_hN(self, n, el, text, parent_tags):
+        # convert_hN() converts <hN> tags, where N is any integer
         if '_inline' in parent_tags:
             return text
 
-        # prevent MemoryErrors in case of very large n
+        # Markdown does not support heading depths of n > 6
         n = max(1, min(6, n))
 
         style = self.options['heading_style'].lower()
@@ -647,8 +693,20 @@ def convert_pre(self, el, text, parent_tags):
         if self.options['code_language_callback']:
             code_language = self.options['code_language_callback'](el) or code_language
 
+        if self.options['strip_pre'] == STRIP:
+            text = strip_pre(text)  # remove all leading/trailing newlines
+        elif self.options['strip_pre'] == STRIP_ONE:
+            text = strip1_pre(text)  # remove one leading/trailing newline
+        elif self.options['strip_pre'] is None:
+            pass  # leave leading and trailing newlines as-is
+        else:
+            raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre'])
+
         return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
 
+    def convert_q(self, el, text, parent_tags):
+        return '"' + text + '"'
+
     def convert_script(self, el, text, parent_tags):
         return ''
 
@@ -677,13 +735,13 @@ def convert_figcaption(self, el, text, parent_tags):
     def convert_td(self, el, text, parent_tags):
         colspan = 1
         if 'colspan' in el.attrs and el['colspan'].isdigit():
-            colspan = int(el['colspan'])
+            colspan = max(1, min(1000, int(el['colspan'])))
         return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
 
     def convert_th(self, el, text, parent_tags):
         colspan = 1
         if 'colspan' in el.attrs and el['colspan'].isdigit():
-            colspan = int(el['colspan'])
+            colspan = max(1, min(1000, int(el['colspan'])))
         return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
 
     def convert_tr(self, el, text, parent_tags):
@@ -704,7 +762,7 @@ def convert_tr(self, el, text, parent_tags):
         full_colspan = 0
         for cell in cells:
             if 'colspan' in cell.attrs and cell['colspan'].isdigit():
-                full_colspan += int(cell["colspan"])
+                full_colspan += max(1, min(1000, int(cell['colspan'])))
             else:
                 full_colspan += 1
         if ((is_headrow
 
@@ -0,0 +1,77 @@
+from _typeshed import Incomplete
+from typing import Callable, Union
+
+ATX: str
+ATX_CLOSED: str
+UNDERLINED: str
+SETEXT = UNDERLINED
+SPACES: str
+BACKSLASH: str
+ASTERISK: str
+UNDERSCORE: str
+LSTRIP: str
+RSTRIP: str
+STRIP: str
+STRIP_ONE: str
+
+
+# Convert an HTML string to Markdown; conversion options are given as keyword arguments.
+def markdownify(
+    html: str,
+    autolinks: bool = ...,
+    # A parser name, a list of parser features, or a dict of BeautifulSoup constructor kwargs.
+    bs4_options: Union[str, list[str], dict[str, Incomplete]] = ...,
+    bullets: str = ...,
+    code_language: str = ...,
+    code_language_callback: Union[Callable[[Incomplete], Union[str, None]], None] = ...,
+    convert: Union[list[str], None] = ...,
+    default_title: bool = ...,
+    escape_asterisks: bool = ...,
+    escape_underscores: bool = ...,
+    escape_misc: bool = ...,
+    heading_style: str = ...,
+    keep_inline_images_in: list[str] = ...,
+    newline_style: str = ...,
+    strip: Union[list[str], None] = ...,
+    strip_document: Union[str, None] = ...,
+    # STRIP, STRIP_ONE, or None to leave <pre> content untouched.
+    strip_pre: Union[str, None] = ...,
+    strong_em_symbol: str = ...,
+    sub_symbol: str = ...,
+    sup_symbol: str = ...,
+    table_infer_header: bool = ...,
+    wrap: bool = ...,
+    wrap_width: int = ...,
+) -> str: ...
+
+
+# HTML-to-Markdown converter; the constructor takes the conversion options as keyword arguments.
+class MarkdownConverter:
+    def __init__(
+        self,
+        autolinks: bool = ...,
+        # A parser name, a list of parser features, or a dict of BeautifulSoup constructor kwargs.
+        bs4_options: Union[str, list[str], dict[str, Incomplete]] = ...,
+        bullets: str = ...,
+        code_language: str = ...,
+        code_language_callback: Union[Callable[[Incomplete], Union[str, None]], None] = ...,
+        convert: Union[list[str], None] = ...,
+        default_title: bool = ...,
+        escape_asterisks: bool = ...,
+        escape_underscores: bool = ...,
+        escape_misc: bool = ...,
+        heading_style: str = ...,
+        keep_inline_images_in: list[str] = ...,
+        newline_style: str = ...,
+        strip: Union[list[str], None] = ...,
+        strip_document: Union[str, None] = ...,
+        # STRIP, STRIP_ONE, or None to leave <pre> content untouched.
+        strip_pre: Union[str, None] = ...,
+        strong_em_symbol: str = ...,
+        sub_symbol: str = ...,
+        sup_symbol: str = ...,
+        table_infer_header: bool = ...,
+        wrap: bool = ...,
+        wrap_width: int = ...,
+    ) -> None:
+        ...
+
+    # Parse an HTML string and return its Markdown rendering.
+    def convert(self, html: str) -> str:
+        ...
+
+    # Convert an already-parsed soup object to Markdown.
+    def convert_soup(self, soup: Incomplete) -> str:
+        ...
@@ -55,7 +55,9 @@ def main(argv=sys.argv[1:]):
     parser.add_argument('--no-escape-underscores', dest='escape_underscores',
                         action='store_false',
                         help="Do not escape '_' to '\\_' in text.")
-    parser.add_argument('-i', '--keep-inline-images-in', nargs='*',
+    parser.add_argument('-i', '--keep-inline-images-in',
+                        default=[],
+                        nargs='*',
                         help="Images are converted to their alt-text when the images are "
                         "located inside headlines or table cells. If some inline images "
                         "should be converted to markdown images instead, this option can "
@@ -68,6 +70,11 @@ def main(argv=sys.argv[1:]):
     parser.add_argument('-w', '--wrap', action='store_true',
                         help="Wrap all text paragraphs at --wrap-width characters.")
     parser.add_argument('--wrap-width', type=int, default=80)
+    parser.add_argument('--bs4-options',
+                        default='html.parser',
+                        help="Specifies the parser that BeautifulSoup should use to parse "
+                             "the HTML markup. Examples include 'html.parser', 'lxml', and "
+                             "'html5lib'.")
 
     args = parser.parse_args(argv)
     print(markdownify(**vars(args)))
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "markdownify"
-version = "1.1.0"
+version = "1.2.0"
 authors = [{name = "Matthew Tretter", email = "[email protected]"}]
 description = "Convert HTML to markdown."
 readme = "README.rst"