Skip to content

Commit

Permalink
Changes to :lang() as defined in recent CSS spec update
Browse files Browse the repository at this point in the history
  • Loading branch information
facelessuser committed Nov 19, 2022
1 parent f38a93a commit 9c02bb8
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 7 deletions.
1 change: 1 addition & 0 deletions docs/src/dictionary/en-custom.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ tuples
un
unmatchable
unpickle
untagged
unvisited
whitespace
wildcard
Expand Down
7 changes: 6 additions & 1 deletion docs/src/markdown/about/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Changelog

## 2.4.0
## 2.5

- **NEW**: Update to support changes related to `:lang()` in the official CSS spec. `:lang("")` should match unspecified
languages, e.g. `lang=""`, but not `lang=und`.

## 2.4

- **NEW**: `:nth-child()` and `:nth-last-child()` will forgive irregular comma usage.
- **NEW**: Formally drop Python 3.6.
Expand Down
2 changes: 1 addition & 1 deletion soupsieve/__meta__.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)


__version_info__ = Version(2, 4, 0, "final", post=1)
__version_info__ = Version(2, 5, 0, "final", post=1)
__version__ = __version_info__._get_canonical()
15 changes: 10 additions & 5 deletions soupsieve/css_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,13 +601,18 @@ def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
ranges = lang_range.split('-')
subtags = lang_tag.lower().split('-')
length = len(ranges)
slength = len(subtags)
rindex = 0
sindex = 0
r = ranges[rindex]
s = subtags[sindex]

# Empty specified language should match unspecified language attributes
if length == 1 and slength == 1 and not r and r == s:
return True

# Primary tag needs to match
if r != '*' and r != s:
if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
match = False

rindex += 1
Expand Down Expand Up @@ -1184,7 +1189,7 @@ def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
break

# Use cached meta language.
if not found_lang and self.cached_meta_lang:
if found_lang is None and self.cached_meta_lang:
for cache in self.cached_meta_lang:
if root is cache[0]:
found_lang = cache[1]
Expand Down Expand Up @@ -1218,13 +1223,13 @@ def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
found_lang = content
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
break
if found_lang:
if found_lang is not None:
break
if not found_lang:
if found_lang is None:
self.cached_meta_lang.append((cast(str, root), ''))

# If we determined a language, compare.
if found_lang:
if found_lang is not None:
for patterns in langs:
match = False
for pattern in patterns:
Expand Down
38 changes: 38 additions & 0 deletions tests/test_level4/test_lang.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,44 @@ def test_avoid_implicit_language(self):
flags=util.HTML
)

def test_language_und(self):
"""Test that undefined language can be matched by `*`."""

# Fixture: div 1 has an explicitly empty `lang`, div 2 is tagged `und`,
# div 3 has a valueless `lang=` and wraps div 4, and div 5 has no `lang` at all.
markup = """
<div id="1" lang=""></div>
<div id="2" lang="und"></div>
<div id="3" lang=>
<div id="4"></div>
</div>
<div id="5"></div>
"""

# Only the `und` div is expected to satisfy the wildcard range. The divs with an
# empty or valueless `lang` (1, 3), the div nested inside 3 (4), and the div
# without any `lang` attribute (5) must not be selected.
self.assert_selector(
markup,
"div:lang('*')",
['2'],
flags=util.HTML
)

def test_language_empty_string(self):
"""Test that an empty string language will only match untagged languages `lang=""`."""

# Same fixture shape as the wildcard test: empty `lang` (1), `und` (2),
# valueless `lang=` (3) wrapping a plain div (4), and a div with no `lang` (5).
markup = """
<div id="1" lang=""></div>
<div id="2" lang="und"></div>
<div id="3" lang=>
<div id="4"></div>
</div>
<div id="5"></div>
"""

# Expected matches: the explicitly empty `lang=""` (1), the valueless `lang=` (3),
# and div 4 inside 3 (presumably picking up its parent's empty language).
# The `und` div (2) and the div with no `lang` at all (5) are excluded.
self.assert_selector(
markup,
"div:lang('')",
['1', '3', '4'],
flags=util.HTML
)

def test_language_list(self):
"""Test language list."""

Expand Down

0 comments on commit 9c02bb8

Please sign in to comment.