|
2 | 2 |
|
/**
 * Counts how many times each HTML tag is opened in a markup string.
 *
 * Detection is regex-based, not a real HTML parse: anything that looks like
 * an opening tag is counted, including tag-like text inside comments or
 * script blocks.
 */
class SimpleHtmlParser implements HtmlParserInterface
{
    /**
     * Tallies the opening tags found in the given markup.
     *
     * Tag names are lower-cased before counting, so "<DIV>" and "<div>" share
     * one entry. Closing tags ("</div>") are not counted. Entries appear in
     * the order in which each tag name is first seen.
     *
     * @param string $html Markup to scan.
     *
     * @return array<string, int> Map of lower-cased tag name to occurrence count;
     *                            empty when no tag is found.
     */
    public function parse(string $html): array
    {
        // A tag name must start with an ASCII letter, so stray text such as
        // "a <5 b" is not reported as a tag; digits and hyphens may follow so
        // custom elements ("my-widget") are captured whole. The lookahead
        // accepts whitespace, "/" or ">" after the name, which covers
        // "<div>", "<div class=...>" and the compact self-closing "<br/>".
        $pattern = '/<([a-z][a-z0-9-]*)(?=[\s\/>])/i';

        // preg_match_all() yields 0 when nothing matches and false on an
        // engine error; in both cases there is nothing to count.
        if (!preg_match_all($pattern, $html, $matches)) {
            return [];
        }

        return array_count_values(array_map('strtolower', $matches[1]));
    }
}
|
0 commit comments