Skip to content

Commit 2baa652

Browse files
ggppdk authored and HLeithner committed
Fix indexer breaking words when they are partly enclosed via inline HTML tags (#16165)
1 parent 4a901a1 commit 2baa652

File tree

1 file changed

+8
-3
lines changed
  • administrator/components/com_finder/helpers/indexer/parser

1 file changed

+8
-3
lines changed

administrator/components/com_finder/helpers/indexer/parser/html.php

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,14 @@ public function parse($input)
60 60
// Convert entities equivalent to spaces to actual spaces.
61 61
$input = str_replace(array('&nbsp;', '&#160;'), ' ', $input);
62 62

63-
// This fixes issues such as '<h1>Title</h1><p>Paragraph</p>'
64-
// being transformed into 'TitleParagraph' with no space.
65-
$input = str_replace('>', '> ', $input);
63+
// Add a space before both the OPEN and CLOSE tags of BLOCK and LINE BREAKING elements,
64+
// e.g. 'all<h1><em>m</em>obile List</h1>' will become 'all mobile List'
65+
$input = preg_replace('/(<|<\/)(' .
66+
'address|article|aside|blockquote|br|canvas|dd|div|dl|dt|' .
67+
'fieldset|figcaption|figure|footer|form|h1|h2|h3|h4|h5|h6|header|hgroup|hr|li|' .
68+
'main|nav|noscript|ol|output|p|pre|section|table|tfoot|ul|video' .
69+
')\b/i', ' $1$2', $input
70+
);
66 71

67 72
// Strip HTML tags.
68 73
$input = strip_tags($input);

0 commit comments

Comments
 (0)