diff --git a/administrator/components/com_finder/src/Indexer/Indexer.php b/administrator/components/com_finder/src/Indexer/Indexer.php index 22ba7c85a8e63..ce408998d974b 100644 --- a/administrator/components/com_finder/src/Indexer/Indexer.php +++ b/administrator/components/com_finder/src/Indexer/Indexer.php @@ -386,6 +386,8 @@ public function index($item, $format = 'html') // Truncate the tokens aggregate table. $db->truncateTable('#__finder_tokens_aggregate'); + $formats = $item->getPropertyFormats(); + /* * Process the item's content. The items can customize their * processing instructions to define extra properties to process @@ -399,6 +401,12 @@ public function index($item, $format = 'html') continue; } + /** + * Discover the parser to use. For legacy reasons a set $format takes precedence, + * otherwise we use the format that is set or fallback to html + */ + $parseFormat = $format == 'html' ? ($formats[$property] ?? 'html') : $format; + // Tokenize the property. if (\is_array($item->$property)) { // Tokenize an array of content and add it to the database. @@ -414,7 +422,7 @@ public function index($item, $format = 'html') } // Tokenize a string of content and add it to the database. - $count += $this->tokenizeToDb($ip, $group, $item->language, $format, $count); + $count += $this->tokenizeToDb($ip, $group, $item->language, $parseFormat, $count); // Check if we're approaching the memory limit of the token table. if ($count > static::$state->options->get('memory_table_limit', 7500)) { @@ -434,10 +442,10 @@ public function index($item, $format = 'html') } // Tokenize a string of content and add it to the database. - $count += $this->tokenizeToDb($item->$property, $group, $item->language, $format, $count); + $count += $this->tokenizeToDb($item->$property, $group, $item->language, $parseFormat, $count); // Check if we're approaching the memory limit of the token table. 
- if ($count > static::$state->options->get('memory_table_limit', 30000)) { + if ($count > static::$state->options->get('memory_table_limit', 7500)) { $this->toggleTables(false); } } @@ -827,7 +835,7 @@ protected function tokenizeToDb($input, $context, $lang, $format, $count = 0) // Batch the process out to avoid memory limits. while (!feof($input)) { // Read into the buffer. - $buffer .= fread($input, 2048); + $buffer .= fread($input, 32768); /* * If we haven't reached the end of the file, seek to the last diff --git a/administrator/components/com_finder/src/Indexer/Parser.php b/administrator/components/com_finder/src/Indexer/Parser.php index 76b58e3b70bdb..d40a47823cd12 100644 --- a/administrator/components/com_finder/src/Indexer/Parser.php +++ b/administrator/components/com_finder/src/Indexer/Parser.php @@ -79,15 +79,15 @@ public static function getInstance($format) */ public function parse($input) { - // If the input is less than 2KB we can parse it in one go. - if (\strlen($input) <= 2048) { + // If the input is less than 32KB we can parse it in one go. + if (\strlen($input) <= 32768) { return $this->process($input); } - // Input is longer than 2Kb so parse it in chunks of 2Kb or less. + // Input is longer than 32Kb so parse it in chunks of 32Kb or less. 
$start = 0; $end = \strlen($input); - $chunk = 2048; + $chunk = 32768; $return = null; while ($start < $end) { diff --git a/administrator/components/com_finder/src/Indexer/Result.php b/administrator/components/com_finder/src/Indexer/Result.php index a9964b8849f95..cfd1b2e484158 100644 --- a/administrator/components/com_finder/src/Indexer/Result.php +++ b/administrator/components/com_finder/src/Indexer/Result.php @@ -52,6 +52,15 @@ class Result implements \Serializable Indexer::MISC_CONTEXT => ['comments'], ]; + /** + * Associative array with keys of properties and the format in which to + * parse them to add to the index + * + * @var string[] + * @since __DEPLOY_VERSION__ + */ + protected $formats = []; + /** * The indexer will use this data to create taxonomy mapping entries for * the item so that it can be filtered by type, label, category, @@ -316,6 +325,18 @@ public function getInstructions() return $this->instructions; } + /** + * Method to get the formats for all properties. + * + * @return array An array of properties with their formats. + * + * @since __DEPLOY_VERSION__ + */ + public function getPropertyFormats() + { + return $this->formats; + } + /** * Method to add a processing instruction for an item property. * @@ -326,13 +347,16 @@ public function getInstructions() * * @since 2.5 */ - public function addInstruction($group, $property) + public function addInstruction($group, $property, $format = 'html') { // Check if the group exists. We can't add instructions for unknown groups. // Check if the property exists in the group. if (\array_key_exists($group, $this->instructions) && !\in_array($property, $this->instructions[$group], true)) { // Add the property to the group. $this->instructions[$group][] = $property; + + // Add the format for the property. + $this->formats[$property] = $format; } } @@ -356,6 +380,7 @@ public function removeInstruction($group, $property) // If the property was found, remove it. 
if ($key !== false) { unset($this->instructions[$group][$key]); + unset($this->formats[$property]); } } }