Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions administrator/components/com_finder/src/Indexer/Indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,8 @@ public function index($item, $format = 'html')
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');

$formats = $item->getPropertyFormats();

/*
* Process the item's content. The items can customize their
* processing instructions to define extra properties to process
Expand All @@ -399,6 +401,12 @@ public function index($item, $format = 'html')
continue;
}

/**
* Discover the parser to use. For legacy reasons an explicitly passed $format
* (anything other than the default 'html') takes precedence; otherwise we use the
* format registered for the property, falling back to html.
*/
$parseFormat = $format == 'html' ? ($formats[$property] ?? 'html') : $format;

// Tokenize the property.
if (\is_array($item->$property)) {
// Tokenize an array of content and add it to the database.
Expand All @@ -414,7 +422,7 @@ public function index($item, $format = 'html')
}

// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDb($ip, $group, $item->language, $format, $count);
$count += $this->tokenizeToDb($ip, $group, $item->language, $parseFormat, $count);

// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 7500)) {
Expand All @@ -434,10 +442,10 @@ public function index($item, $format = 'html')
}

// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDb($item->$property, $group, $item->language, $format, $count);
$count += $this->tokenizeToDb($item->$property, $group, $item->language, $parseFormat, $count);

// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 30000)) {
if ($count > static::$state->options->get('memory_table_limit', 7500)) {
$this->toggleTables(false);
}
}
Expand Down Expand Up @@ -827,7 +835,7 @@ protected function tokenizeToDb($input, $context, $lang, $format, $count = 0)
// Batch the process out to avoid memory limits.
while (!feof($input)) {
// Read into the buffer.
$buffer .= fread($input, 2048);
$buffer .= fread($input, 32768);

/*
* If we haven't reached the end of the file, seek to the last
Expand Down
8 changes: 4 additions & 4 deletions administrator/components/com_finder/src/Indexer/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ public static function getInstance($format)
*/
public function parse($input)
{
// If the input is less than 2KB we can parse it in one go.
if (\strlen($input) <= 2048) {
// If the input is 32KB or less we can parse it in one go.
if (\strlen($input) <= 32768) {
return $this->process($input);
}

// Input is longer than 2Kb so parse it in chunks of 2Kb or less.
// Input is longer than 32KB so parse it in chunks of 32KB or less.
$start = 0;
$end = \strlen($input);
$chunk = 2048;
$chunk = 32768;
$return = null;

while ($start < $end) {
Expand Down
27 changes: 26 additions & 1 deletion administrator/components/com_finder/src/Indexer/Result.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ class Result implements \Serializable
Indexer::MISC_CONTEXT => ['comments'],
];

/**
* Associative array with keys of properties and the format in which to
* parse them to add to the index
*
* @var string[]
* @since __DEPLOY_VERSION__
*/
protected $formats = [];

/**
* The indexer will use this data to create taxonomy mapping entries for
* the item so that it can be filtered by type, label, category,
Expand Down Expand Up @@ -316,6 +325,18 @@ public function getInstructions()
return $this->instructions;
}

/**
* Method to get the parse formats registered for the item's properties.
*
* The map is filled by addInstruction() and pruned by removeInstruction().
*
* @return string[] Associative array of formats keyed by property name.
*
* @since __DEPLOY_VERSION__
*/
public function getPropertyFormats()
{
return $this->formats;
}

/**
* Method to add a processing instruction for an item property.
*
Expand All @@ -326,13 +347,16 @@ public function getInstructions()
*
* @since 2.5
*/
public function addInstruction($group, $property)
public function addInstruction($group, $property, $format = 'html')
{
    // Instructions can only be attached to groups that are already known.
    if (!\array_key_exists($group, $this->instructions)) {
        return;
    }

    // Nothing to do when the property is already registered in this group.
    if (\in_array($property, $this->instructions[$group], true)) {
        return;
    }

    // Register the property in the group.
    $this->instructions[$group][] = $property;

    // Remember which format the property's content should be parsed as.
    $this->formats[$property] = $format;
}

Expand All @@ -356,6 +380,7 @@ public function removeInstruction($group, $property)
// If the property was found, remove it.
if ($key !== false) {
unset($this->instructions[$group][$key]);
unset($this->formats[$property]);
}
}
}
Expand Down