Skip to content

Commit

Permalink
Add Similar Search (#38)
Browse files Browse the repository at this point in the history
* FIX: Remove HTML and characters that break manticore

* ENHANCEMENT: Addition of similar search

* MINOR: Addition of Similar link

* FIX: lemmatizer not working currently

* ENHANCEMENT: Addition of similarTo fields

* FIX: Extraction of text for flickr photos now working

* ENHANCEMENT: Similar results

* ENHANCEMENT: Addition of similar to info in template

* FIX: PHPCS coding standards

* FIX: PHPStan now passes

* FIX: tests now pass
  • Loading branch information
gordonbanderson authored Sep 17, 2020
1 parent 2f8bcf9 commit 138ef2f
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 35 deletions.
2 changes: 1 addition & 1 deletion _config/indexes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Suilven\FreeTextSearch\Indexes:
- index:
name: sitetree
class: SilverStripe\CMS\Model\SiteTree
tokenizer: lemmatizer
tokenizer: porter
# language: en
fields:
- Title
Expand Down
15 changes: 15 additions & 0 deletions src/Container/SearchResults.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class SearchResults
/** @var float the time in seconds */
private $time;

/** @var \SilverStripe\ORM\DataObject|null */
private $searchSimilarTo = null;

public function __construct()
{
$this->time = 0;
Expand Down Expand Up @@ -165,4 +168,16 @@ public function setTime(float $newTime): void
{
$this->time = $newTime;
}


/**
 * Get the record this result set is reported as being similar to.
 *
 * @return \SilverStripe\ORM\DataObject|null the object last passed to setSimilarTo(); null (the property's
 *         initial value) when none has been set
 */
public function getSimilarTo(): ?\SilverStripe\ORM\DataObject
{
return $this->searchSimilarTo;
}


/**
 * Record the object this result set is reported as being similar to.
 *
 * @param \SilverStripe\ORM\DataObject|null $dataObject the object to store; passing null clears the stored value
 */
public function setSimilarTo(?\SilverStripe\ORM\DataObject $dataObject): void
{
$this->searchSimilarTo = $dataObject;
}
}
33 changes: 25 additions & 8 deletions src/Helper/SearchHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
namespace Suilven\FreeTextSearch\Helper;

use SilverStripe\ORM\DataObject;
use Suilven\FreeTextSearch\Indexes;

class SearchHelper
{
Expand All @@ -21,31 +22,47 @@ public function getTextFieldPayload(DataObject $dataObject): array
{
$helper = new IndexingHelper();
$fullPayload = $helper->getFieldsToIndex($dataObject);

$textPayload = [];

$keys = \array_keys($fullPayload);
$specsHelper = new SpecsHelper();

foreach ($keys as $key) {
if ($fullPayload[$key] === []) {

foreach ($keys as $indexKey) {
$indexes = new Indexes();
$index = $indexes->getIndex($indexKey);
$textualFields = $index->getFields();

// if the index details are empty, skip
if ($fullPayload[$indexKey] === []) {
continue;
}

$textPayload[$key] = [];
$specs = $specsHelper->getFieldSpecs($key);
$textPayload[$indexKey] = [];
$specs = $specsHelper->getFieldSpecs($indexKey);

foreach (\array_keys($specs) as $field) {
// skip link field
if ($field === 'Link') {
// skip non textual fields
if (!\in_array($field, $textualFields, true)) {
continue;
}


$type = $specs[$field];
if (!\in_array($type, ['Varchar', 'HTMLText'], true)) {
continue;
}

$textPayload[$key][$field] = (string) $fullPayload[$key][$field];
$fieldValue = (string) $fullPayload[$indexKey][$field];
$barchars = ['!', ',', '.', '-'];
$fieldValue = \strip_tags($fieldValue);

foreach ($barchars as $badChar) {
$fieldValue = \str_replace($badChar, '', $fieldValue);
}

$fieldValue = \str_replace('/', ' ', $fieldValue);
$textPayload[$indexKey][$field] = $fieldValue;
}
}

Expand Down
99 changes: 75 additions & 24 deletions src/Page/SearchPageController.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,40 @@
class SearchPageController extends \PageController
{
/** @var array<string> */
private static $allowed_actions = ['index'];
private static $allowed_actions = ['index', 'similar'];


/**
 * Action that renders records similar to the one identified by the `ID` request parameter.
 *
 * The record is loaded using the class of the index named by this page's IndexToSearch value, the
 * searcher is pointed at that same index, and the results are rendered via renderSearchResults().
 *
 * @throws \SilverStripe\Control\HTTPResponse_Exception (404) when no record of the indexed class matches the ID
 */
public function similar(): \SilverStripe\View\ViewableData_Customised
{
    /** @var \Suilven\FreeTextSearch\Page\SearchPage $model */
    $model = SearchPage::get_by_id(SearchPage::class, $this->ID);

    $indexes = new Indexes();
    $index = $indexes->getIndex($model->IndexToSearch);

    /** @var string $clazz */
    $clazz = $index->getClass();

    // the ID arrives as a URL segment, so coerce it to an integer before using it for the lookup
    $dataObjectID = \intval($this->getRequest()->param('ID'));

    $objectInContext = DataObject::get_by_id($clazz, $dataObjectID);

    // An unknown, missing or non-numeric ID yields no record. Respond with a 404 rather than passing
    // null on to the searcher, which needs an actual record to compare against.
    if ($objectInContext === null) {
        $this->httpError(404, 'The record to find similar results for does not exist');
    }

    $factory = new SearcherFactory();
    $searcher = $factory->getSearcher();
    $searcher->setIndexName($index->getName());
    $this->paginateSearcher($searcher);

    $results = $searcher->searchForSimilar($objectInContext);

    // tweak the result set for rendering purposes, we do not want all the OR constructs shown as the query
    $results->setQuery('');
    $results->setSimilarTo($objectInContext);

    return $this->renderSearchResults($model, $results);
}


public function index(): \SilverStripe\View\ViewableData_Customised
{
Expand Down Expand Up @@ -128,6 +159,39 @@ public function index(): \SilverStripe\View\ViewableData_Customised
// $results['ShowAllIfEmptyQuery'] = $model->ShowAllIfEmptyQuery;
// $results['CleanedLink'] = $this->Link();

return $this->renderSearchResults($model, $results);
}


/**
 * Search the index configured on the given search page for $q.
 *
 * The searcher is given the selected filters, the page's facet fields and has-many fields, and the
 * pagination derived from the current request before the search is executed.
 *
 * @param array<string,int|string|float|bool> $selected handed to the searcher as its filters
 */
public function performSearchIncludingFacets(array $selected, SearchPage $searchPage, ?string $q): SearchResults
{
    /** @var \Suilven\FreeTextSearch\Interfaces\Searcher $searcher */
    $searcher = (new SearcherFactory())->getSearcher();
    $searcher->setFilters($selected);
    $searcher->setIndexName($searchPage->IndexToSearch);

    \error_log('SEARCH PAGE PAGE SIZE: ' . $searchPage->PageSize);

    // read both token lists from the page first, then hand them to the searcher
    $facetTokens = $searchPage->getFacetFields();
    $hasManyTokens = $searchPage->getHasManyFields();
    $searcher->setFacettedTokens($facetTokens);
    $searcher->setHasManyTokens($hasManyTokens);

    $this->paginateSearcher($searcher);

    return $searcher->search($q);
}


/** @throws \Exception */
private function renderSearchResults(
SearchPage $model,
SearchResults $results
): \SilverStripe\View\ViewableData_Customised {
$indexes = new Indexes();
$index = $indexes->getIndex($model->IndexToSearch);

Expand All @@ -139,7 +203,9 @@ public function index(): \SilverStripe\View\ViewableData_Customised
$last = \array_pop($splits);
$templateName = \implode('/', $splits) . '/Includes/' . $last;


$records = $results->getRecords();

$newRecords = new ArrayList();
foreach ($records as $record) {
$highsList = new ArrayList();
Expand Down Expand Up @@ -176,10 +242,13 @@ public function index(): \SilverStripe\View\ViewableData_Customised

$html = $this->renderWith(
[
$templateName,
'Suilven/FreeTextSearch/SilverStripe/CMS/Model/Includes/SiteTree',
$templateName,
'Suilven/FreeTextSearch/SilverStripe/CMS/Model/Includes/SiteTree',
],
['Record' => $record]
[
'Record' => $record,
'SimilarLink' => $this->Link('similar') . '/' . $record->ID,
]
);
$record->HTML = $html;
$newRecords->push($record);
Expand All @@ -201,28 +270,13 @@ public function index(): \SilverStripe\View\ViewableData_Customised
'Suggestions' => new ArrayList($results->getSuggestions()),
'Time' => $results->getTime(),
'Pagination' => $paginatedList,
'SimilarTo' => $results->getSimilarTo(),
]));
}


/** @param array<string,int|string|float|bool> $selected */
public function performSearchIncludingFacets(array $selected, SearchPage $searchPage, ?string $q): SearchResults
private function paginateSearcher(\Suilven\FreeTextSearch\Interfaces\Searcher &$searcher): void
{
$factory = new SearcherFactory();

/** @var \Suilven\FreeTextSearch\Interfaces\Searcher $searcher */
$searcher = $factory->getSearcher();
$searcher->setFilters($selected);
$searcher->setIndexName($searchPage->IndexToSearch);

\error_log('SEARCH PAGE PAGE SIZE: ' . $searchPage->PageSize);

$facets = $searchPage->getFacetFields();
$hasManyFields = $searchPage->getHasManyFields();

$searcher->setFacettedTokens($facets);
$searcher->setHasManyTokens($hasManyFields);

$searcher->setPageSize($this->PageSize);
$start = $this->getRequest()->getVar('start');

Expand All @@ -233,9 +287,6 @@ public function performSearchIncludingFacets(array $selected, SearchPage $search
: 1;

$page = \intval($page);
\error_log('PAGE: ' . $page);
$searcher->setPage($page);

return $searcher->search($q);
}
}
6 changes: 5 additions & 1 deletion templates/Suilven/FreeTextSearch/Page/Layout/SearchPage.ss
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
<% include SideBar %>
<% end_if %>
<h1>$Title</h1>
<form id="searchForm" action="$URL">
<form id="searchForm" action="$Link">
<input name="q" type="text" placeholder="Search..." value="$Query">
<button type="submit">Search</button>
</form>

<% if $SimilarTo %>
Similar to <a href="$SimilarTo.Link">$SimilarTo.Title</a>
<% end_if %>

<% if $NumberOfResults > 0 %>
<div class="searchResultsInfo">$NumberOfResults results found in $Time seconds</div>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
<% loop $Record.Highlights %>
$Snippet.RAW
<% end_loop %>
<p class="links"><a href="$SimilarLink">Similar</a></p>
</div>
2 changes: 1 addition & 1 deletion tests/Helper/SearchHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public function testTextPayload(): void
$payload = $helper->getTextFieldPayload($page);
$this->assertEquals(['sitetree' => [
'Title' => 'The Break In San Marino Is Bright',
'Content' => 'The wind in Kenya is waste.',
'Content' => 'The wind in Kenya is waste',
'MenuTitle' => 'The Break In San Marino Is Bright',
]], $payload);
}
Expand Down

0 comments on commit 138ef2f

Please sign in to comment.