Skip to content

Commit

Permalink
💥 Change - added which urls the crawler found links on to help debug
Browse files Browse the repository at this point in the history
  • Loading branch information
Owen Melbourne committed Nov 13, 2020
1 parent ea72140 commit 098b65c
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 7 deletions.
1 change: 1 addition & 0 deletions app/CrawledPage.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class CrawledPage extends Model

protected $casts = [
'website_id' => 'int',
'found_on' => 'array',
];

public function website()
Expand Down
12 changes: 12 additions & 0 deletions app/Crawler/CrawlObserver.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ public function crawled(UriInterface $url, ResponseInterface $response, ?UriInte
$page->exception = null;
$page->response = $response->getStatusCode() . ' - ' . $response->getReasonPhrase();

if ($foundOnUrl) {
$pages = [];

if (!empty($page->found_on)) {
$pages = $page->found_on;
}

$pages[] = (string) $foundOnUrl;

$page->found_on = array_unique($pages);
}

BrowserConsoleCheck::dispatch($this->website, $page);

return $page->save();
Expand Down
5 changes: 4 additions & 1 deletion app/Http/Controllers/ProblematicPageController.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use App\Jobs\PageCheck;
use App\Website;
use Exception;
use Illuminate\Contracts\Foundation\Application;
use Illuminate\Contracts\Routing\ResponseFactory;
use Illuminate\Http\Request;
use Illuminate\Http\Response;
Expand All @@ -17,7 +18,7 @@ class ProblematicPageController extends Controller
*
* @param Request $request
* @param Website $website
* @return void
* @return Application|ResponseFactory|Response
* @throws Exception
*/
public function __invoke(Request $request, Website $website)
Expand All @@ -37,6 +38,7 @@ public function __invoke(Request $request, Website $website)
->where(function ($query) {
$query->whereNotNull('messages');
$query->orWhereNotNull('exception');
$query->orWhere('response', 'NOT LIKE', '%200%');
})
->get()
->toArray();
Expand Down Expand Up @@ -67,6 +69,7 @@ public function scan(Website $website)
* @param Request $request
* @param Website $website
* @param CrawledPage $page
* @return Application|ResponseFactory|Response
* @throws Exception
*/
public function delete(Request $request, Website $website, CrawledPage $page)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;

/**
 * Schema change for the `crawled_pages` table: introduces the `found_on`
 * column and knows how to remove it again on rollback.
 */
class AddingFoundOnToCrawler extends Migration
{
    /**
     * Apply the migration.
     *
     * Adds a nullable long-text `found_on` column to `crawled_pages`,
     * positioned directly after the existing `exception` column.
     *
     * @return void
     */
    public function up()
    {
        Schema::table('crawled_pages', static function (Blueprint $crawledPages) {
            $foundOn = $crawledPages->longText('found_on');

            // Nullable so rows that already exist stay valid without a value.
            $foundOn->nullable();
            $foundOn->after('exception');
        });
    }

    /**
     * Roll the migration back.
     *
     * Drops the `found_on` column from `crawled_pages`.
     *
     * @return void
     */
    public function down()
    {
        Schema::table('crawled_pages', static function (Blueprint $crawledPages) {
            $crawledPages->dropColumn('found_on');
        });
    }
}
2 changes: 1 addition & 1 deletion public/js/maelstrom.js

Large diffs are not rendered by default.

23 changes: 18 additions & 5 deletions resources/js/components/CrawlReport.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,26 @@ export default class CrawlReport extends React.Component {
const expanded = function (row) {
return <div>
<div>
<h4>Response: { row.response || 'Unknown' }</h4>
<h4>Exceptions: { row.exception || 'N/A' }</h4>
<pre style={{
{ row.response && <h4>Response: { row.response || 'Unknown' }</h4> }
{ row.exception && <h4>Exceptions: { row.exception || 'N/A' }</h4> }
{ row.messages && <pre style={{
overflowX: 'auto',
whiteSpace: 'pre-wrap',
wordWrap: 'break-word',
}} className="mt-4 shadow block max-w-full p-6 bg-white m-0" dangerouslySetInnerHTML={{ __html: (row.messages) }} />
}} className="mt-4 shadow block max-w-full p-6 bg-white m-0" dangerouslySetInnerHTML={{ __html: (row.messages) }} /> }
{ row.found_on.length && <div
className="mt-4 ant-list ant-list-sm ant-list-split ant-list-bordered ant-list-something-after-last-item">
<div className="ant-list-header">
<div>URL found on</div>
</div>
<ul className="ant-list-items">
{row.found_on.map((url, i) => <li key={i} className="ant-list-item">
<a href={url} target="_blank">
{ url }
</a>
</li>)}
</ul>
</div> }
</div>
</div>
};
Expand All @@ -94,7 +107,7 @@ export default class CrawlReport extends React.Component {
expandedRowRender={ expanded }
columns={[
{ key: 'response', title: 'Response', dataIndex: 'response', render: (text, row) => tag(row) },
{ key: 'updated_at', title: 'Found on', dataIndex: 'updated_at', render: text => formatDateTime(text) },
{ key: 'updated_at', title: 'Date found', dataIndex: 'updated_at', render: text => formatDateTime(text) },
{ key: 'url', title: 'URL', dataIndex: 'url', render: (text, row) => url(row) },
{ key: 'summary', title: 'Messages', dataIndex: 'summary', render: (text, row) => summary(row) },
{ key: 'id', title: '', dataIndex: 'id', render: (text, row) => remove(row, this)},
Expand Down
2 changes: 2 additions & 0 deletions resources/js/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ export const YELLOW = '#efaf41';
export const RED = '#e23c39';

export const truncate = (string, length = 60) => {
string = string || '';

if (string.length > length && string.length > 0) {
let new_str = string + ' ';
new_str = string.substr(0, length);
Expand Down

0 comments on commit 098b65c

Please sign in to comment.