Skip to content

Commit

Permalink
🔨 WIP - beta version of crawl notifications
Browse files Browse the repository at this point in the history
  • Loading branch information
Owen Melbourne committed Nov 9, 2020
1 parent 03a5cf0 commit a8e47c6
Show file tree
Hide file tree
Showing 12 changed files with 213 additions and 6 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Odin is a domain monitoring tool which has the following monitors:
- RobotsHasChanged
- WebsiteIsBackUp
- WebsiteIsDown
- BrowserMessageDetected

## Installing

Expand Down
23 changes: 22 additions & 1 deletion app/Checkers/BrowserConsole.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

namespace App\Checkers;

use App\Website;
use App\CrawledPage;
use App\Crawler\Browsershot;
use App\Notifications\BrowserMessageDetected;

class BrowserConsole
{
Expand All @@ -12,22 +14,41 @@ class BrowserConsole
*/
private $page;

public function __construct(CrawledPage $page)
/**
* @var Website
*/
private $website;

/**
 * Store the website and the crawled page this checker works on.
 *
 * @param Website $website
 * @param CrawledPage $page
 */
public function __construct(Website $website, CrawledPage $page)
{
    $this->website = $website;
    $this->page = $page;
}

/**
 * Run the check: fetch() first, then notify().
 */
public function run()
{
    // Order matters: notify() inspects the messages that fetch() stores on the page.
    $this->fetch();

    $this->notify();
}

/**
 * Collect the console output for the page URL and persist it on the page.
 */
private function fetch()
{
    $browser = Browsershot::url($this->page->url)
        ->setBinPath(app_path('Crawler/browser.js'))
        ->windowSize(1440, 900);

    $output = $browser->consoleOutput();

    // Any falsy output (e.g. an empty string) is stored as null.
    $this->page->messages = $output ? $output : null;

    $this->page->save();
}

/**
 * Send a BrowserMessageDetected notification to the website's user
 * when the page has stored messages; otherwise do nothing.
 */
private function notify()
{
    if (! empty($this->page->messages)) {
        $recipient = $this->website->user;

        $recipient->notify(
            new BrowserMessageDetected($this->website, $this->page)
        );
    }
}
}
16 changes: 15 additions & 1 deletion app/Checkers/Page.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Exception;
use App\Website;
use App\CrawledPage;
use Spatie\Crawler\Crawler;
use GuzzleHttp\RequestOptions;
use App\Crawler\CrawlObserver;
Expand All @@ -20,9 +21,22 @@ public function __construct(Website $website)

/**
 * Run the check: prune() first, then fetch().
 */
public function run()
{
    $this->prune();

    $this->fetch();
}

/**
 * Delete the website's crawled pages whose updated_at is more than a month old.
 *
 * Records are loaded and deleted one at a time via each model's delete().
 */
private function prune()
{
    $stalePages = $this->website
        ->crawledPages()
        ->where('updated_at', '<', now()->subMonth())
        ->get();

    foreach ($stalePages as $stalePage) {
        $stalePage->delete();
    }
}

private function fetch()
{
try {
Expand All @@ -38,7 +52,7 @@ private function fetch()
],
])
->ignoreRobots()
// ->executeJavaScript()
->executeJavaScript()
->setDelayBetweenRequests(1000)
->setConcurrency(3)
->setCrawlObserver(new CrawlObserver($this->website))
Expand Down
37 changes: 37 additions & 0 deletions app/Console/Commands/ScanCrawlerCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

namespace App\Console\Commands;

use App\Jobs\PageCheck;
use App\Website;
use Illuminate\Console\Command;

class ScanCrawlerCommand extends Command
{
    /**
     * Signature used to invoke this command from the console.
     *
     * @var string
     */
    protected $signature = 'scan:crawler';

    /**
     * Human-readable description of the command.
     *
     * @var string
     */
    protected $description = 'Schedules the crawler for all websites and collects all the linked URLs.';

    /**
     * Dispatch a PageCheck job for every website returned by the canCrawl() query,
     * dumping one line per website as it is queued.
     *
     * @return mixed
     */
    public function handle()
    {
        $websites = Website::canCrawl()->get();

        foreach ($websites as $website) {
            $message = 'Crawler queued for ' . $website->url;

            dump($message);
            PageCheck::dispatch($website);
        }
    }
}
3 changes: 3 additions & 0 deletions app/Console/Commands/ScanEverythingCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use App\Website;
use App\Jobs\DnsCheck;
use App\Jobs\PageCheck;
use App\Jobs\RobotsCheck;
use App\Jobs\UptimeCheck;
use App\Checkers\Certificate;
Expand Down Expand Up @@ -43,6 +44,8 @@ public function handle()
dump('Uptime check queued.');
DnsCheck::dispatch($website);
dump('DNS check queued.');
PageCheck::dispatch($website);
dump('DNS check queued.');
echo PHP_EOL;
});
}
Expand Down
3 changes: 2 additions & 1 deletion app/Console/Kernel.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ protected function schedule(Schedule $schedule)
$schedule->command('scan:certificates')->dailyAt('08:00:00')->withoutOverlapping()->runInBackground();
$schedule->command('scan:opengraph')->dailyAt('08:00:00')->withoutOverlapping()->runInBackground();
$schedule->command('scan:consoles')->daily()->withoutOverlapping();
$schedule->command('scan:scan:visual-diffs')->daily()->withoutOverlapping();
$schedule->command('scan:visual-diffs')->daily()->withoutOverlapping();
$schedule->command('scan:crawler')->weekly()->withoutOverlapping();
$schedule->command('horizon:snapshot')->everyFiveMinutes();
}

Expand Down
5 changes: 5 additions & 0 deletions app/CrawledPage.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,9 @@ class CrawledPage extends Model
protected $casts = [
'website_id' => 'int',
];

/**
 * Relationship to the Website this crawled page belongs to.
 */
public function website()
{
    $relation = $this->belongsTo(Website::class);

    return $relation;
}
}
10 changes: 9 additions & 1 deletion app/Crawler/CrawlObserver.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace App\Crawler;

use App\Website;
use App\CrawledPage;
use Illuminate\Support\Str;
use App\Jobs\BrowserConsoleCheck;
use Psr\Http\Message\UriInterface;
Expand Down Expand Up @@ -54,7 +55,7 @@ public function crawled(UriInterface $url, ResponseInterface $response, ?UriInte
$page->exception = null;
$page->response = $response->getStatusCode() . ' - ' . $response->getReasonPhrase();

BrowserConsoleCheck::dispatch($page);
BrowserConsoleCheck::dispatch($this->website, $page);

return $page->save();
}
Expand All @@ -78,6 +79,13 @@ public function crawlFailed(UriInterface $url, RequestException $requestExceptio
$page->response = null;
$page->exception = $requestException->getCode() . ' - ' . $requestException->getMessage();

$this->notify($page);

return $page->save();
}

/**
 * Placeholder hook called from crawlFailed() before the page is saved.
 *
 * The body is currently empty, so no notification is sent for failed crawls.
 *
 * @param CrawledPage $page
 */
private function notify(CrawledPage $page)
{
    // Debug output left disabled; NOTE(review): presumably a real notification is planned here — confirm.
    // dump($page->exception);
}
}
10 changes: 10 additions & 0 deletions app/Crawler/browser.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ const callChrome = async () => {
console.log("\n");
});

page.on('pageerror', msg => {
console.log(msg);
console.log("\n");
});

page.on('error', msg => {
console.log(msg);
console.log("\n");
});

if (request.options && request.options.disableJavascript) {
await page.setJavaScriptEnabled(false);
}
Expand Down
12 changes: 10 additions & 2 deletions app/Jobs/BrowserConsoleCheck.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace App\Jobs;

use App\Website;
use App\CrawledPage;
use Illuminate\Bus\Queueable;
use App\Checkers\BrowserConsole;
Expand All @@ -19,14 +20,21 @@ class BrowserConsoleCheck implements ShouldQueue
*/
private $page;

/**
* @var Website
*/
private $website;

/**
* Create a new job instance.
*
* @param Website $website
* @param CrawledPage $page
*/
public function __construct(CrawledPage $page)
public function __construct(Website $website, CrawledPage $page)
{
    // Keep both models on the job so handle() can pass them to the checker.
    $this->website = $website;
    $this->page = $page;
}

/**
Expand All @@ -36,7 +44,7 @@ public function __construct(CrawledPage $page)
*/
public function handle()
{
    // Build the checker from the stored website and page, then run it immediately.
    (new BrowserConsole($this->website, $this->page))->run();
}
}
81 changes: 81 additions & 0 deletions app/Notifications/BrowserMessageDetected.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<?php

namespace App\Notifications;

use App\Website;
use App\CrawledPage;
use App\CertificateScan;
use Illuminate\Bus\Queueable;
use Illuminate\Notifications\Notification;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Notifications\Messages\MailMessage;

/**
 * Notification carrying the browser console messages recorded for a crawled page.
 *
 * NOTE(review): the class uses the Queueable trait but does not implement
 * ShouldQueue (which this file imports) — confirm whether it is meant to be queued.
 */
class BrowserMessageDetected extends Notification
{
    use Queueable;

    /**
     * Website the page belongs to; used for the subject line and the stored payload.
     *
     * @var Website
     */
    private $website;

    /**
     * Crawled page whose URL and messages are reported.
     *
     * @var CrawledPage
     */
    private $page;

    /**
     * Create a new notification instance.
     *
     * @param Website $website
     * @param CrawledPage $page
     */
    public function __construct(Website $website, CrawledPage $page)
    {
        $this->website = $website;
        $this->page = $page;
    }

    /**
     * Get the notification's delivery channels.
     *
     * @param mixed $notifiable
     * @return array
     */
    public function via($notifiable)
    {
        return ['database', 'mail'];
    }

    /**
     * Get the mail representation of the notification.
     *
     * Renders the mail.browser-message markdown view with the website and page.
     *
     * @param mixed $notifiable
     * @return \Illuminate\Notifications\Messages\MailMessage
     */
    public function toMail($notifiable)
    {
        return (new MailMessage)
            // The subject prefix is the pager emoji; it was previously stored as mis-decoded bytes.
            ->subject('📟 Browser Message Detected: ' . $this->website->url)
            ->markdown('mail.browser-message', [
                'website' => $this->website,
                'page' => $this->page,
            ]);
    }

    /**
     * Get the array representation of the notification.
     *
     * @param mixed $notifiable
     * @return array
     */
    public function toArray($notifiable)
    {
        return [
            'website_id' => $this->website->id,
            'website' => $this->website->url,
            'url' => $this->page->url,
            'messages' => $this->page->messages,
        ];
    }
}
18 changes: 18 additions & 0 deletions resources/views/mail/browser-message.blade.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@component('mail::message')
{{-- Rendered by the BrowserMessageDetected notification; expects $website and $page. --}}
# Browser message detected on: {{ $website->url }}

The following URL produced browser console output. Please investigate:

{{ $page->url }}

Please see the output below:

<div style="padding: 20px; background: #f1f2fb;">
<pre style="margin:0;padding:0;"><code style="margin:0;padding:0;">{{ $page->messages }}</code></pre>
</div>

@component('mail::button', ['url' => $website->show_link])
Open Monitor
@endcomponent

Thanks,<br>
{{ config('app.name') }}
@endcomponent

0 comments on commit a8e47c6

Please sign in to comment.