From 0d515f585d0ea331a3e7959b1a009d9100af8a3f Mon Sep 17 00:00:00 2001 From: Greg Roach Date: Wed, 11 Oct 2023 16:18:08 +0100 Subject: [PATCH] Update BadBotBlocker to include crawlers that collect training data for LLMs --- app/Http/Middleware/BadBotBlocker.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/Http/Middleware/BadBotBlocker.php b/app/Http/Middleware/BadBotBlocker.php index 113226db06a..4d3ca88ae84 100644 --- a/app/Http/Middleware/BadBotBlocker.php +++ b/app/Http/Middleware/BadBotBlocker.php @@ -71,9 +71,13 @@ class BadBotBlocker implements MiddlewareInterface 'Barkrowler', 'BLEXBot', 'Bytespider', + 'CCBot', // Used to train a number of LLMs + 'ChatGPT-User', // Used by ChatGPT during operation 'DataForSEO', 'DataForSeoBot', // https://dataforseo.com/dataforseo-bot 'DotBot', + 'FacebookBot', // Collects training data for Facebook's LLM translator. + 'Google-Extended', // Collects training data for Google Bard 'GPTBot', // Collects training data for ChatGPT 'Grapeshot', 'Honolulu-bot', // Aggressive crawer, no info available @@ -83,6 +87,7 @@ class BadBotBlocker implements MiddlewareInterface 'MegaIndex.ru', 'MJ12bot', 'netEstate NE', + 'Omgilibot', // Collects training data for LLMs 'panscient', 'PetalBot', 'proximic',