Skip to content

Commit

Permalink
feat!: Rework bot detection rule with allow/deny configuration (#1437)
Browse files Browse the repository at this point in the history
This reworks the bot detection rule to be configured like the sensitive info rule, which is to say you can either `allow` or `deny` a list of bots. I've also reworked the bot detection to look for almost 600 well-known bots.

Closes #39 - we've changed the configuration format and have validation on `allow` and `deny`.
  • Loading branch information
blaine-arcjet authored Sep 4, 2024
1 parent 4cb8098 commit eef18e3
Show file tree
Hide file tree
Showing 31 changed files with 1,950 additions and 861 deletions.
74 changes: 57 additions & 17 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,63 @@ updates:
# patterns:
# - "*"

- package-ecosystem: npm
directory: /examples/express-bots
schedule:
# Our dependencies should be checked daily
interval: daily
assignees:
- blaine-arcjet
- e-moran
reviewers:
- blaine-arcjet
- e-moran
commit-message:
prefix: deps(example)
prefix-development: deps(example)
groups:
dependencies:
patterns:
- "*"

- package-ecosystem: npm
directory: /examples/express-newman
schedule:
# Our dependencies should be checked daily
interval: daily
assignees:
- blaine-arcjet
- e-moran
reviewers:
- blaine-arcjet
- e-moran
commit-message:
prefix: deps(example)
prefix-development: deps(example)
groups:
dependencies:
patterns:
- "*"

- package-ecosystem: npm
directory: /examples/express-sensitive-info
schedule:
# Our dependencies should be checked daily
interval: daily
assignees:
- blaine-arcjet
- e-moran
reviewers:
- blaine-arcjet
- e-moran
commit-message:
prefix: deps(example)
prefix-development: deps(example)
groups:
dependencies:
patterns:
- "*"

- package-ecosystem: npm
directory: /examples/nextjs-14-app-dir-rl
schedule:
Expand Down Expand Up @@ -430,23 +487,6 @@ updates:
patterns:
- "*"

- package-ecosystem: npm
directory: /examples/express-sensitive-info
schedule:
# Our dependencies should be checked daily
interval: daily
assignees:
- blaine-arcjet
reviewers:
- blaine-arcjet
commit-message:
prefix: deps(example)
prefix-development: deps(example)
groups:
dependencies:
patterns:
- "*"

- package-ecosystem: npm
directory: /examples/nodejs-express-launchdarkly
schedule:
Expand Down
37 changes: 12 additions & 25 deletions analyze/edge-light.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ import { instantiate } from "./wasm/arcjet_analyze_js_req.component.js";
import type {
ImportObject,
EmailValidationConfig,
BotDetectionResult,
BotType,
EmailValidationResult,
DetectedSensitiveInfoEntity,
SensitiveInfoEntities,
SensitiveInfoEntity,
SensitiveInfoResult,
BotConfig,
BotResult,
} from "./wasm/arcjet_analyze_js_req.component.js";
import type { ArcjetJsReqSensitiveInformationIdentifier } from "./wasm/interfaces/arcjet-js-req-sensitive-information-identifier.js";

Expand Down Expand Up @@ -115,20 +115,7 @@ async function init(

export {
type EmailValidationConfig,
type BotType,
/**
* Represents the result of the bot detection.
*
* @property `botType` - What type of bot this is. This will be one of `BotType`.
* @property `botScore` - A score ranging from 0 to 99 representing the degree of
* certainty. The higher the number within the type category, the greater the
* degree of certainty. E.g. `BotType.Automated` with a score of 1 means we are
* sure the request was made by an automated bot. `BotType.LikelyNotABot` with a
* score of 30 means we don't think this request was a bot, but it's lowest
* confidence level. `BotType.LikelyNotABot` with a score of 99 means we are
* almost certain this request was not a bot.
*/
type BotDetectionResult,
type BotConfig,
type DetectedSensitiveInfoEntity,
type SensitiveInfoEntity,
type DetectSensitiveInfoFunction,
Expand Down Expand Up @@ -173,7 +160,7 @@ export async function isValidEmail(
if (typeof analyze !== "undefined") {
return analyze.isValidEmail(candidate, optionsOrDefault);
} else {
// Skip the local evaluation of the rule if WASM is not available
// Skip the local evaluation of the rule if Wasm is not available
return {
validity: "valid",
blocked: [],
Expand All @@ -183,22 +170,22 @@ export async function isValidEmail(

export async function detectBot(
context: AnalyzeContext,
headers: string,
patterns_add: string,
patterns_remove: string,
): Promise<BotDetectionResult> {
request: AnalyzeRequest,
options: BotConfig,
): Promise<BotResult> {
const analyze = await init(context);

if (typeof analyze !== "undefined") {
return analyze.detectBot(headers, patterns_add, patterns_remove);
return analyze.detectBot(JSON.stringify(request), options);
} else {
// TODO: Fallback to JS if we don't have WASM?
// Skip the local evaluation of the rule if Wasm is not available
return {
botType: "not-analyzed",
botScore: 0,
allowed: [],
denied: [],
};
}
}

export async function detectSensitiveInfo(
context: AnalyzeContext,
candidate: string,
Expand Down
34 changes: 10 additions & 24 deletions analyze/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ import { instantiate } from "./wasm/arcjet_analyze_js_req.component.js";
import type {
ImportObject,
EmailValidationConfig,
BotDetectionResult,
BotType,
EmailValidationResult,
DetectedSensitiveInfoEntity,
SensitiveInfoEntities,
SensitiveInfoEntity,
SensitiveInfoResult,
BotConfig,
BotResult,
} from "./wasm/arcjet_analyze_js_req.component.js";
import type { ArcjetJsReqSensitiveInformationIdentifier } from "./wasm/interfaces/arcjet-js-req-sensitive-information-identifier.js";

Expand Down Expand Up @@ -129,20 +129,7 @@ async function init(

export {
type EmailValidationConfig,
type BotType,
/**
* Represents the result of the bot detection.
*
* @property `botType` - What type of bot this is. This will be one of `BotType`.
* @property `botScore` - A score ranging from 0 to 99 representing the degree of
* certainty. The higher the number within the type category, the greater the
* degree of certainty. E.g. `BotType.Automated` with a score of 1 means we are
* sure the request was made by an automated bot. `BotType.LikelyNotABot` with a
* score of 30 means we don't think this request was a bot, but it's lowest
* confidence level. `BotType.LikelyNotABot` with a score of 99 means we are
* almost certain this request was not a bot.
*/
type BotDetectionResult,
type BotConfig,
type DetectedSensitiveInfoEntity,
type SensitiveInfoEntity,
type DetectSensitiveInfoFunction,
Expand Down Expand Up @@ -197,19 +184,18 @@ export async function isValidEmail(

export async function detectBot(
context: AnalyzeContext,
headers: string,
patterns_add: string,
patterns_remove: string,
): Promise<BotDetectionResult> {
request: AnalyzeRequest,
options: BotConfig,
): Promise<BotResult> {
const analyze = await init(context);

if (typeof analyze !== "undefined") {
return analyze.detectBot(headers, patterns_add, patterns_remove);
return analyze.detectBot(JSON.stringify(request), options);
} else {
// TODO: Fallback to JS if we don't have WASM?
// Skip the local evaluation of the rule if Wasm is not available
return {
botType: "not-analyzed",
botScore: 0,
allowed: [],
denied: [],
};
}
}
Expand Down
Binary file modified analyze/wasm/arcjet_analyze_js_req.component.core.wasm
Binary file not shown.
Binary file modified analyze/wasm/arcjet_analyze_js_req.component.core2.wasm
Binary file not shown.
Binary file modified analyze/wasm/arcjet_analyze_js_req.component.core3.wasm
Binary file not shown.
44 changes: 23 additions & 21 deletions analyze/wasm/arcjet_analyze_js_req.component.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,6 @@ export { SensitiveInfoEntity };
/**
* # Variants
*
* ## `"unspecified"`
*
* ## `"not-analyzed"`
*
* ## `"automated"`
*
* ## `"likely-automated"`
*
* ## `"likely-not-a-bot"`
*
* ## `"verified-bot"`
*/
export type BotType = 'unspecified' | 'not-analyzed' | 'automated' | 'likely-automated' | 'likely-not-a-bot' | 'verified-bot';
export interface BotDetectionResult {
botType: BotType,
botScore: number,
}
/**
* # Variants
*
* ## `"valid"`
*
* ## `"invalid"`
Expand Down Expand Up @@ -60,6 +40,28 @@ export interface SensitiveInfoResult {
allowed: Array<DetectedSensitiveInfoEntity>,
denied: Array<DetectedSensitiveInfoEntity>,
}
/**
 * A bot entity, represented as a plain string.
 *
 * NOTE(review): presumably the identifier of a well-known bot — confirm the
 * accepted values against the Wasm component's interface definition.
 */
export type BotEntity = string;
/**
 * Payload carried by the `'allowed-bot-config'` variant of `BotConfig`.
 *
 * NOTE(review): field semantics are not visible in this declaration file.
 * Presumably `entities` lists the bots to allow and `skipCustomDetect`
 * disables a custom detection step — confirm against the Wasm component.
 */
export interface AllowedBotConfig {
entities: Array<BotEntity>,
skipCustomDetect: boolean,
}
/**
 * Payload carried by the `'denied-bot-config'` variant of `BotConfig`.
 *
 * NOTE(review): field semantics are not visible in this declaration file.
 * Presumably `entities` lists the bots to deny and `skipCustomDetect`
 * disables a custom detection step — confirm against the Wasm component.
 */
export interface DeniedBotConfig {
entities: Array<BotEntity>,
skipCustomDetect: boolean,
}
/**
 * Options passed to `detectBot`: a tagged union that is either an allow
 * configuration or a deny configuration, discriminated by the `tag` field.
 */
export type BotConfig = BotConfigAllowedBotConfig | BotConfigDeniedBotConfig;
/**
 * `BotConfig` variant tagged `'allowed-bot-config'`; `val` holds the
 * `AllowedBotConfig` payload.
 */
export interface BotConfigAllowedBotConfig {
tag: 'allowed-bot-config',
val: AllowedBotConfig,
}
/**
 * `BotConfig` variant tagged `'denied-bot-config'`; `val` holds the
 * `DeniedBotConfig` payload.
 */
export interface BotConfigDeniedBotConfig {
tag: 'denied-bot-config',
val: DeniedBotConfig,
}
/**
 * Result returned by `detectBot`: two lists of bot entities, `allowed` and
 * `denied`.
 *
 * NOTE(review): presumably these are the detected bots that matched the
 * allow or deny side of the supplied `BotConfig` — confirm against the Wasm
 * component.
 */
export interface BotResult {
allowed: Array<BotEntity>,
denied: Array<BotEntity>,
}
import { ArcjetJsReqEmailValidatorOverrides } from './interfaces/arcjet-js-req-email-validator-overrides.js';
import { ArcjetJsReqLogger } from './interfaces/arcjet-js-req-logger.js';
import { ArcjetJsReqSensitiveInformationIdentifier } from './interfaces/arcjet-js-req-sensitive-information-identifier.js';
Expand All @@ -69,7 +71,7 @@ export interface ImportObject {
'arcjet:js-req/sensitive-information-identifier': typeof ArcjetJsReqSensitiveInformationIdentifier,
}
export interface Root {
detectBot(headers: string, patternsAdd: string, patternsRemove: string): BotDetectionResult,
detectBot(request: string, options: BotConfig): BotResult,
generateFingerprint(request: string, characteristics: Array<string>): string,
isValidEmail(candidate: string, options: EmailValidationConfig): EmailValidationResult,
detectSensitiveInfo(content: string, options: SensitiveInfoConfig): SensitiveInfoResult,
Expand Down
Loading

0 comments on commit eef18e3

Please sign in to comment.