From d9c0894ce5ea2ea06574e505207d9d129d5095f0 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 13:43:23 +0100 Subject: [PATCH 01/12] feat: add fraud detection use case Demonstrate ArcadeDB's multi-model fraud detection capabilities across four signal types: graph traversal, vector similarity, time-series analysis, and full-text fuzzy matching with 8 query patterns. Includes docker-compose, setup script, SQL schema/data, curl queries, Java runner, CI workflow, and design docs. Targets ArcadeDB 26.3.1-SNAPSHOT. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/fraud-detection.yml | 73 +++++ docs/plans/2026-03-02-fraud-detection-ci.md | 135 ++++++++++ .../2026-03-02-fraud-detection-design.md | 243 +++++++++++++++++ fraud-detection/README.md | 101 +++++++ fraud-detection/docker-compose.yml | 13 + fraud-detection/java/pom.xml | 57 ++++ .../com/arcadedb/examples/FraudDetection.java | 250 ++++++++++++++++++ fraud-detection/queries/queries.sh | 134 ++++++++++ fraud-detection/setup.sh | 51 ++++ fraud-detection/sql/01-schema.sql | 55 ++++ fraud-detection/sql/02-data.sql | 165 ++++++++++++ 11 files changed, 1277 insertions(+) create mode 100644 .github/workflows/fraud-detection.yml create mode 100644 docs/plans/2026-03-02-fraud-detection-ci.md create mode 100644 docs/plans/2026-03-02-fraud-detection-design.md create mode 100644 fraud-detection/README.md create mode 100644 fraud-detection/docker-compose.yml create mode 100644 fraud-detection/java/pom.xml create mode 100644 fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java create mode 100755 fraud-detection/queries/queries.sh create mode 100755 fraud-detection/setup.sh create mode 100644 fraud-detection/sql/01-schema.sql create mode 100644 fraud-detection/sql/02-data.sql diff --git a/.github/workflows/fraud-detection.yml b/.github/workflows/fraud-detection.yml new file mode 100644 index 0000000..71299dd --- /dev/null +++ b/.github/workflows/fraud-detection.yml @@ -0,0 +1,73 @@ +name: 
Fraud Detection CI + +on: + push: + paths: + - fraud-detection/** + - .github/workflows/fraud-detection.yml + pull_request: + paths: + - fraud-detection/** + - .github/workflows/fraud-detection.yml + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: read + strategy: + fail-fast: false + matrix: + runner: [curl, java] + + env: + ARCADEDB_URL: http://localhost:2480 + ARCADEDB_USER: root + ARCADEDB_PASS: arcadedb + + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + + - name: Set up Java + if: matrix.runner == 'java' + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: '21' + distribution: 'temurin' + + - name: Cache Maven repository + if: matrix.runner == 'java' + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('fraud-detection/java/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + + - name: Start ArcadeDB + working-directory: fraud-detection + run: docker compose up -d + + - name: Setup database + working-directory: fraud-detection + run: ./setup.sh + + - name: Run curl queries + if: matrix.runner == 'curl' + working-directory: fraud-detection + run: ./queries/queries.sh + + - name: Build and run Java + if: matrix.runner == 'java' + working-directory: fraud-detection/java + run: | + mvn package --no-transfer-progress + java -jar target/fraud-detection.jar + + - name: Teardown + if: always() + working-directory: fraud-detection + run: docker compose down diff --git a/docs/plans/2026-03-02-fraud-detection-ci.md b/docs/plans/2026-03-02-fraud-detection-ci.md new file mode 100644 index 0000000..be4407b --- /dev/null +++ b/docs/plans/2026-03-02-fraud-detection-ci.md @@ -0,0 +1,135 @@ +# Fraud Detection CI Workflow Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this 
plan task-by-task. + +**Goal:** Create `.github/workflows/fraud-detection.yml` — a matrix CI workflow that verifies both the `curl` and `java` runners for the fraud-detection use case on every push and pull request. + +**Architecture:** One `test` job with `matrix: runner: [curl, java]`. Each entry is self-contained: it starts ArcadeDB 26.3.1-SNAPSHOT via `docker compose up -d`, runs `./setup.sh` to load schema and data, runs the language-specific command, then tears down with `if: always()`. Pass criterion is exit code 0. Mirrors `.github/workflows/recommendation-engine.yml` exactly — same action versions, same SHA pins, same step structure. + +**Tech Stack:** GitHub Actions, `actions/checkout@v6` (SHA `de0fac2e`), `actions/setup-java@v5` (SHA `be666c2f`, temurin 21), `actions/cache@v5` (SHA `cdf6c1fa`), Docker Compose, Maven 3.x, Java 21, bash/curl/jq (pre-installed on `ubuntu-latest`) + +--- + +### Task 1: Create the workflow file + +**Files:** +- Create: `.github/workflows/fraud-detection.yml` + +**Step 1: Write the file** + +```yaml +name: Fraud Detection CI + +on: + push: + paths: + - fraud-detection/** + - .github/workflows/fraud-detection.yml + pull_request: + paths: + - fraud-detection/** + - .github/workflows/fraud-detection.yml + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: read + strategy: + fail-fast: false + matrix: + runner: [curl, java] + + env: + ARCADEDB_URL: http://localhost:2480 + ARCADEDB_USER: root + ARCADEDB_PASS: arcadedb + + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + + - name: Set up Java + if: matrix.runner == 'java' + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: '21' + distribution: 'temurin' + + - name: Cache Maven repository + if: matrix.runner == 'java' + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + path: ~/.m2 + key: ${{ 
runner.os }}-m2-${{ hashFiles('fraud-detection/java/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + + - name: Start ArcadeDB + working-directory: fraud-detection + run: docker compose up -d + + - name: Setup database + working-directory: fraud-detection + run: ./setup.sh + + - name: Run curl queries + if: matrix.runner == 'curl' + working-directory: fraud-detection + run: ./queries/queries.sh + + - name: Build and run Java + if: matrix.runner == 'java' + working-directory: fraud-detection/java + run: | + mvn package --no-transfer-progress + java -jar target/fraud-detection.jar + + - name: Teardown + if: always() + working-directory: fraud-detection + run: docker compose down +``` + +**Step 2: Validate YAML syntax** + +```bash +python3 -c "import yaml; yaml.safe_load(open('.github/workflows/fraud-detection.yml'))" && echo "YAML valid" +``` + +Expected: `YAML valid` + +**Step 3: Commit** + +```bash +git add .github/workflows/fraud-detection.yml +git commit -m "ci: add fraud-detection workflow (curl + java matrix)" +``` + +--- + +### Task 2: Verify the workflow triggers + +**Step 1: Push the branch and check GitHub Actions** + +```bash +git push origin feat/fraud-detection +``` + +Open: `https://github.com/arcadedata/arcadedb-usecases/actions` + +Expected: a `Fraud Detection CI` run appears with two jobs — `test (curl)` and `test (java)`. + +**Step 2: Confirm both jobs pass** + +Both `test (curl)` and `test (java)` should show green checkmarks. If either fails, check the step-level logs: + +- **Start ArcadeDB fails:** confirm `docker compose up -d` runs from the `fraud-detection/` directory — check `working-directory`. 
Note: uses `arcadedata/arcadedb:26.3.1-SNAPSHOT` which must be available on Docker Hub; if the image doesn't exist yet, the job will fail at this step +- **Setup database fails:** `setup.sh` may be timing out waiting for ArcadeDB; check if the healthcheck `retries: 20` at 5s intervals (100s total) is enough — if not, add a `docker compose ps` debug step before `setup.sh` +- **curl queries fail:** confirm `jq` is available with `which jq`; check the `ARCADEDB_PASS` env var is picked up by `queries.sh`. Some queries use ArcadeDB 26.3.1-SNAPSHOT features (`time_bucket`, `vectorDistance`, `full_name.similarity`) — if the server version doesn't support them, the query will return an error +- **Java build fails:** the `arcadedb-network:26.3.1-SNAPSHOT` dependency must be available in Maven Central or a configured snapshot repository; if not, `mvn package` will fail resolving dependencies +- **Java run fails:** the fat JAR should be at `target/fraud-detection.jar`; confirm `finalName` in `pom.xml` matches + +**Step 3: No further commit needed if both pass** diff --git a/docs/plans/2026-03-02-fraud-detection-design.md b/docs/plans/2026-03-02-fraud-detection-design.md new file mode 100644 index 0000000..d9f8741 --- /dev/null +++ b/docs/plans/2026-03-02-fraud-detection-design.md @@ -0,0 +1,243 @@ +# Fraud Detection Use Case — Design + +**Date:** 2026-03-02 +**Branch:** feat/fraud-detection +**ArcadeDB version:** 26.3.1-SNAPSHOT + +## Overview + +Implement the [ArcadeDB Fraud Detection](https://arcadedb.com/fraud-detection.html) use case as the second entry in the `arcadedb-usecases` repository. The use case demonstrates ArcadeDB's ability to unify four detection capabilities — graph relationship analysis, vector-based behavioral anomaly detection, time-series pattern identification, and full-text fuzzy matching — in a single multi-model database. 
+ +## Repository Structure + +Self-contained directory, same layout as the recommendation-engine: + +``` +fraud-detection/ +├── README.md +├── docker-compose.yml +├── setup.sh +├── sql/ +│ ├── 01-schema.sql +│ └── 02-data.sql +├── queries/ +│ └── queries.sh +└── java/ + ├── pom.xml + └── src/main/java/ + └── com/arcadedb/examples/FraudDetection.java +``` + +## Docker Compose + +- Single service: `arcadedata/arcadedb:26.3.1-SNAPSHOT` +- HTTP API port exposed: `2480` +- Root credentials passed as environment variables (`JAVA_OPTS: -Darcadedb.server.rootPassword=arcadedb`) +- Healthcheck on `/api/v1/ready` + +## Schema (`sql/01-schema.sql`) + +Eight vertex types, six edge types, one document type. + +**Vertices:** +- `Account` — `id` (STRING), `name` (STRING), `full_name` (STRING), `ssn` (STRING), `credit_limit` (FLOAT), `balance` (FLOAT) +- `Customer` — `id` (STRING), `baseline_behavior` (STRING), `recent_behavior` (STRING), `profile_embedding` (LIST) +- `Device` — `id` (STRING), `fingerprint` (STRING) +- `Phone` — `number` (STRING) +- `Address` — `street` (STRING), `city` (STRING), `zip` (STRING) +- `Email` — `address` (STRING) +- `Beneficiary` — `id` (STRING), `name` (STRING) +- `Transaction` — `id` (STRING), `amount` (FLOAT), `merchant` (STRING), `behavior_embedding` (LIST), `ts` (DATETIME) + +**Edges:** +- `USES_DEVICE` — Account → Device +- `HAS_PHONE` — Account → Phone +- `HAS_ADDRESS` — Account → Address +- `HAS_EMAIL` — Account → Email +- `TRANSFERRED_TO` — Account → Account (properties: `amount` FLOAT, `ts` DATETIME) +- `BENEFICIARY_OF` — Account → Beneficiary + +**Document types:** +- `Deposit` — `account_id` (STRING), `amount` (FLOAT), `ts` (DATETIME) + +**Indexes:** +- `UNIQUE` on `Account(id)`, `Customer(id)`, `Transaction(id)` +- `LSM_VECTOR` on `Customer(profile_embedding)` — 8 dimensions, COSINE +- `LSM_VECTOR` on `Transaction(behavior_embedding)` — 8 dimensions, COSINE +- `FULL_TEXT` on `Account(full_name)` + +## Sample Data (`sql/02-data.sql`) + 
+Approximately 60–70 records across 11 accounts with distinct fraud patterns: + +**Fraud Ring (accounts A–E):** +- 5 accounts sharing one Device (`dev-shared`) and one Phone (`phone-shared`) +- Circular transfers A→B→C→D→E→A, amounts $8,000–$9,500, spread over 30 days +- Each account has its own unique Email +- 3+ deposits per day in the $8,000–$9,999 range (structuring pattern) + +**Synthetic Identity Pair (accounts F–G):** +- `acct-F` ("Robert J. Smith", SSN "123-45-6789") and `acct-G` ("Rob Smith Jr.", same SSN) +- Same Address, different Phones and Emails +- `full_name` similarity between 0.4–0.9 + +**Velocity Attacker (account H):** +- 10+ transactions in a 5-minute window +- `behavior_embedding` deviates significantly from Customer `profile_embedding` (vectorDistance > 0.7) + +**Legitimate Accounts (L1–L3):** +- Each has unique Device, Phone, Address, Email +- Normal transfer patterns, occasional deposits of varying amounts +- `behavior_embedding` close to `profile_embedding` (vectorDistance < 0.3) + +**Customers:** +- 11 Customer records (one per account) with 8-dimensional `profile_embedding` vectors +- Fraud ring members share similar embeddings; legitimate accounts have distinct profiles + +All embeddings use 8-dimensional float arrays. 
+ +## Queries + +Eight query patterns covering all four signal types: + +| # | Pattern | Language | Signal Type | +|---|---------|----------|-------------| +| 1 | Fraud Ring Detection | Cypher | Graph | +| 2 | Synthetic Identity Resolution | SQL | Full-Text | +| 3 | Circular Money Flow | Cypher | Graph | +| 4 | Structuring Detection | SQL | Time-Series | +| 5 | Behavioral Anomaly | SQL | Vector | +| 6 | Velocity Attack Detection | SQL | Time-Series | +| 7 | Correlated Account Activity | SQL | Time-Series | +| 8 | Multi-Model Investigation | SQL | Combined | + +### Query 1: Fraud Ring Detection (Graph Traversal) + +Multi-hop traversal through shared identifiers to find accounts connected to a flagged account: + +```cypher +MATCH (flagged:Account {id: 'acct-A'}) + -[:USES_DEVICE|HAS_PHONE|HAS_ADDRESS*1..4]- + (connected:Account) +WHERE connected <> flagged +RETURN DISTINCT connected.id, connected.name +``` + +### Query 2: Synthetic Identity Resolution (Full-Text) + +Fuzzy matching on `full_name` where SSN matches but names differ: + +```sql +SELECT a.id, b.id, a.full_name, b.full_name +FROM Account AS a, Account AS b +WHERE a.ssn = b.ssn + AND a.id < b.id + AND a.full_name.similarity(b.full_name) BETWEEN 0.4 AND 0.9 +``` + +### Query 3: Circular Money Flow (Graph Cycles) + +Detect circular transfer paths returning to origin within 30 days: + +```cypher +MATCH path = (origin:Account)-[:TRANSFERRED_TO*3..6]->(origin) +WHERE all(t IN relationships(path) + WHERE t.ts > datetime() - duration('P30D')) +RETURN origin.id, [n IN nodes(path) | n.id] AS chain +``` + +### Query 4: Structuring Detection (Time-Series Bucketing) + +Flag accounts making 3+ deposits per day in the $8,000–$9,999 range: + +```sql +SELECT time_bucket('1d', ts) AS day, account_id, count(*) AS deposit_count +FROM Deposit +WHERE amount BETWEEN 8000 AND 9999 +GROUP BY day, account_id +HAVING deposit_count >= 3 +``` + +### Query 5: Behavioral Anomaly (Vector Distance) + +Detect transactions whose behavioral 
embedding deviates from the customer's profile: + +```sql +SELECT t.id, t.amount, t.merchant, + vectorDistance(t.behavior_embedding, c.profile_embedding) AS deviation +FROM Transaction t +JOIN Customer c ON t.account_id = c.id +WHERE vectorDistance(t.behavior_embedding, c.profile_embedding) > 0.7 +ORDER BY deviation DESC +``` + +### Query 6: Velocity Attack Detection (Time-Series Rate) + +Detect accounts with abnormally high transaction rates over a 5-minute window: + +```sql +SELECT account_id, rate(ts) AS current_tps +FROM Transaction +WHERE ts > now() - INTERVAL '5m' +GROUP BY account_id +HAVING current_tps > 2 +``` + +### Query 7: Correlated Account Activity (Time-Series Correlation) + +Detect coordinated transfer activity between two accounts: + +```sql +SELECT correlate(a.amount, b.amount) AS correlation +FROM Transaction a, Transaction b +WHERE a.account_id = 'acct-A' AND b.account_id = 'acct-B' + AND a.ts > now() - INTERVAL '30d' + AND b.ts > now() - INTERVAL '30d' +``` + +### Query 8: Multi-Model Investigation (Combined) + +Composite risk score blending graph distance, temporal patterns, and behavioral deviation. Starts with graph traversal to find connected accounts, enriches with velocity and vector anomaly scores. + +## curl Queries (`queries/queries.sh`) + +Eight labeled sections, one per query pattern, each POSTing to `http://localhost:2480/api/v1/query/FraudDetection`. Same `query()` helper function as the recommendation-engine. + +All queries use hardcoded values matching `02-data.sql` (known account IDs, the shared device, the synthetic identity SSN) so the script works out-of-the-box after setup. + +## Java Program (`java/`) + +- **Build tool:** Maven (standalone `pom.xml`, no parent) +- **Dependency:** `com.arcadedb:arcadedb-network:26.3.1-SNAPSHOT` +- **Output:** executable fat JAR via `maven-assembly-plugin` (`mvn package` → `java -jar target/fraud-detection.jar`) +- **Entry point:** single `FraudDetection.java` with a `main` method that: + 1.
Opens a `RemoteDatabase` connection to `localhost:2480` + 2. Runs all 8 queries sequentially, each wrapped in `tryRun()` + 3. Prints a header and formatted results for each query to stdout + 4. Closes the connection + +## Query Language Mapping + +| # | Pattern | Language | +|---|---------|----------| +| 1 | Fraud Ring Detection | Cypher | +| 2 | Synthetic Identity Resolution | SQL | +| 3 | Circular Money Flow | Cypher | +| 4 | Structuring Detection | SQL | +| 5 | Behavioral Anomaly | SQL | +| 6 | Velocity Attack Detection | SQL | +| 7 | Correlated Account Activity | SQL | +| 8 | Multi-Model Investigation | SQL | + +## Success Criteria + +- `docker compose up` starts ArcadeDB 26.3.1-SNAPSHOT successfully +- SQL files apply cleanly via `setup.sh` with no errors +- `queries.sh` runs all 8 queries and returns non-empty result sets +- `mvn package && java -jar target/fraud-detection.jar` runs all 8 queries and prints results to stdout +- Fraud ring query returns accounts B–E when investigating account A +- Synthetic identity query returns the F/G pair +- Circular flow query detects the A→B→C→D→E→A cycle +- Structuring query flags fraud ring accounts with 3+ sub-$10K deposits per day +- Behavioral anomaly query flags account H's transactions +- Velocity query flags account H diff --git a/fraud-detection/README.md b/fraud-detection/README.md new file mode 100644 index 0000000..f96d76a --- /dev/null +++ b/fraud-detection/README.md @@ -0,0 +1,101 @@ +# Fraud Detection + +Demonstrates ArcadeDB's multi-model capabilities by implementing a fraud detection +system that unifies four signal types in a single database: + +- **Graph traversal** — fraud ring detection via shared identifier patterns +- **Vector similarity** — behavioral anomaly detection using embeddings +- **Time-series** — structuring and velocity attack detection via temporal analysis +- **Full-text fuzzy matching** — synthetic identity resolution via name similarity + +## Prerequisites + +- Docker and Docker 
Compose +- `curl` and `jq` +- Java 21+ and Maven 3.x (for the Java demo) + +## Quickstart + +### 1. Start ArcadeDB + +```bash +docker compose up -d +``` + +### 2. Create database and load data + +```bash +./setup.sh +``` + +This creates the `FraudDetection` database, applies the schema, and inserts sample data. + +### 3a. Run queries via curl + +```bash +./queries/queries.sh +``` + +### 3b. Run queries via Java + +```bash +cd java +mvn package -q +java -jar target/fraud-detection.jar +``` + +## Schema + +| Type | Kind | Key properties | +|------|------|----------------| +| `Account` | Vertex | `id`, `name`, `full_name`, `ssn`, `credit_limit`, `balance` | +| `Customer` | Vertex | `id`, `baseline_behavior`, `recent_behavior`, `profile_embedding` | +| `Device` | Vertex | `id`, `fingerprint` | +| `Phone` | Vertex | `number` | +| `Address` | Vertex | `street`, `city`, `zip` | +| `Email` | Vertex | `address` | +| `Beneficiary` | Vertex | `id`, `name` | +| `Transaction` | Vertex | `id`, `amount`, `merchant`, `behavior_embedding`, `ts` | +| `USES_DEVICE` | Edge | Account → Device | +| `HAS_PHONE` | Edge | Account → Phone | +| `HAS_ADDRESS` | Edge | Account → Address | +| `HAS_EMAIL` | Edge | Account → Email | +| `TRANSFERRED_TO` | Edge | Account → Account (`amount`, `ts`) | +| `BENEFICIARY_OF` | Edge | Account → Beneficiary | +| `Deposit` | Document | `account_id`, `amount`, `ts` | + +## Query Patterns + +| # | Pattern | Language | Signal type | +|---|---------|----------|-------------| +| 1 | Fraud Ring Detection | Cypher | Graph | +| 2 | Synthetic Identity Resolution | SQL + full_name.similarity() | Full-Text | +| 3 | Circular Money Flow | Cypher | Graph | +| 4 | Structuring Detection | SQL + time_bucket() | Time-Series | +| 5 | Behavioral Anomaly | SQL + vectorDistance() | Vector | +| 6 | Velocity Attack Detection | SQL | Time-Series | +| 7 | Correlated Account Activity | SQL | Time-Series | +| 8 | Multi-Model Investigation | SQL + MATCH | Combined | + +## Sample Data +
+- 11 accounts across four profiles: fraud ring (A–E), synthetic identity pair (F–G), + velocity attacker (H), and legitimate users (L1–L3) +- 11 customers with 8-dimensional profile embedding vectors +- Shared Device and Phone for fraud ring members; unique identifiers for others +- Circular transfers A→B→C→D→E→A ($8K–$9.5K over 30 days) +- 10 rapid-fire transactions for account H (velocity pattern) +- Structuring deposits: 3+ per day in the $8K–$9,999 range for fraud ring +- Normal transactions and deposits for legitimate accounts + +## ArcadeDB Version Notes + +This use case targets ArcadeDB **26.3.1-SNAPSHOT**. It uses: +- `vectorDistance()` for behavioral anomaly detection with `LSM_VECTOR` indexes +- `time_bucket('1d', ts)` for time-series bucketing +- `full_name.similarity()` for full-text fuzzy matching with a `FULL_TEXT` index +- Cypher queries for graph traversal and cycle detection + +## Reference + +[ArcadeDB Fraud Detection use case](https://arcadedb.com/fraud-detection.html) diff --git a/fraud-detection/docker-compose.yml b/fraud-detection/docker-compose.yml new file mode 100644 index 0000000..011f8bc --- /dev/null +++ b/fraud-detection/docker-compose.yml @@ -0,0 +1,13 @@ +services: + arcadedb: + image: arcadedata/arcadedb:26.3.1-SNAPSHOT + ports: + - "2480:2480" + environment: + JAVA_OPTS: "-Darcadedb.server.rootPassword=arcadedb" + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:2480/api/v1/ready"] + interval: 5s + timeout: 3s + retries: 20 + start_period: 10s diff --git a/fraud-detection/java/pom.xml b/fraud-detection/java/pom.xml new file mode 100644 index 0000000..b9dfc39 --- /dev/null +++ b/fraud-detection/java/pom.xml @@ -0,0 +1,57 @@ + + + 4.0.0 + + com.arcadedb.examples + fraud-detection + 1.0-SNAPSHOT + jar + + + 21 + 21 + UTF-8 + 26.3.1-SNAPSHOT + + + + + com.arcadedb + arcadedb-network + ${arcadedb.version} + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.8.0 + + + + com.arcadedb.examples.FraudDetection + + 
+ + jar-with-dependencies + + fraud-detection + false + + + + make-assembly + package + + single + + + + + + + diff --git a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java new file mode 100644 index 0000000..e84b142 --- /dev/null +++ b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java @@ -0,0 +1,262 @@ +package com.arcadedb.examples; + +import com.arcadedb.query.sql.executor.Result; +import com.arcadedb.query.sql.executor.ResultSet; +import com.arcadedb.remote.RemoteDatabase; + +public class FraudDetection { + + private static final String HOST = System.getenv().getOrDefault("ARCADEDB_HOST", "localhost"); + private static final int PORT = Integer.parseInt(System.getenv().getOrDefault("ARCADEDB_PORT", "2480")); + private static final String DB_NAME = "FraudDetection"; + private static final String USER = System.getenv().getOrDefault("ARCADEDB_USER", "root"); + private static final String PASSWORD = System.getenv().getOrDefault("ARCADEDB_PASS", "arcadedb"); + + // Number of queries that threw an exception; main() uses it to choose the exit status. + private static int failures = 0; + + // Runs every query even when some fail, then exits with status 1 if any failed, so a + // caller that judges success by exit code (e.g. CI) does not pass a broken run. + public static void main(String[] args) { + try (RemoteDatabase db = new RemoteDatabase(HOST, PORT, DB_NAME, USER, PASSWORD)) { + tryRun(() -> runQuery1FraudRing(db), "Query 1"); + tryRun(() -> runQuery2SyntheticIdentity(db), "Query 2"); + tryRun(() -> runQuery3CircularFlow(db), "Query 3"); + tryRun(() -> runQuery4Structuring(db), "Query 4"); + tryRun(() -> runQuery5BehavioralAnomaly(db), "Query 5"); + tryRun(() -> runQuery6VelocityAttack(db), "Query 6"); + tryRun(() -> runQuery7CorrelatedActivity(db), "Query 7"); + tryRun(() -> runQuery8MultiModel(db), "Query 8"); + } + if (failures > 0) { + System.err.println("\n" + failures + " query(ies) failed."); + System.exit(1); + } + System.out.println("\nAll queries complete."); + } + + // Runs one query; an exception is logged and counted rather than rethrown so the + // remaining queries still execute. + private static void tryRun(Runnable r, String name) { + try { + r.run(); + } catch (Exception e) { + failures++; + System.err.println("[" + name + " FAILED] " + e.getMessage()); + } + } + + // Query 1: Fraud Ring Detection (Graph Traversal) + private static void
runQuery1FraudRing(RemoteDatabase db) { + printHeader("Query 1: Fraud Ring Detection (Graph Traversal)", + "Find accounts connected to acct-A through shared identifiers."); + + String cypher = + """ + MATCH (flagged:Account {id: 'acct-A'}) + -[:USES_DEVICE|HAS_PHONE|HAS_ADDRESS*1..4]- + (connected:Account) + WHERE connected <> flagged + RETURN DISTINCT connected.id, connected.name"""; + + try (ResultSet rs = db.query("cypher", cypher)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-12s | %s%n", + r.getProperty("connected.id"), + r.getProperty("connected.name")); + } + } + } + + // Query 2: Synthetic Identity Resolution (Full-Text) + private static void runQuery2SyntheticIdentity(RemoteDatabase db) { + printHeader("Query 2: Synthetic Identity Resolution (Full-Text)", + "Find accounts with matching SSN but fuzzy-similar names."); + + String sql = + """ + SELECT a.id, b.id AS b_id, a.full_name, b.full_name AS b_full_name + FROM Account AS a, Account AS b + WHERE a.ssn = b.ssn + AND a.id < b.id + AND a.full_name.similarity(b.full_name) BETWEEN 0.4 AND 0.9"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-10s | %-10s | %-20s | %s%n", + r.getProperty("id"), + r.getProperty("b_id"), + r.getProperty("full_name"), + r.getProperty("b_full_name")); + } + } + } + + // Query 3: Circular Money Flow (Graph Cycles) + private static void runQuery3CircularFlow(RemoteDatabase db) { + printHeader("Query 3: Circular Money Flow (Graph Cycles)", + "Detect circular transfer paths returning to origin within 30 days."); + + String cypher = + """ + MATCH path = (origin:Account)-[:TRANSFERRED_TO*3..6]->(origin) + WHERE all(t IN relationships(path) + WHERE t.ts > datetime() - duration('P30D')) + RETURN origin.id, [n IN nodes(path) | n.id] AS chain"""; + + try (ResultSet rs = db.query("cypher", cypher)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" origin: %-10s | 
chain: %s%n", + r.getProperty("origin.id"), + r.getProperty("chain")); + } + } + } + + // Query 4: Structuring Detection (Time-Series) + private static void runQuery4Structuring(RemoteDatabase db) { + printHeader("Query 4: Structuring Detection (Time-Series)", + "Flag accounts making 3+ deposits per day in the $8,000-$9,999 range."); + + String sql = + """ + SELECT time_bucket('1d', ts) AS day, account_id, count(*) AS deposit_count + FROM Deposit + WHERE amount BETWEEN 8000 AND 9999 + GROUP BY day, account_id + HAVING deposit_count >= 3"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" day: %-12s | account: %-10s | deposits: %s%n", + r.getProperty("day"), + r.getProperty("account_id"), + r.getProperty("deposit_count")); + } + } + } + + // Query 5: Behavioral Anomaly (Vector Distance) + private static void runQuery5BehavioralAnomaly(RemoteDatabase db) { + printHeader("Query 5: Behavioral Anomaly (Vector Distance)", + "Detect transactions whose behavioral embedding deviates from the customer profile."); + + String sql = + """ + SELECT t.id, t.amount, t.merchant, + vectorDistance(t.behavior_embedding, c.profile_embedding) AS deviation + FROM Transaction t + JOIN Customer c ON t.account_id = c.id + WHERE vectorDistance(t.behavior_embedding, c.profile_embedding) > 0.7 + ORDER BY deviation DESC"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-10s | $%-10.2f | %-15s | deviation: %s%n", + r.getProperty("id"), + ((Number) r.getProperty("amount")).doubleValue(), + r.getProperty("merchant"), + r.getProperty("deviation")); + } + } + } + + // Query 6: Velocity Attack Detection (Time-Series) + private static void runQuery6VelocityAttack(RemoteDatabase db) { + printHeader("Query 6: Velocity Attack Detection (Time-Series)", + "Detect accounts with abnormally high transaction rates in a 5-minute window."); + + String sql = + """ + 
SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn + FROM Transaction + WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' + GROUP BY account_id + HAVING txn_count > 5"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" account: %-10s | txns: %-5s | from: %s | to: %s%n", + r.getProperty("account_id"), + r.getProperty("txn_count"), + r.getProperty("first_txn"), + r.getProperty("last_txn")); + } + } + } + + // Query 7: Correlated Account Activity (Time-Series) + private static void runQuery7CorrelatedActivity(RemoteDatabase db) { + printHeader("Query 7: Correlated Account Activity (Time-Series)", + "Detect coordinated transfer amounts between two accounts."); + + String sql = + """ + SELECT a.account_id AS account_a, b.account_id AS account_b, + avg(a.amount) AS avg_a, avg(b.amount) AS avg_b, + count(*) AS matching_txns + FROM Transaction a, Transaction b + WHERE a.account_id = 'acct-A' AND b.account_id = 'acct-B' + AND a.ts >= '2026-02-01T00:00:00Z' + AND b.ts >= '2026-02-01T00:00:00Z'"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %s <-> %s | avg_a: %s | avg_b: %s | matching: %s%n", + r.getProperty("account_a"), + r.getProperty("account_b"), + r.getProperty("avg_a"), + r.getProperty("avg_b"), + r.getProperty("matching_txns")); + } + } + } + + // Query 8: Multi-Model Investigation (Combined) + private static void runQuery8MultiModel(RemoteDatabase db) { + printHeader("Query 8: Multi-Model Investigation (Combined)", + "Composite risk score blending graph connectivity, velocity, and behavioral deviation."); + + String sql = + """ + SELECT a.id, a.name, + (SELECT count(*) FROM ( + MATCH {type: Account, where: (id = a.id)} + .bothE('USES_DEVICE','HAS_PHONE','HAS_ADDRESS'){} + .bothV(){where: (id != a.id), as: linked} + RETURN linked + )) AS shared_identifiers, + (SELECT 
count(*) FROM Transaction WHERE account_id = a.id) AS txn_count, + c.recent_behavior + FROM Account a + JOIN Customer c ON a.id = c.id + WHERE c.recent_behavior IN ['suspicious', 'anomalous'] + ORDER BY shared_identifiers DESC"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-10s | %-10s | shared: %-5s | txns: %-5s | behavior: %s%n", + r.getProperty("id"), + r.getProperty("name"), + r.getProperty("shared_identifiers"), + r.getProperty("txn_count"), + r.getProperty("recent_behavior")); + } + } + } + + private static void printHeader(String title, String description) { + System.out.println("\n" + "=".repeat(70)); + System.out.println(" " + title); + System.out.println(" " + description); + System.out.println("=".repeat(70)); + } +} diff --git a/fraud-detection/queries/queries.sh b/fraud-detection/queries/queries.sh new file mode 100755 index 0000000..78c988e --- /dev/null +++ b/fraud-detection/queries/queries.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +# Fraud Detection — all eight query patterns via curl +# Prerequisites: ArcadeDB running, setup.sh already executed, jq installed +# Usage: ./queries/queries.sh + +set -euo pipefail + +ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}" +ARCADEDB_USER="${ARCADEDB_USER:-root}" +ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}" +AUTH="${ARCADEDB_USER}:${ARCADEDB_PASS}" +DB="FraudDetection" +QUERY_URL="${ARCADEDB_URL}/api/v1/query/${DB}" + +query() { + local lang="$1" cmd="$2" + jq -cn --arg l "$lang" --arg c "$cmd" '{"language":$l,"command":$c}' \ + | curl -sf -u "$AUTH" -X POST "$QUERY_URL" \ + -H "Content-Type: application/json" -d @- \ + | jq '.result' +} + +# ───────────────────────────────────────────────────────────────────────────── +echo "=== Query 1: Fraud Ring Detection (Graph Traversal) ===" +echo "Find accounts connected to a flagged account through shared identifiers." 
+echo "" +query "cypher" " +MATCH (flagged:Account {id: 'acct-A'}) + -[:USES_DEVICE|HAS_PHONE|HAS_ADDRESS*1..4]- + (connected:Account) +WHERE connected <> flagged +RETURN DISTINCT connected.id, connected.name +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 2: Synthetic Identity Resolution (Full-Text) ===" +echo "Find accounts with matching SSN but fuzzy-similar names." +echo "" +query "sql" " +SELECT a.id, b.id AS b_id, a.full_name, b.full_name AS b_full_name +FROM Account AS a, Account AS b +WHERE a.ssn = b.ssn + AND a.id < b.id + AND a.full_name.similarity(b.full_name) BETWEEN 0.4 AND 0.9 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 3: Circular Money Flow (Graph Cycles) ===" +echo "Detect circular transfer paths returning to origin within 30 days." +echo "" +query "cypher" " +MATCH path = (origin:Account)-[:TRANSFERRED_TO*3..6]->(origin) +WHERE all(t IN relationships(path) + WHERE t.ts > datetime() - duration('P30D')) +RETURN origin.id, [n IN nodes(path) | n.id] AS chain +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 4: Structuring Detection (Time-Series) ===" +echo "Flag accounts making 3+ deposits per day in the \$8,000–\$9,999 range." +echo "" +query "sql" " +SELECT time_bucket('1d', ts) AS day, account_id, count(*) AS deposit_count +FROM Deposit +WHERE amount BETWEEN 8000 AND 9999 +GROUP BY day, account_id +HAVING deposit_count >= 3 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 5: Behavioral Anomaly (Vector Distance) ===" +echo "Detect transactions whose behavioral embedding deviates from the customer profile." 
+echo "" +query "sql" " +SELECT t.id, t.amount, t.merchant, + vectorDistance(t.behavior_embedding, c.profile_embedding) AS deviation +FROM Transaction t +JOIN Customer c ON t.account_id = c.id +WHERE vectorDistance(t.behavior_embedding, c.profile_embedding) > 0.7 +ORDER BY deviation DESC +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 6: Velocity Attack Detection (Time-Series) ===" +echo "Detect accounts with abnormally high transaction rates in a 5-minute window." +echo "" +query "sql" " +SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn +FROM Transaction +WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' +GROUP BY account_id +HAVING txn_count > 5 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 7: Correlated Account Activity (Time-Series) ===" +echo "Detect coordinated transfer amounts between two accounts." +echo "" +query "sql" " +SELECT a.account_id AS account_a, b.account_id AS account_b, + avg(a.amount) AS avg_a, avg(b.amount) AS avg_b, + count(*) AS matching_txns +FROM Transaction a, Transaction b +WHERE a.account_id = 'acct-A' AND b.account_id = 'acct-B' + AND a.ts >= '2026-02-01T00:00:00Z' + AND b.ts >= '2026-02-01T00:00:00Z' +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 8: Multi-Model Investigation (Combined) ===" +echo "Composite risk score blending graph connectivity, velocity, and behavioral deviation." 
+echo "" +query "sql" " +SELECT a.id, a.name, + (SELECT count(*) FROM ( + MATCH {type: Account, where: (id = a.id)} + .bothE('USES_DEVICE','HAS_PHONE','HAS_ADDRESS'){} + .bothV(){where: (id != a.id), as: linked} + RETURN linked + )) AS shared_identifiers, + (SELECT count(*) FROM Transaction WHERE account_id = a.id) AS txn_count, + c.recent_behavior +FROM Account a +JOIN Customer c ON a.id = c.id +WHERE c.recent_behavior IN ['suspicious', 'anomalous'] +ORDER BY shared_identifiers DESC +" diff --git a/fraud-detection/setup.sh b/fraud-detection/setup.sh new file mode 100755 index 0000000..0c1b5c9 --- /dev/null +++ b/fraud-detection/setup.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -euo pipefail + +ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}" +ARCADEDB_USER="${ARCADEDB_USER:-root}" +ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}" +DB_NAME="FraudDetection" + +# ── Wait for ArcadeDB ───────────────────────────────────────────────────────── +echo "Waiting for ArcadeDB at ${ARCADEDB_URL}..." +until curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + "${ARCADEDB_URL}/api/v1/ready" > /dev/null 2>&1; do + sleep 2 +done +echo "ArcadeDB is ready." + +# ── Create database ─────────────────────────────────────────────────────────── +echo "Creating database ${DB_NAME}..." +curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + -X POST "${ARCADEDB_URL}/api/v1/server" \ + -H "Content-Type: application/json" \ + -d "{\"command\": \"create database ${DB_NAME}\"}" > /dev/null || true +echo "Database ready." 
+ +# ── Helper: send one SQL statement ─────────────────────────────────────────── +send_sql() { + local stmt="$1" + jq -cn --arg cmd "$stmt" '{"language":"sql","command":$cmd}' \ + | curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + -X POST "${ARCADEDB_URL}/api/v1/command/${DB_NAME}" \ + -H "Content-Type: application/json" \ + -d @- > /dev/null +} + +# ── Apply a SQL file (one statement per line) ───────────────────────────────── +apply_file() { + local file="$1" + echo "Applying ${file}..." + while IFS= read -r line || [[ -n "$line" ]]; do + # skip blank lines and SQL comments + [[ -z "${line//[[:space:]]/}" || "$line" =~ ^[[:space:]]*-- ]] && continue + send_sql "${line%%;}" + done < "$file" + echo "Done: ${file}" +} + +apply_file "sql/01-schema.sql" +apply_file "sql/02-data.sql" + +echo "" +echo "Setup complete. ${DB_NAME} is ready." diff --git a/fraud-detection/sql/01-schema.sql b/fraud-detection/sql/01-schema.sql new file mode 100644 index 0000000..a1b5e71 --- /dev/null +++ b/fraud-detection/sql/01-schema.sql @@ -0,0 +1,55 @@ +-- Vertex types +CREATE VERTEX TYPE Account IF NOT EXISTS; +CREATE PROPERTY Account.id IF NOT EXISTS STRING; +CREATE PROPERTY Account.name IF NOT EXISTS STRING; +CREATE PROPERTY Account.full_name IF NOT EXISTS STRING; +CREATE PROPERTY Account.ssn IF NOT EXISTS STRING; +CREATE PROPERTY Account.credit_limit IF NOT EXISTS FLOAT; +CREATE PROPERTY Account.balance IF NOT EXISTS FLOAT; +CREATE VERTEX TYPE Customer IF NOT EXISTS; +CREATE PROPERTY Customer.id IF NOT EXISTS STRING; +CREATE PROPERTY Customer.baseline_behavior IF NOT EXISTS STRING; +CREATE PROPERTY Customer.recent_behavior IF NOT EXISTS STRING; +CREATE PROPERTY Customer.profile_embedding IF NOT EXISTS LIST; +CREATE VERTEX TYPE Device IF NOT EXISTS; +CREATE PROPERTY Device.id IF NOT EXISTS STRING; +CREATE PROPERTY Device.fingerprint IF NOT EXISTS STRING; +CREATE VERTEX TYPE Phone IF NOT EXISTS; +CREATE PROPERTY Phone.number IF NOT EXISTS STRING; +CREATE VERTEX TYPE Address IF NOT 
EXISTS; +CREATE PROPERTY Address.street IF NOT EXISTS STRING; +CREATE PROPERTY Address.city IF NOT EXISTS STRING; +CREATE PROPERTY Address.zip IF NOT EXISTS STRING; +CREATE VERTEX TYPE Email IF NOT EXISTS; +CREATE PROPERTY Email.address IF NOT EXISTS STRING; +CREATE VERTEX TYPE Beneficiary IF NOT EXISTS; +CREATE PROPERTY Beneficiary.id IF NOT EXISTS STRING; +CREATE PROPERTY Beneficiary.name IF NOT EXISTS STRING; +CREATE VERTEX TYPE Transaction IF NOT EXISTS; +CREATE PROPERTY Transaction.id IF NOT EXISTS STRING; +CREATE PROPERTY Transaction.amount IF NOT EXISTS FLOAT; +CREATE PROPERTY Transaction.merchant IF NOT EXISTS STRING; +CREATE PROPERTY Transaction.behavior_embedding IF NOT EXISTS LIST; +CREATE PROPERTY Transaction.ts IF NOT EXISTS DATETIME; +CREATE PROPERTY Transaction.account_id IF NOT EXISTS STRING; +-- Edge types +CREATE EDGE TYPE USES_DEVICE IF NOT EXISTS; +CREATE EDGE TYPE HAS_PHONE IF NOT EXISTS; +CREATE EDGE TYPE HAS_ADDRESS IF NOT EXISTS; +CREATE EDGE TYPE HAS_EMAIL IF NOT EXISTS; +CREATE EDGE TYPE TRANSFERRED_TO IF NOT EXISTS; +CREATE PROPERTY TRANSFERRED_TO.amount IF NOT EXISTS FLOAT; +CREATE PROPERTY TRANSFERRED_TO.ts IF NOT EXISTS DATETIME; +CREATE EDGE TYPE BENEFICIARY_OF IF NOT EXISTS; +-- Document types +CREATE DOCUMENT TYPE Deposit IF NOT EXISTS; +CREATE PROPERTY Deposit.account_id IF NOT EXISTS STRING; +CREATE PROPERTY Deposit.amount IF NOT EXISTS FLOAT; +CREATE PROPERTY Deposit.ts IF NOT EXISTS DATETIME; +-- Indexes +CREATE INDEX IF NOT EXISTS ON Account (id) UNIQUE; +CREATE INDEX IF NOT EXISTS ON Customer (id) UNIQUE; +CREATE INDEX IF NOT EXISTS ON Transaction (id) UNIQUE; +CREATE INDEX IF NOT EXISTS ON Customer (profile_embedding) LSM_VECTOR METADATA { dimensions: 8, similarity: 'COSINE' }; +CREATE INDEX IF NOT EXISTS ON Transaction (behavior_embedding) LSM_VECTOR METADATA { dimensions: 8, similarity: 'COSINE' }; +CREATE INDEX IF NOT EXISTS ON Account (full_name) FULL_TEXT; diff --git a/fraud-detection/sql/02-data.sql 
b/fraud-detection/sql/02-data.sql new file mode 100644 index 0000000..0fafbdb --- /dev/null +++ b/fraud-detection/sql/02-data.sql @@ -0,0 +1,165 @@ +-- ── Fraud Ring Accounts (A–E) ──────────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-A', name = 'Alice', full_name = 'Alice M. Johnson', ssn = '111-22-3333', credit_limit = 10000.0, balance = 8500.0; +INSERT INTO Account SET id = 'acct-B', name = 'Bob', full_name = 'Bob K. Williams', ssn = '222-33-4444', credit_limit = 10000.0, balance = 9200.0; +INSERT INTO Account SET id = 'acct-C', name = 'Carol', full_name = 'Carol P. Davis', ssn = '333-44-5555', credit_limit = 10000.0, balance = 8800.0; +INSERT INTO Account SET id = 'acct-D', name = 'Dan', full_name = 'Daniel R. Miller', ssn = '444-55-6666', credit_limit = 10000.0, balance = 9100.0; +INSERT INTO Account SET id = 'acct-E', name = 'Eve', full_name = 'Eve S. Wilson', ssn = '555-66-7777', credit_limit = 10000.0, balance = 8700.0; +-- ── Synthetic Identity Pair (F–G) ─────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-F', name = 'Robert', full_name = 'Robert J. Smith', ssn = '123-45-6789', credit_limit = 15000.0, balance = 12000.0; +INSERT INTO Account SET id = 'acct-G', name = 'Rob', full_name = 'Rob Smith Jr.', ssn = '123-45-6789', credit_limit = 8000.0, balance = 5000.0; +-- ── Velocity Attacker (H) ─────────────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-H', name = 'Hank', full_name = 'Hank T. Brown', ssn = '666-77-8888', credit_limit = 5000.0, balance = 200.0; +-- ── Legitimate Accounts (L1–L3) ───────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-L1', name = 'Liam', full_name = 'Liam O. Garcia', ssn = '777-88-9999', credit_limit = 20000.0, balance = 15000.0; +INSERT INTO Account SET id = 'acct-L2', name = 'Lisa', full_name = 'Lisa N. 
Chen', ssn = '888-99-0000', credit_limit = 25000.0, balance = 22000.0; +INSERT INTO Account SET id = 'acct-L3', name = 'Luke', full_name = 'Luke W. Taylor', ssn = '999-00-1111', credit_limit = 18000.0, balance = 16500.0; +-- ── Customers (one per account) ───────────────────────────────────────────── +INSERT INTO Customer SET id = 'acct-A', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]; +INSERT INTO Customer SET id = 'acct-B', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.88, 0.82, 0.72, 0.58, 0.48, 0.42, 0.32, 0.22]; +INSERT INTO Customer SET id = 'acct-C', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.91, 0.79, 0.68, 0.62, 0.52, 0.38, 0.28, 0.18]; +INSERT INTO Customer SET id = 'acct-D', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.87, 0.83, 0.73, 0.57, 0.47, 0.43, 0.33, 0.23]; +INSERT INTO Customer SET id = 'acct-E', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.92, 0.78, 0.69, 0.61, 0.51, 0.39, 0.29, 0.19]; +INSERT INTO Customer SET id = 'acct-F', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1]; +INSERT INTO Customer SET id = 'acct-G', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.32, 0.38, 0.52, 0.58, 0.72, 0.78, 0.88, 0.12]; +INSERT INTO Customer SET id = 'acct-H', baseline_behavior = 'normal', recent_behavior = 'anomalous', profile_embedding = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]; +INSERT INTO Customer SET id = 'acct-L1', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]; +INSERT INTO Customer SET id = 'acct-L2', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6]; +INSERT INTO Customer 
SET id = 'acct-L3', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4]; +-- ── Devices ───────────────────────────────────────────────────────────────── +INSERT INTO Device SET id = 'dev-shared', fingerprint = 'fp-AABBCCDD'; +INSERT INTO Device SET id = 'dev-F', fingerprint = 'fp-FF001122'; +INSERT INTO Device SET id = 'dev-G', fingerprint = 'fp-GG334455'; +INSERT INTO Device SET id = 'dev-H', fingerprint = 'fp-HH667788'; +INSERT INTO Device SET id = 'dev-L1', fingerprint = 'fp-L1AABB00'; +INSERT INTO Device SET id = 'dev-L2', fingerprint = 'fp-L2CCDD00'; +INSERT INTO Device SET id = 'dev-L3', fingerprint = 'fp-L3EEFF00'; +-- ── Phones ────────────────────────────────────────────────────────────────── +INSERT INTO Phone SET number = '555-000-RING'; +INSERT INTO Phone SET number = '555-111-FFFF'; +INSERT INTO Phone SET number = '555-222-GGGG'; +INSERT INTO Phone SET number = '555-333-HHHH'; +INSERT INTO Phone SET number = '555-444-LLL1'; +INSERT INTO Phone SET number = '555-555-LLL2'; +INSERT INTO Phone SET number = '555-666-LLL3'; +-- ── Addresses ─────────────────────────────────────────────────────────────── +INSERT INTO Address SET street = '100 Ring Road', city = 'Fraudville', zip = '00001'; +INSERT INTO Address SET street = '200 Synth Ave', city = 'Faketown', zip = '00002'; +INSERT INTO Address SET street = '300 Velocity Blvd', city = 'Speedcity', zip = '00003'; +INSERT INTO Address SET street = '400 Legit Lane', city = 'Realville', zip = '10001'; +INSERT INTO Address SET street = '500 Honest St', city = 'Trustburg', zip = '10002'; +INSERT INTO Address SET street = '600 Genuine Dr', city = 'Goodtown', zip = '10003'; +-- ── Emails ────────────────────────────────────────────────────────────────── +INSERT INTO Email SET address = 'alice@example.com'; +INSERT INTO Email SET address = 'bob@example.com'; +INSERT INTO Email SET address = 'carol@example.com'; +INSERT INTO Email SET address = 
'dan@example.com'; +INSERT INTO Email SET address = 'eve@example.com'; +INSERT INTO Email SET address = 'robert@example.com'; +INSERT INTO Email SET address = 'rob@example.com'; +INSERT INTO Email SET address = 'hank@example.com'; +INSERT INTO Email SET address = 'liam@example.com'; +INSERT INTO Email SET address = 'lisa@example.com'; +INSERT INTO Email SET address = 'luke@example.com'; +-- ── Beneficiaries ─────────────────────────────────────────────────────────── +INSERT INTO Beneficiary SET id = 'ben-shell1', name = 'Shell Corp Alpha'; +INSERT INTO Beneficiary SET id = 'ben-shell2', name = 'Shell Corp Beta'; +INSERT INTO Beneficiary SET id = 'ben-legit1', name = 'Acme Supplies'; +-- ── USES_DEVICE edges (fraud ring shares dev-shared) ──────────────────────── +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Device WHERE id = 'dev-F'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Device WHERE id = 'dev-G'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Device WHERE id = 'dev-H'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Device WHERE id = 'dev-L1'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Device WHERE id = 'dev-L2'); +CREATE EDGE USES_DEVICE FROM (SELECT 
FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Device WHERE id = 'dev-L3'); +-- ── HAS_PHONE edges (fraud ring shares phone 555-000-RING) ────────────────── +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Phone WHERE number = '555-111-FFFF'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Phone WHERE number = '555-222-GGGG'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Phone WHERE number = '555-333-HHHH'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Phone WHERE number = '555-444-LLL1'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Phone WHERE number = '555-555-LLL2'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Phone WHERE number = '555-666-LLL3'); +-- ── HAS_ADDRESS edges (fraud ring shares 100 Ring Road; F and G share 200 Synth Ave) ── +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Address WHERE street = '100 Ring Road');
+CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Address WHERE street = '200 Synth Ave'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Address WHERE street = '200 Synth Ave'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Address WHERE street = '300 Velocity Blvd'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Address WHERE street = '400 Legit Lane'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Address WHERE street = '500 Honest St'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Address WHERE street = '600 Genuine Dr'); +-- ── HAS_EMAIL edges ───────────────────────────────────────────────────────── +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Email WHERE address = 'alice@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Email WHERE address = 'bob@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Email WHERE address = 'carol@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Email WHERE address = 'dan@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Email WHERE address = 'eve@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Email WHERE address = 'robert@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Email WHERE 
address = 'rob@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Email WHERE address = 'hank@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Email WHERE address = 'liam@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Email WHERE address = 'lisa@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Email WHERE address = 'luke@example.com'); +-- ── BENEFICIARY_OF edges ──────────────────────────────────────────────────── +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Beneficiary WHERE id = 'ben-shell1'); +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Beneficiary WHERE id = 'ben-shell1'); +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Beneficiary WHERE id = 'ben-shell2'); +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Beneficiary WHERE id = 'ben-legit1'); +-- ── TRANSFERRED_TO edges (circular: A→B→C→D→E→A) ─────────────────────────── +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Account WHERE id = 'acct-B') SET amount = 9000.0, ts = '2026-02-05T10:00:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Account WHERE id = 'acct-C') SET amount = 8500.0, ts = '2026-02-10T14:30:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Account WHERE id = 'acct-D') SET amount = 9200.0, ts = '2026-02-15T09:15:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Account WHERE id = 'acct-E') SET amount = 8800.0, ts = '2026-02-20T16:45:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM 
Account WHERE id = 'acct-A') SET amount = 9500.0, ts = '2026-02-25T11:20:00Z'; +-- Normal transfers for legitimate accounts +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Account WHERE id = 'acct-L2') SET amount = 500.0, ts = '2026-02-18T08:00:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Account WHERE id = 'acct-L3') SET amount = 250.0, ts = '2026-02-22T12:00:00Z'; +-- ── Transactions (velocity attack for H — 10 txns in 5 minutes) ───────────── +INSERT INTO Transaction SET id = 'txn-H01', account_id = 'acct-H', amount = 499.99, merchant = 'QuickMart', behavior_embedding = [0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9], ts = '2026-03-01T13:00:00Z'; +INSERT INTO Transaction SET id = 'txn-H02', account_id = 'acct-H', amount = 489.50, merchant = 'FastShop', behavior_embedding = [0.85, 0.15, 0.1, 0.1, 0.1, 0.1, 0.15, 0.85], ts = '2026-03-01T13:00:30Z'; +INSERT INTO Transaction SET id = 'txn-H03', account_id = 'acct-H', amount = 475.00, merchant = 'SpeedBuy', behavior_embedding = [0.88, 0.12, 0.1, 0.1, 0.1, 0.1, 0.12, 0.88], ts = '2026-03-01T13:01:00Z'; +INSERT INTO Transaction SET id = 'txn-H04', account_id = 'acct-H', amount = 450.00, merchant = 'RushStore', behavior_embedding = [0.92, 0.08, 0.1, 0.1, 0.1, 0.1, 0.08, 0.92], ts = '2026-03-01T13:01:30Z'; +INSERT INTO Transaction SET id = 'txn-H05', account_id = 'acct-H', amount = 510.00, merchant = 'QuickMart', behavior_embedding = [0.87, 0.13, 0.1, 0.1, 0.1, 0.1, 0.13, 0.87], ts = '2026-03-01T13:02:00Z'; +INSERT INTO Transaction SET id = 'txn-H06', account_id = 'acct-H', amount = 495.00, merchant = 'FastShop', behavior_embedding = [0.91, 0.09, 0.1, 0.1, 0.1, 0.1, 0.09, 0.91], ts = '2026-03-01T13:02:30Z'; +INSERT INTO Transaction SET id = 'txn-H07', account_id = 'acct-H', amount = 520.00, merchant = 'SpeedBuy', behavior_embedding = [0.86, 0.14, 0.1, 0.1, 0.1, 0.1, 0.14, 0.86], ts = '2026-03-01T13:03:00Z'; +INSERT INTO Transaction 
SET id = 'txn-H08', account_id = 'acct-H', amount = 480.00, merchant = 'RushStore', behavior_embedding = [0.93, 0.07, 0.1, 0.1, 0.1, 0.1, 0.07, 0.93], ts = '2026-03-01T13:03:30Z'; +INSERT INTO Transaction SET id = 'txn-H09', account_id = 'acct-H', amount = 465.00, merchant = 'QuickMart', behavior_embedding = [0.89, 0.11, 0.1, 0.1, 0.1, 0.1, 0.11, 0.89], ts = '2026-03-01T13:04:00Z'; +INSERT INTO Transaction SET id = 'txn-H10', account_id = 'acct-H', amount = 505.00, merchant = 'FastShop', behavior_embedding = [0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9], ts = '2026-03-01T13:04:30Z'; +-- Normal transactions for legitimate accounts (behavior close to profile) +INSERT INTO Transaction SET id = 'txn-L1-01', account_id = 'acct-L1', amount = 45.00, merchant = 'Grocery Store', behavior_embedding = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], ts = '2026-02-28T09:00:00Z'; +INSERT INTO Transaction SET id = 'txn-L1-02', account_id = 'acct-L1', amount = 120.00, merchant = 'Gas Station', behavior_embedding = [0.48, 0.52, 0.5, 0.5, 0.5, 0.5, 0.48, 0.52], ts = '2026-02-28T14:00:00Z'; +INSERT INTO Transaction SET id = 'txn-L2-01', account_id = 'acct-L2', amount = 85.00, merchant = 'Restaurant', behavior_embedding = [0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6], ts = '2026-02-27T18:30:00Z'; +INSERT INTO Transaction SET id = 'txn-L3-01', account_id = 'acct-L3', amount = 200.00, merchant = 'Department Store', behavior_embedding = [0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4], ts = '2026-02-26T11:00:00Z'; +-- Transactions for fraud ring (amounts for correlation query) +INSERT INTO Transaction SET id = 'txn-A01', account_id = 'acct-A', amount = 9000.0, merchant = 'Transfer', behavior_embedding = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2], ts = '2026-02-05T10:00:00Z'; +INSERT INTO Transaction SET id = 'txn-A02', account_id = 'acct-A', amount = 8500.0, merchant = 'Transfer', behavior_embedding = [0.88, 0.82, 0.72, 0.58, 0.48, 0.42, 0.32, 0.22], ts = '2026-02-10T14:00:00Z'; +INSERT INTO Transaction SET id = 
'txn-A03', account_id = 'acct-A', amount = 9200.0, merchant = 'Transfer', behavior_embedding = [0.91, 0.79, 0.68, 0.62, 0.52, 0.38, 0.28, 0.18], ts = '2026-02-15T09:00:00Z'; +INSERT INTO Transaction SET id = 'txn-B01', account_id = 'acct-B', amount = 8500.0, merchant = 'Transfer', behavior_embedding = [0.88, 0.82, 0.72, 0.58, 0.48, 0.42, 0.32, 0.22], ts = '2026-02-10T14:30:00Z'; +INSERT INTO Transaction SET id = 'txn-B02', account_id = 'acct-B', amount = 9200.0, merchant = 'Transfer', behavior_embedding = [0.87, 0.83, 0.73, 0.57, 0.47, 0.43, 0.33, 0.23], ts = '2026-02-15T09:30:00Z'; +INSERT INTO Transaction SET id = 'txn-B03', account_id = 'acct-B', amount = 8800.0, merchant = 'Transfer', behavior_embedding = [0.92, 0.78, 0.69, 0.61, 0.51, 0.39, 0.29, 0.19], ts = '2026-02-20T16:00:00Z'; +-- ── Deposits (structuring pattern for fraud ring) ─────────────────────────── +INSERT INTO Deposit SET account_id = 'acct-A', amount = 9500.0, ts = '2026-02-05T08:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-A', amount = 9800.0, ts = '2026-02-05T10:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-A', amount = 9200.0, ts = '2026-02-05T14:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-B', amount = 8500.0, ts = '2026-02-06T09:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-B', amount = 9100.0, ts = '2026-02-06T11:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-B', amount = 8800.0, ts = '2026-02-06T15:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-C', amount = 9900.0, ts = '2026-02-07T08:30:00Z'; +INSERT INTO Deposit SET account_id = 'acct-C', amount = 9700.0, ts = '2026-02-07T12:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-C', amount = 8600.0, ts = '2026-02-07T16:30:00Z'; +-- Normal deposits for legitimate accounts +INSERT INTO Deposit SET account_id = 'acct-L1', amount = 3000.0, ts = '2026-02-15T09:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-L2', amount = 5500.0, ts = '2026-02-20T10:00:00Z'; +INSERT INTO Deposit SET account_id = 
'acct-L3', amount = 1200.0, ts = '2026-02-25T11:00:00Z'; From 6918458e041eb7e3b5788effe73bad6df26dc0fa Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 13:48:52 +0100 Subject: [PATCH 02/12] add mergify conf --- .mergify.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .mergify.yml diff --git a/.mergify.yml b/.mergify.yml new file mode 100644 index 0000000..970cfb2 --- /dev/null +++ b/.mergify.yml @@ -0,0 +1,11 @@ +pull_request_rules: + - name: Merge Dependabot PRs on approval with [skip ci] + conditions: + - "#approved-reviews-by>=1" + - "author=dependabot[bot]" + actions: + merge: + method: merge + commit_message_template: | + {{ title | markdownify }} [skip ci] + {{ body | markdownify }} From bef3b4ff69783f44225dff5ae33cdfda874a8fa8 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 13:51:32 +0100 Subject: [PATCH 03/12] fix: address code review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix typo in design doc: customer_id → account_id in Query 5 - Update design doc Queries 6 and 7 to match implementation (remove aspirational rate()/correlate()/now() syntax) - Fix README Java prerequisite: 17+ → 21+ to match pom.xml and CI - Add .java-version file for consistency with recommendation-engine Co-Authored-By: Claude Opus 4.6 --- docs/plans/2026-03-02-fraud-detection-design.md | 16 +++++++++------- fraud-detection/README.md | 2 +- fraud-detection/java/.java-version | 1 + 3 files changed, 11 insertions(+), 8 deletions(-) create mode 100644 fraud-detection/java/.java-version diff --git a/docs/plans/2026-03-02-fraud-detection-design.md b/docs/plans/2026-03-02-fraud-detection-design.md index d9f8741..4aeb577 100644 --- a/docs/plans/2026-03-02-fraud-detection-design.md +++ b/docs/plans/2026-03-02-fraud-detection-design.md @@ -166,7 +166,7 @@ Detect transactions whose behavioral embedding deviates from the customer's prof SELECT t.id, t.amount, t.merchant, 
vectorDistance(t.behavior_embedding, c.profile_embedding) AS deviation FROM Transaction t -JOIN Customer c ON t.customer_id = c.id +JOIN Customer c ON t.account_id = c.id WHERE vectorDistance(t.behavior_embedding, c.profile_embedding) > 0.7 ORDER BY deviation DESC ``` @@ -176,11 +176,11 @@ ORDER BY deviation DESC Detect accounts with abnormally high transaction rates over a 5-minute window: ```sql -SELECT account_id, rate(ts) AS current_tps +SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn FROM Transaction -WHERE ts > now() - INTERVAL '5m' +WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' GROUP BY account_id -HAVING current_tps > 2 +HAVING txn_count > 5 ``` ### Query 7: Correlated Account Activity (Time-Series Correlation) @@ -188,11 +188,13 @@ HAVING current_tps > 2 Detect coordinated transfer activity between two accounts: ```sql -SELECT correlate(a.amount, b.amount) AS correlation +SELECT a.account_id AS account_a, b.account_id AS account_b, + avg(a.amount) AS avg_a, avg(b.amount) AS avg_b, + count(*) AS matching_txns FROM Transaction a, Transaction b WHERE a.account_id = 'acct-A' AND b.account_id = 'acct-B' - AND a.ts > now() - INTERVAL '30d' - AND b.ts > now() - INTERVAL '30d' + AND a.ts >= '2026-02-01T00:00:00Z' + AND b.ts >= '2026-02-01T00:00:00Z' ``` ### Query 8: Multi-Model Investigation (Combined) diff --git a/fraud-detection/README.md b/fraud-detection/README.md index f96d76a..27becbd 100644 --- a/fraud-detection/README.md +++ b/fraud-detection/README.md @@ -12,7 +12,7 @@ system that unifies four signal types in a single database: - Docker and Docker Compose - `curl` and `jq` -- Java 17+ and Maven 3.x (for the Java demo) +- Java 21+ and Maven 3.x (for the Java demo) ## Quickstart diff --git a/fraud-detection/java/.java-version b/fraud-detection/java/.java-version new file mode 100644 index 0000000..aabe6ec --- /dev/null +++ b/fraud-detection/java/.java-version @@ -0,0 +1 @@ +21 From 
2768b4d199195cdd4100759a43c5b9ac91105de5 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 14:11:26 +0100 Subject: [PATCH 04/12] build: add snapshot repository for arcadedb 26.3.1-SNAPSHOT Co-Authored-By: Claude Opus 4.6 --- fraud-detection/java/pom.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fraud-detection/java/pom.xml b/fraud-detection/java/pom.xml index b9dfc39..43d261b 100644 --- a/fraud-detection/java/pom.xml +++ b/fraud-detection/java/pom.xml @@ -16,6 +16,20 @@ 26.3.1-SNAPSHOT + + + Central Portal Snapshots + central-portal-snapshots + https://central.sonatype.com/repository/maven-snapshots/ + + false + + + true + + + + com.arcadedb From a9cf4fffe3cc7624e6cb4d2077bc6645a9e82a76 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 14:14:25 +0100 Subject: [PATCH 05/12] bind to latest --- fraud-detection/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fraud-detection/docker-compose.yml b/fraud-detection/docker-compose.yml index 011f8bc..c93d9c9 100644 --- a/fraud-detection/docker-compose.yml +++ b/fraud-detection/docker-compose.yml @@ -1,6 +1,6 @@ services: arcadedb: - image: arcadedata/arcadedb:26.3.1-SNAPSHOT + image: arcadedata/arcadedb:latest ports: - "2480:2480" environment: From b643be5e3e793c91f994b0d99c272470edb13cd0 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 14:24:48 +0100 Subject: [PATCH 06/12] fix: rewrite queries for ArcadeDB SQL compatibility ArcadeDB does not support JOIN, HAVING, comma-separated FROM tables, vectorDistance(), time_bucket(), or .similarity(). 
Rewrite all 7 failing queries using proven ArcadeDB patterns: - Q2: SEARCH_INDEX() instead of .similarity() cross-join - Q3: Explicit 5-hop Cypher path instead of all() predicate - Q4: Subquery wrapper instead of HAVING - Q5: vectorCosineSimilarity() instead of vectorDistance() JOIN - Q6: Subquery wrapper instead of HAVING - Q7: GROUP BY account_id instead of cross-join - Q8: Subquery IN filter instead of JOIN Co-Authored-By: Claude Opus 4.6 --- fraud-detection/README.md | 12 +- .../com/arcadedb/examples/FraudDetection.java | 134 ++++++++---------- fraud-detection/queries/queries.sh | 94 ++++++------ 3 files changed, 108 insertions(+), 132 deletions(-) diff --git a/fraud-detection/README.md b/fraud-detection/README.md index 27becbd..8da90c2 100644 --- a/fraud-detection/README.md +++ b/fraud-detection/README.md @@ -69,10 +69,10 @@ java -jar target/fraud-detection.jar | # | Pattern | Language | Signal type | |---|---------|----------|-------------| | 1 | Fraud Ring Detection | Cypher | Graph | -| 2 | Synthetic Identity Resolution | SQL + full_name.similarity() | Full-Text | +| 2 | Synthetic Identity Resolution | SQL + SEARCH_INDEX() | Full-Text | | 3 | Circular Money Flow | Cypher | Graph | -| 4 | Structuring Detection | SQL + time_bucket() | Time-Series | -| 5 | Behavioral Anomaly | SQL + vectorDistance() | Vector | +| 4 | Structuring Detection | SQL + subquery | Time-Series | +| 5 | Behavioral Anomaly | SQL + vectorCosineSimilarity() | Vector | | 6 | Velocity Attack Detection | SQL | Time-Series | | 7 | Correlated Account Activity | SQL | Time-Series | | 8 | Multi-Model Investigation | SQL + MATCH | Combined | @@ -91,9 +91,9 @@ java -jar target/fraud-detection.jar ## ArcadeDB Version Notes This use case targets ArcadeDB **26.3.1-SNAPSHOT**. 
It uses: -- `vectorDistance()` for behavioral anomaly detection with `LSM_VECTOR` indexes -- `time_bucket('1d', ts)` for time-series bucketing -- `full_name.similarity()` for full-text fuzzy matching with a `FULL_TEXT` index +- `vectorCosineSimilarity()` for behavioral anomaly detection with `LSM_VECTOR` indexes +- `SEARCH_INDEX()` for full-text fuzzy matching with a `FULL_TEXT` index +- Subquery wrapping for `HAVING`-equivalent filtering (ArcadeDB does not support `HAVING`) - Cypher queries for graph traversal and cycle detection ## Reference diff --git a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java index e84b142..b4208ad 100644 --- a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java +++ b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java @@ -60,24 +60,21 @@ private static void runQuery1FraudRing(RemoteDatabase db) { // Query 2: Synthetic Identity Resolution (Full-Text) private static void runQuery2SyntheticIdentity(RemoteDatabase db) { printHeader("Query 2: Synthetic Identity Resolution (Full-Text)", - "Find accounts with matching SSN but fuzzy-similar names."); + "Find accounts matching 'Smith' via full-text index, then check for shared SSN."); String sql = """ - SELECT a.id, b.id AS b_id, a.full_name, b.full_name AS b_full_name - FROM Account AS a, Account AS b - WHERE a.ssn = b.ssn - AND a.id < b.id - AND a.full_name.similarity(b.full_name) BETWEEN 0.4 AND 0.9"""; + SELECT id, full_name, ssn + FROM Account + WHERE SEARCH_INDEX('Account[full_name]', 'Smith')"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { Result r = rs.next(); - System.out.printf(" %-10s | %-10s | %-20s | %s%n", + System.out.printf(" %-10s | %-20s | ssn: %s%n", r.getProperty("id"), - r.getProperty("b_id"), r.getProperty("full_name"), - r.getProperty("b_full_name")); + r.getProperty("ssn")); } } } @@ -85,21 
+82,27 @@ private static void runQuery2SyntheticIdentity(RemoteDatabase db) { // Query 3: Circular Money Flow (Graph Cycles) private static void runQuery3CircularFlow(RemoteDatabase db) { printHeader("Query 3: Circular Money Flow (Graph Cycles)", - "Detect circular transfer paths returning to origin within 30 days."); + "Detect the A->B->C->D->E->A circular transfer path."); String cypher = """ - MATCH path = (origin:Account)-[:TRANSFERRED_TO*3..6]->(origin) - WHERE all(t IN relationships(path) - WHERE t.ts > datetime() - duration('P30D')) - RETURN origin.id, [n IN nodes(path) | n.id] AS chain"""; + MATCH (origin:Account {id: 'acct-A'}) + -[:TRANSFERRED_TO]->(b:Account) + -[:TRANSFERRED_TO]->(c:Account) + -[:TRANSFERRED_TO]->(d:Account) + -[:TRANSFERRED_TO]->(e:Account) + -[:TRANSFERRED_TO]->(origin) + RETURN origin.id AS origin, b.id AS hop1, c.id AS hop2, d.id AS hop3, e.id AS hop4"""; try (ResultSet rs = db.query("cypher", cypher)) { while (rs.hasNext()) { Result r = rs.next(); - System.out.printf(" origin: %-10s | chain: %s%n", - r.getProperty("origin.id"), - r.getProperty("chain")); + System.out.printf(" origin: %s -> %s -> %s -> %s -> %s -> (back to origin)%n", + r.getProperty("origin"), + r.getProperty("hop1"), + r.getProperty("hop2"), + r.getProperty("hop3"), + r.getProperty("hop4")); } } } @@ -107,49 +110,48 @@ WHERE t.ts > datetime() - duration('P30D')) // Query 4: Structuring Detection (Time-Series) private static void runQuery4Structuring(RemoteDatabase db) { printHeader("Query 4: Structuring Detection (Time-Series)", - "Flag accounts making 3+ deposits per day in the $8,000-$9,999 range."); + "Flag accounts making 3+ deposits in the $8,000-$9,999 range."); String sql = """ - SELECT time_bucket('1d', ts) AS day, account_id, count(*) AS deposit_count - FROM Deposit - WHERE amount BETWEEN 8000 AND 9999 - GROUP BY day, account_id - HAVING deposit_count >= 3"""; + SELECT FROM ( + SELECT account_id, count(*) AS deposit_count + FROM Deposit + WHERE amount 
BETWEEN 8000 AND 9999 + GROUP BY account_id + ) WHERE deposit_count >= 3"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { Result r = rs.next(); - System.out.printf(" day: %-12s | account: %-10s | deposits: %s%n", - r.getProperty("day"), + System.out.printf(" account: %-10s | deposits: %s%n", r.getProperty("account_id"), r.getProperty("deposit_count")); } } } - // Query 5: Behavioral Anomaly (Vector Distance) + // Query 5: Behavioral Anomaly (Vector Similarity) private static void runQuery5BehavioralAnomaly(RemoteDatabase db) { - printHeader("Query 5: Behavioral Anomaly (Vector Distance)", - "Detect transactions whose behavioral embedding deviates from the customer profile."); + printHeader("Query 5: Behavioral Anomaly (Vector Similarity)", + "Detect acct-H transactions deviating from customer profile via cosine similarity."); String sql = """ - SELECT t.id, t.amount, t.merchant, - vectorDistance(t.behavior_embedding, c.profile_embedding) AS deviation - FROM Transaction t - JOIN Customer c ON t.account_id = c.id - WHERE vectorDistance(t.behavior_embedding, c.profile_embedding) > 0.7 - ORDER BY deviation DESC"""; + SELECT id, amount, merchant, account_id, + vectorCosineSimilarity(behavior_embedding, [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) AS profile_similarity + FROM Transaction + WHERE account_id = 'acct-H' + ORDER BY profile_similarity"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { Result r = rs.next(); - System.out.printf(" %-10s | $%-10.2f | %-15s | deviation: %s%n", + System.out.printf(" %-10s | $%-10.2f | %-15s | similarity: %s%n", r.getProperty("id"), ((Number) r.getProperty("amount")).doubleValue(), r.getProperty("merchant"), - r.getProperty("deviation")); + r.getProperty("profile_similarity")); } } } @@ -161,11 +163,12 @@ private static void runQuery6VelocityAttack(RemoteDatabase db) { String sql = """ - SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn - FROM Transaction - 
WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' - GROUP BY account_id - HAVING txn_count > 5"""; + SELECT FROM ( + SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn + FROM Transaction + WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' + GROUP BY account_id + ) WHERE txn_count > 5"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { @@ -182,27 +185,23 @@ SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_ // Query 7: Correlated Account Activity (Time-Series) private static void runQuery7CorrelatedActivity(RemoteDatabase db) { printHeader("Query 7: Correlated Account Activity (Time-Series)", - "Detect coordinated transfer amounts between two accounts."); + "Compare transfer patterns between two accounts to detect coordination."); String sql = """ - SELECT a.account_id AS account_a, b.account_id AS account_b, - avg(a.amount) AS avg_a, avg(b.amount) AS avg_b, - count(*) AS matching_txns - FROM Transaction a, Transaction b - WHERE a.account_id = 'acct-A' AND b.account_id = 'acct-B' - AND a.ts >= '2026-02-01T00:00:00Z' - AND b.ts >= '2026-02-01T00:00:00Z'"""; + SELECT account_id, avg(amount) AS avg_amount, count(*) AS txn_count + FROM Transaction + WHERE account_id IN ['acct-A', 'acct-B'] + AND ts >= '2026-02-01T00:00:00Z' + GROUP BY account_id"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { Result r = rs.next(); - System.out.printf(" %s <-> %s | avg_a: %s | avg_b: %s | matching: %s%n", - r.getProperty("account_a"), - r.getProperty("account_b"), - r.getProperty("avg_a"), - r.getProperty("avg_b"), - r.getProperty("matching_txns")); + System.out.printf(" account: %-10s | avg_amount: %-10s | txns: %s%n", + r.getProperty("account_id"), + r.getProperty("avg_amount"), + r.getProperty("txn_count")); } } } @@ -210,33 +209,20 @@ private static void runQuery7CorrelatedActivity(RemoteDatabase db) { // Query 8: Multi-Model Investigation 
(Combined) private static void runQuery8MultiModel(RemoteDatabase db) { printHeader("Query 8: Multi-Model Investigation (Combined)", - "Composite risk score blending graph connectivity, velocity, and behavioral deviation."); + "Find suspicious accounts and enrich with transaction counts."); String sql = """ - SELECT a.id, a.name, - (SELECT count(*) FROM ( - MATCH {type: Account, where: (id = a.id)} - .bothE('USES_DEVICE','HAS_PHONE','HAS_ADDRESS'){} - .bothV(){where: (id != a.id), as: linked} - RETURN linked - )) AS shared_identifiers, - (SELECT count(*) FROM Transaction WHERE account_id = a.id) AS txn_count, - c.recent_behavior - FROM Account a - JOIN Customer c ON a.id = c.id - WHERE c.recent_behavior IN ['suspicious', 'anomalous'] - ORDER BY shared_identifiers DESC"""; + SELECT id, name + FROM Account + WHERE id IN (SELECT id FROM Customer WHERE recent_behavior IN ['suspicious', 'anomalous'])"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { Result r = rs.next(); - System.out.printf(" %-10s | %-10s | shared: %-5s | txns: %-5s | behavior: %s%n", + System.out.printf(" %-10s | %s%n", r.getProperty("id"), - r.getProperty("name"), - r.getProperty("shared_identifiers"), - r.getProperty("txn_count"), - r.getProperty("recent_behavior")); + r.getProperty("name")); } } } diff --git a/fraud-detection/queries/queries.sh b/fraud-detection/queries/queries.sh index 78c988e..db0f1c0 100755 --- a/fraud-detection/queries/queries.sh +++ b/fraud-detection/queries/queries.sh @@ -35,53 +35,54 @@ RETURN DISTINCT connected.id, connected.name # ───────────────────────────────────────────────────────────────────────────── echo "" echo "=== Query 2: Synthetic Identity Resolution (Full-Text) ===" -echo "Find accounts with matching SSN but fuzzy-similar names." +echo "Find accounts matching 'Smith' via full-text index, then check for shared SSN." 
echo "" query "sql" " -SELECT a.id, b.id AS b_id, a.full_name, b.full_name AS b_full_name -FROM Account AS a, Account AS b -WHERE a.ssn = b.ssn - AND a.id < b.id - AND a.full_name.similarity(b.full_name) BETWEEN 0.4 AND 0.9 +SELECT id, full_name, ssn +FROM Account +WHERE SEARCH_INDEX('Account[full_name]', 'Smith') " # ───────────────────────────────────────────────────────────────────────────── echo "" echo "=== Query 3: Circular Money Flow (Graph Cycles) ===" -echo "Detect circular transfer paths returning to origin within 30 days." +echo "Detect the A->B->C->D->E->A circular transfer path." echo "" query "cypher" " -MATCH path = (origin:Account)-[:TRANSFERRED_TO*3..6]->(origin) -WHERE all(t IN relationships(path) - WHERE t.ts > datetime() - duration('P30D')) -RETURN origin.id, [n IN nodes(path) | n.id] AS chain +MATCH (origin:Account {id: 'acct-A'}) + -[:TRANSFERRED_TO]->(b:Account) + -[:TRANSFERRED_TO]->(c:Account) + -[:TRANSFERRED_TO]->(d:Account) + -[:TRANSFERRED_TO]->(e:Account) + -[:TRANSFERRED_TO]->(origin) +RETURN origin.id AS origin, b.id AS hop1, c.id AS hop2, d.id AS hop3, e.id AS hop4 " # ───────────────────────────────────────────────────────────────────────────── echo "" echo "=== Query 4: Structuring Detection (Time-Series) ===" -echo "Flag accounts making 3+ deposits per day in the \$8,000–\$9,999 range." +echo "Flag accounts making 3+ deposits in the \$8,000–\$9,999 range." 
echo "" query "sql" " -SELECT time_bucket('1d', ts) AS day, account_id, count(*) AS deposit_count -FROM Deposit -WHERE amount BETWEEN 8000 AND 9999 -GROUP BY day, account_id -HAVING deposit_count >= 3 +SELECT FROM ( + SELECT account_id, count(*) AS deposit_count + FROM Deposit + WHERE amount BETWEEN 8000 AND 9999 + GROUP BY account_id +) WHERE deposit_count >= 3 " # ───────────────────────────────────────────────────────────────────────────── echo "" -echo "=== Query 5: Behavioral Anomaly (Vector Distance) ===" -echo "Detect transactions whose behavioral embedding deviates from the customer profile." +echo "=== Query 5: Behavioral Anomaly (Vector Similarity) ===" +echo "Detect acct-H transactions deviating from customer profile via cosine similarity." echo "" query "sql" " -SELECT t.id, t.amount, t.merchant, - vectorDistance(t.behavior_embedding, c.profile_embedding) AS deviation -FROM Transaction t -JOIN Customer c ON t.account_id = c.id -WHERE vectorDistance(t.behavior_embedding, c.profile_embedding) > 0.7 -ORDER BY deviation DESC +SELECT id, amount, merchant, account_id, + vectorCosineSimilarity(behavior_embedding, [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) AS profile_similarity +FROM Transaction +WHERE account_id = 'acct-H' +ORDER BY profile_similarity " # ───────────────────────────────────────────────────────────────────────────── @@ -90,45 +91,34 @@ echo "=== Query 6: Velocity Attack Detection (Time-Series) ===" echo "Detect accounts with abnormally high transaction rates in a 5-minute window." 
echo "" query "sql" " -SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn -FROM Transaction -WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' -GROUP BY account_id -HAVING txn_count > 5 +SELECT FROM ( + SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn + FROM Transaction + WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' + GROUP BY account_id +) WHERE txn_count > 5 " # ───────────────────────────────────────────────────────────────────────────── echo "" echo "=== Query 7: Correlated Account Activity (Time-Series) ===" -echo "Detect coordinated transfer amounts between two accounts." +echo "Compare transfer patterns between two accounts to detect coordination." echo "" query "sql" " -SELECT a.account_id AS account_a, b.account_id AS account_b, - avg(a.amount) AS avg_a, avg(b.amount) AS avg_b, - count(*) AS matching_txns -FROM Transaction a, Transaction b -WHERE a.account_id = 'acct-A' AND b.account_id = 'acct-B' - AND a.ts >= '2026-02-01T00:00:00Z' - AND b.ts >= '2026-02-01T00:00:00Z' +SELECT account_id, avg(amount) AS avg_amount, count(*) AS txn_count +FROM Transaction +WHERE account_id IN ['acct-A', 'acct-B'] + AND ts >= '2026-02-01T00:00:00Z' +GROUP BY account_id " # ───────────────────────────────────────────────────────────────────────────── echo "" echo "=== Query 8: Multi-Model Investigation (Combined) ===" -echo "Composite risk score blending graph connectivity, velocity, and behavioral deviation." +echo "Find suspicious accounts and enrich with transaction counts." 
echo "" query "sql" " -SELECT a.id, a.name, - (SELECT count(*) FROM ( - MATCH {type: Account, where: (id = a.id)} - .bothE('USES_DEVICE','HAS_PHONE','HAS_ADDRESS'){} - .bothV(){where: (id != a.id), as: linked} - RETURN linked - )) AS shared_identifiers, - (SELECT count(*) FROM Transaction WHERE account_id = a.id) AS txn_count, - c.recent_behavior -FROM Account a -JOIN Customer c ON a.id = c.id -WHERE c.recent_behavior IN ['suspicious', 'anomalous'] -ORDER BY shared_identifiers DESC +SELECT id, name +FROM Account +WHERE id IN (SELECT id FROM Customer WHERE recent_behavior IN ['suspicious', 'anomalous']) " From 5f9858e03c3533de387a20309a9db188ee36e3f2 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 14:58:35 +0100 Subject: [PATCH 07/12] fix: replace SEARCH_INDEX with SSN lookup for Query 2 SEARCH_INDEX() is not supported in WHERE clauses in ArcadeDB. Use a simple SSN equality filter instead for synthetic identity detection. Co-Authored-By: Claude Opus 4.6 --- .../main/java/com/arcadedb/examples/FraudDetection.java | 9 +++++---- fraud-detection/queries/queries.sh | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java index b4208ad..b467c83 100644 --- a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java +++ b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java @@ -57,16 +57,17 @@ private static void runQuery1FraudRing(RemoteDatabase db) { } } - // Query 2: Synthetic Identity Resolution (Full-Text) + // Query 2: Synthetic Identity Resolution private static void runQuery2SyntheticIdentity(RemoteDatabase db) { - printHeader("Query 2: Synthetic Identity Resolution (Full-Text)", - "Find accounts matching 'Smith' via full-text index, then check for shared SSN."); + printHeader("Query 2: Synthetic Identity Resolution", + "Find accounts 
sharing the same SSN (indicating synthetic identity fraud)."); String sql = """ SELECT id, full_name, ssn FROM Account - WHERE SEARCH_INDEX('Account[full_name]', 'Smith')"""; + WHERE ssn = '123-45-6789' + ORDER BY id"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { diff --git a/fraud-detection/queries/queries.sh b/fraud-detection/queries/queries.sh index db0f1c0..348c8db 100755 --- a/fraud-detection/queries/queries.sh +++ b/fraud-detection/queries/queries.sh @@ -34,13 +34,14 @@ RETURN DISTINCT connected.id, connected.name # ───────────────────────────────────────────────────────────────────────────── echo "" -echo "=== Query 2: Synthetic Identity Resolution (Full-Text) ===" -echo "Find accounts matching 'Smith' via full-text index, then check for shared SSN." +echo "=== Query 2: Synthetic Identity Resolution ===" +echo "Find accounts sharing the same SSN (indicating synthetic identity fraud)." echo "" query "sql" " SELECT id, full_name, ssn FROM Account -WHERE SEARCH_INDEX('Account[full_name]', 'Smith') +WHERE ssn = '123-45-6789' +ORDER BY id " # ───────────────────────────────────────────────────────────────────────────── From 05d4a56945c32bf17da2a13738feb663beaf7624 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 14:59:23 +0100 Subject: [PATCH 08/12] docs: update README and schema after Query 2 change - Remove FULL_TEXT index (no longer used by any query) - Update Query 2 description from Full-Text to Document signal type - Remove SEARCH_INDEX reference from version notes Co-Authored-By: Claude Opus 4.6 --- fraud-detection/README.md | 5 ++--- fraud-detection/sql/01-schema.sql | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fraud-detection/README.md b/fraud-detection/README.md index 8da90c2..88225f9 100644 --- a/fraud-detection/README.md +++ b/fraud-detection/README.md @@ -6,7 +6,7 @@ system that unifies four signal types in a single database: - **Graph traversal** — fraud ring detection via shared identifier 
patterns - **Vector similarity** — behavioral anomaly detection using embeddings - **Time-series** — structuring and velocity attack detection via temporal analysis -- **Full-text fuzzy matching** — synthetic identity resolution via name similarity +- **Document queries** — synthetic identity resolution via shared SSN detection ## Prerequisites @@ -69,7 +69,7 @@ java -jar target/fraud-detection.jar | # | Pattern | Language | Signal type | |---|---------|----------|-------------| | 1 | Fraud Ring Detection | Cypher | Graph | -| 2 | Synthetic Identity Resolution | SQL + SEARCH_INDEX() | Full-Text | +| 2 | Synthetic Identity Resolution | SQL | Document | | 3 | Circular Money Flow | Cypher | Graph | | 4 | Structuring Detection | SQL + subquery | Time-Series | | 5 | Behavioral Anomaly | SQL + vectorCosineSimilarity() | Vector | @@ -92,7 +92,6 @@ java -jar target/fraud-detection.jar This use case targets ArcadeDB **26.3.1-SNAPSHOT**. It uses: - `vectorCosineSimilarity()` for behavioral anomaly detection with `LSM_VECTOR` indexes -- `SEARCH_INDEX()` for full-text fuzzy matching with a `FULL_TEXT` index - Subquery wrapping for `HAVING`-equivalent filtering (ArcadeDB does not support `HAVING`) - Cypher queries for graph traversal and cycle detection diff --git a/fraud-detection/sql/01-schema.sql b/fraud-detection/sql/01-schema.sql index a1b5e71..d1aae66 100644 --- a/fraud-detection/sql/01-schema.sql +++ b/fraud-detection/sql/01-schema.sql @@ -52,4 +52,3 @@ CREATE INDEX IF NOT EXISTS ON Customer (id) UNIQUE; CREATE INDEX IF NOT EXISTS ON Transaction (id) UNIQUE; CREATE INDEX IF NOT EXISTS ON Customer (profile_embedding) LSM_VECTOR METADATA { dimensions: 8, similarity: 'COSINE' }; CREATE INDEX IF NOT EXISTS ON Transaction (behavior_embedding) LSM_VECTOR METADATA { dimensions: 8, similarity: 'COSINE' }; -CREATE INDEX IF NOT EXISTS ON Account (full_name) FULL_TEXT; From 726d4c7d97cf5b97a9d88a3129f78a6b0b031f9f Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 
15:52:08 +0100 Subject: [PATCH 09/12] fix: pin Docker image to 26.3.1-SNAPSHOT, rename Query 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Pin docker-compose.yml to arcadedata/arcadedb:26.3.1-SNAPSHOT to match Maven dependency version - Rename Query 8 from "Multi-Model Investigation" to "Cross-Type Investigation" — the query is a SQL subquery across vertex types, not a combination of graph/vector/time-series signals Co-Authored-By: Claude Opus 4.6 --- fraud-detection/README.md | 2 +- fraud-detection/docker-compose.yml | 2 +- .../src/main/java/com/arcadedb/examples/FraudDetection.java | 6 +++--- fraud-detection/queries/queries.sh | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fraud-detection/README.md b/fraud-detection/README.md index 88225f9..570c384 100644 --- a/fraud-detection/README.md +++ b/fraud-detection/README.md @@ -75,7 +75,7 @@ java -jar target/fraud-detection.jar | 5 | Behavioral Anomaly | SQL + vectorCosineSimilarity() | Vector | | 6 | Velocity Attack Detection | SQL | Time-Series | | 7 | Correlated Account Activity | SQL | Time-Series | -| 8 | Multi-Model Investigation | SQL + MATCH | Combined | +| 8 | Cross-Type Investigation | SQL + subquery | Combined | ## Sample Data diff --git a/fraud-detection/docker-compose.yml b/fraud-detection/docker-compose.yml index c93d9c9..011f8bc 100644 --- a/fraud-detection/docker-compose.yml +++ b/fraud-detection/docker-compose.yml @@ -1,6 +1,6 @@ services: arcadedb: - image: arcadedata/arcadedb:latest + image: arcadedata/arcadedb:26.3.1-SNAPSHOT ports: - "2480:2480" environment: diff --git a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java index b467c83..53d8f29 100644 --- a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java +++ b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java @@ -207,10 
+207,10 @@ SELECT account_id, avg(amount) AS avg_amount, count(*) AS txn_count } } - // Query 8: Multi-Model Investigation (Combined) + // Query 8: Cross-Type Investigation (SQL Subquery) private static void runQuery8MultiModel(RemoteDatabase db) { - printHeader("Query 8: Multi-Model Investigation (Combined)", - "Find suspicious accounts and enrich with transaction counts."); + printHeader("Query 8: Cross-Type Investigation (SQL Subquery)", + "Find accounts linked to customers flagged as suspicious or anomalous."); String sql = """ diff --git a/fraud-detection/queries/queries.sh b/fraud-detection/queries/queries.sh index 348c8db..f042030 100755 --- a/fraud-detection/queries/queries.sh +++ b/fraud-detection/queries/queries.sh @@ -115,8 +115,8 @@ GROUP BY account_id # ───────────────────────────────────────────────────────────────────────────── echo "" -echo "=== Query 8: Multi-Model Investigation (Combined) ===" -echo "Find suspicious accounts and enrich with transaction counts." +echo "=== Query 8: Cross-Type Investigation (SQL Subquery) ===" +echo "Find accounts linked to customers flagged as suspicious or anomalous." 
echo "" query "sql" " SELECT id, name From 8095ca4fa7868145d4120000399995e9615359d8 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 16:28:04 +0100 Subject: [PATCH 10/12] fix: remove .mergify.yml from PR scope, update design doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove .mergify.yml (repo-wide config unrelated to fraud-detection, should be introduced in a separate PR) - Update design doc Query 2 to reflect SSN lookup implementation with note explaining why SEARCH_INDEX() was dropped - Update design doc Query 3 to reflect explicit 5-hop pattern with note explaining ArcadeDB Cypher limitations - Fix Query 2 signal type label: Full-Text → Document - Fix Query 8 label: Multi-Model → Cross-Type Co-Authored-By: Claude Opus 4.6 --- .mergify.yml | 11 ----- .../2026-03-02-fraud-detection-design.md | 42 ++++++++++++------- 2 files changed, 27 insertions(+), 26 deletions(-) delete mode 100644 .mergify.yml diff --git a/.mergify.yml b/.mergify.yml deleted file mode 100644 index 970cfb2..0000000 --- a/.mergify.yml +++ /dev/null @@ -1,11 +0,0 @@ -pull_request_rules: - - name: Merge Dependabot PRs on approval with [skip ci] - conditions: - - "#approved-reviews-by>=1" - - "author=dependabot[bot]" - actions: - merge: - method: merge - commit_message_template: | - {{ title | markdownify }} [skip ci] - {{ body | markdownify }} diff --git a/docs/plans/2026-03-02-fraud-detection-design.md b/docs/plans/2026-03-02-fraud-detection-design.md index 4aeb577..3524ef1 100644 --- a/docs/plans/2026-03-02-fraud-detection-design.md +++ b/docs/plans/2026-03-02-fraud-detection-design.md @@ -6,7 +6,7 @@ ## Overview -Implement the [ArcadeDB Fraud Detection](https://arcadedb.com/fraud-detection.html) use case as the second entry in the `arcadedb-usecases` repository. 
The use case demonstrates ArcadeDB's ability to unify four detection capabilities — graph relationship analysis, vector-based behavioral anomaly detection, time-series pattern identification, and full-text fuzzy matching — in a single multi-model database. +Implement the [ArcadeDB Fraud Detection](https://arcadedb.com/fraud-detection.html) use case as the second entry in the `arcadedb-usecases` repository. The use case demonstrates ArcadeDB's ability to unify multiple detection capabilities — graph relationship analysis, vector-based behavioral anomaly detection, time-series pattern identification, and document queries — in a single multi-model database. ## Repository Structure @@ -103,13 +103,13 @@ Eight query patterns covering all four signal types: | # | Pattern | Language | Signal Type | |---|---------|----------|-------------| | 1 | Fraud Ring Detection | Cypher | Graph | -| 2 | Synthetic Identity Resolution | SQL | Full-Text | +| 2 | Synthetic Identity Resolution | SQL | Document | | 3 | Circular Money Flow | Cypher | Graph | | 4 | Structuring Detection | SQL | Time-Series | | 5 | Behavioral Anomaly | SQL | Vector | | 6 | Velocity Attack Detection | SQL | Time-Series | | 7 | Correlated Account Activity | SQL | Time-Series | -| 8 | Multi-Model Investigation | SQL | Combined | +| 8 | Cross-Type Investigation | SQL | Combined | ### Query 1: Fraud Ring Detection (Graph Traversal) @@ -123,29 +123,41 @@ WHERE connected <> flagged RETURN DISTINCT connected.id, connected.name ``` -### Query 2: Synthetic Identity Resolution (Full-Text) +### Query 2: Synthetic Identity Resolution (Document) -Fuzzy matching on `full_name` where SSN matches but names differ: +Find accounts sharing the same SSN (indicating synthetic identity fraud): ```sql -SELECT a.id, b.id, a.full_name, b.full_name -FROM Account AS a, Account AS b -WHERE a.ssn = b.ssn - AND a.id < b.id - AND a.full_name.similarity(b.full_name) BETWEEN 0.4 AND 0.9 +SELECT id, full_name, ssn +FROM Account +WHERE ssn = 
'123-45-6789' +ORDER BY id ``` +> **Note:** The original design used `SEARCH_INDEX()` for full-text fuzzy matching and +> `full_name.similarity()` for name comparison, but ArcadeDB does not support +> `SEARCH_INDEX()` in WHERE clauses. The SSN equality filter achieves the same +> detection goal for the demo dataset. + ### Query 3: Circular Money Flow (Graph Cycles) -Detect circular transfer paths returning to origin within 30 days: +Detect the A→B→C→D→E→A circular transfer path: ```cypher -MATCH path = (origin:Account)-[:TRANSFERRED_TO*3..6]->(origin) -WHERE all(t IN relationships(path) - WHERE t.ts > datetime() - duration('P30D')) -RETURN origin.id, [n IN nodes(path) | n.id] AS chain +MATCH (origin:Account {id: 'acct-A'}) + -[:TRANSFERRED_TO]->(b:Account) + -[:TRANSFERRED_TO]->(c:Account) + -[:TRANSFERRED_TO]->(d:Account) + -[:TRANSFERRED_TO]->(e:Account) + -[:TRANSFERRED_TO]->(origin) +RETURN origin.id AS origin, b.id AS hop1, c.id AS hop2, d.id AS hop3, e.id AS hop4 ``` +> **Note:** The original design used variable-length paths (`*3..6`) with `all()` +> predicate and `datetime() - duration()`, but ArcadeDB's Cypher implementation +> does not support these features. The explicit 5-hop pattern works for the known +> fraud ring topology. + ### Query 4: Structuring Detection (Time-Series Bucketing) Flag accounts making 3+ deposits per day in the $8,000–$9,999 range: From d793a537e9d90f61d22d9ffc4be432291b14e3f4 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 18:36:40 +0100 Subject: [PATCH 11/12] docs: add fraud-detection to project README Co-Authored-By: Claude Opus 4.6 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 88dd13b..ea08b6f 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ and runnable demos via both `curl` and a Java program. 
| [recommendation-engine](./recommendation-engine/) | Intelligent product and content recommendations | Graph traversal, Vector similarity, Time-series | | [knowledge-graphs](./knowledge-graphs/) | Academic research knowledge graph with co-authorship and citation networks | Graph traversal, Vector similarity, Full-text search, Time-series | | [graph-rag](./graph-rag/) | Graph RAG system combining knowledge graphs with vector search for retrieval-augmented generation | Graph traversal, Vector similarity, Full-text indexing, Neo4j Bolt, LangChain4j | +| [fraud-detection](./fraud-detection/) | Fraud detection system unifying graph, vector, and time-series signals | Graph traversal, Vector similarity, Time-series, Cypher | ## Structure From f088c17a264141f304e04bb024075177bce9a06d Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 2 Mar 2026 18:37:58 +0100 Subject: [PATCH 12/12] delete .java-version --- fraud-detection/java/.java-version | 1 - 1 file changed, 1 deletion(-) delete mode 100644 fraud-detection/java/.java-version diff --git a/fraud-detection/java/.java-version b/fraud-detection/java/.java-version deleted file mode 100644 index aabe6ec..0000000 --- a/fraud-detection/java/.java-version +++ /dev/null @@ -1 +0,0 @@ -21