---
# CI for the fraud-detection use case.
#
# One matrix job runs twice: once driving ArcadeDB with the curl script and
# once with the Java program. Each run starts its own ArcadeDB container,
# loads schema and data through setup.sh, executes the runner, and always
# tears the container down afterwards.
name: Fraud Detection CI

# Only run when the use case itself or this workflow file changes.
on:
  pull_request:
    paths:
      - '.github/workflows/fraud-detection.yml'
      - 'fraud-detection/**'
  push:
    paths:
      - '.github/workflows/fraud-detection.yml'
      - 'fraud-detection/**'

jobs:
  test:
    timeout-minutes: 15
    runs-on: ubuntu-latest
    # The job only needs to read the repository contents.
    permissions:
      contents: read
    strategy:
      # Let the other runner finish even when one of them fails.
      fail-fast: false
      matrix:
        runner:
          - curl
          - java

    # Connection settings read by setup.sh and queries/queries.sh; the Java
    # program reads ARCADEDB_USER and ARCADEDB_PASS as well.
    env:
      ARCADEDB_PASS: 'arcadedb'
      ARCADEDB_URL: 'http://localhost:2480'
      ARCADEDB_USER: 'root'

    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 1

      # Java toolchain and dependency cache are only needed by the java runner.
      - name: Set up Java
        if: ${{ matrix.runner == 'java' }}
        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654  # v5.2.0
        with:
          distribution: temurin
          java-version: "21"

      - name: Cache Maven repository
        if: ${{ matrix.runner == 'java' }}
        uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306  # v5.0.3
        with:
          path: ~/.m2
          key: ${{ runner.os }}-m2-${{ hashFiles('fraud-detection/java/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-m2-

      # Bring up the database defined in fraud-detection/docker-compose.yml.
      - name: Start ArcadeDB
        run: docker compose up -d
        working-directory: fraud-detection

      # setup.sh polls the readiness endpoint itself before creating the database.
      - name: Setup database
        run: ./setup.sh
        working-directory: fraud-detection

      - name: Run curl queries
        if: ${{ matrix.runner == 'curl' }}
        run: ./queries/queries.sh
        working-directory: fraud-detection

      - name: Build and run Java
        if: ${{ matrix.runner == 'java' }}
        working-directory: fraud-detection/java
        run: |
          mvn package --no-transfer-progress
          java -jar target/fraud-detection.jar

      # Runs even after a failed step so the container never outlives the job.
      - name: Teardown
        if: ${{ always() }}
        run: docker compose down
        working-directory: fraud-detection
Java program. | [recommendation-engine](./recommendation-engine/) | Intelligent product and content recommendations | Graph traversal, Vector similarity, Time-series | | [knowledge-graphs](./knowledge-graphs/) | Academic research knowledge graph with co-authorship and citation networks | Graph traversal, Vector similarity, Full-text search, Time-series | | [graph-rag](./graph-rag/) | Graph RAG system combining knowledge graphs with vector search for retrieval-augmented generation | Graph traversal, Vector similarity, Full-text indexing, Neo4j Bolt, LangChain4j | +| [fraud-detection](./fraud-detection/) | Fraud detection system unifying graph, vector, and time-series signals | Graph traversal, Vector similarity, Time-series, Cypher | ## Structure diff --git a/docs/plans/2026-03-02-fraud-detection-ci.md b/docs/plans/2026-03-02-fraud-detection-ci.md new file mode 100644 index 0000000..be4407b --- /dev/null +++ b/docs/plans/2026-03-02-fraud-detection-ci.md @@ -0,0 +1,135 @@ +# Fraud Detection CI Workflow Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Create `.github/workflows/fraud-detection.yml` — a matrix CI workflow that verifies both the `curl` and `java` runners for the fraud-detection use case on every push and pull request. + +**Architecture:** One `test` job with `matrix: runner: [curl, java]`. Each entry is self-contained: it starts ArcadeDB 26.3.1-SNAPSHOT via `docker compose up -d`, runs `./setup.sh` to load schema and data, runs the language-specific command, then tears down with `if: always()`. Pass criterion is exit code 0. Mirrors `.github/workflows/recommendation-engine.yml` exactly — same action versions, same SHA pins, same step structure. 
+ +**Tech Stack:** GitHub Actions, `actions/checkout@v6` (SHA `de0fac2e`), `actions/setup-java@v5` (SHA `be666c2f`, temurin 21), `actions/cache@v5` (SHA `cdf6c1fa`), Docker Compose, Maven 3.x, Java 21, bash/curl/jq (pre-installed on `ubuntu-latest`) + +--- + +### Task 1: Create the workflow file + +**Files:** +- Create: `.github/workflows/fraud-detection.yml` + +**Step 1: Write the file** + +```yaml +name: Fraud Detection CI + +on: + push: + paths: + - fraud-detection/** + - .github/workflows/fraud-detection.yml + pull_request: + paths: + - fraud-detection/** + - .github/workflows/fraud-detection.yml + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: read + strategy: + fail-fast: false + matrix: + runner: [curl, java] + + env: + ARCADEDB_URL: http://localhost:2480 + ARCADEDB_USER: root + ARCADEDB_PASS: arcadedb + + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + + - name: Set up Java + if: matrix.runner == 'java' + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: '21' + distribution: 'temurin' + + - name: Cache Maven repository + if: matrix.runner == 'java' + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('fraud-detection/java/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + + - name: Start ArcadeDB + working-directory: fraud-detection + run: docker compose up -d + + - name: Setup database + working-directory: fraud-detection + run: ./setup.sh + + - name: Run curl queries + if: matrix.runner == 'curl' + working-directory: fraud-detection + run: ./queries/queries.sh + + - name: Build and run Java + if: matrix.runner == 'java' + working-directory: fraud-detection/java + run: | + mvn package --no-transfer-progress + java -jar target/fraud-detection.jar + + - name: Teardown + if: always() + working-directory: 
fraud-detection + run: docker compose down +``` + +**Step 2: Validate YAML syntax** + +```bash +python3 -c "import yaml; yaml.safe_load(open('.github/workflows/fraud-detection.yml'))" && echo "YAML valid" +``` + +Expected: `YAML valid` + +**Step 3: Commit** + +```bash +git add .github/workflows/fraud-detection.yml +git commit -m "ci: add fraud-detection workflow (curl + java matrix)" +``` + +--- + +### Task 2: Verify the workflow triggers + +**Step 1: Push the branch and check GitHub Actions** + +```bash +git push origin feat/fraud-detection +``` + +Open: `https://github.com/arcadedata/arcadedb-usecases/actions` + +Expected: a `Fraud Detection CI` run appears with two jobs — `test (curl)` and `test (java)`. + +**Step 2: Confirm both jobs pass** + +Both `test (curl)` and `test (java)` should show green checkmarks. If either fails, check the step-level logs: + +- **Start ArcadeDB fails:** confirm `docker compose up -d` runs from the `fraud-detection/` directory — check `working-directory`. Note: uses `arcadedata/arcadedb:26.3.1-SNAPSHOT` which must be available on Docker Hub; if the image doesn't exist yet, the job will fail at this step +- **Setup database fails:** `setup.sh` may be timing out waiting for ArcadeDB; check if the healthcheck `retries: 20` at 5s intervals (100s total) is enough — if not, add a `docker compose ps` debug step before `setup.sh` +- **curl queries fail:** confirm `jq` is available with `which jq`; check the `ARCADEDB_PASS` env var is picked up by `queries.sh`. 
Some queries rely on functions of the targeted ArcadeDB 26.3.1-SNAPSHOT build (the shipped `queries.sh` uses `vectorCosineSimilarity()`; the `time_bucket`, `vectorDistance`, and `full_name.similarity` calls from the earlier design are no longer used) — if the server version doesn't support a function, the query will return an error +- **Java build fails:** the `arcadedb-network:26.3.1-SNAPSHOT` dependency must be available in Maven Central or a configured snapshot repository; if not, `mvn package` will fail resolving dependencies +- **Java run fails:** the fat JAR should be at `target/fraud-detection.jar`; confirm `finalName` in `pom.xml` matches + +**Step 3: No further commit needed if both pass** diff --git a/docs/plans/2026-03-02-fraud-detection-design.md b/docs/plans/2026-03-02-fraud-detection-design.md new file mode 100644 index 0000000..3524ef1 --- /dev/null +++ b/docs/plans/2026-03-02-fraud-detection-design.md @@ -0,0 +1,257 @@ +# Fraud Detection Use Case — Design + +**Date:** 2026-03-02 +**Branch:** feat/fraud-detection +**ArcadeDB version:** 26.3.1-SNAPSHOT + +## Overview + +Implement the [ArcadeDB Fraud Detection](https://arcadedb.com/fraud-detection.html) use case as the second entry in the `arcadedb-usecases` repository. The use case demonstrates ArcadeDB's ability to unify multiple detection capabilities — graph relationship analysis, vector-based behavioral anomaly detection, time-series pattern identification, and document queries — in a single multi-model database. 
+ +## Repository Structure + +Self-contained directory, same layout as the recommendation-engine: + +``` +fraud-detection/ +├── README.md +├── docker-compose.yml +├── setup.sh +├── sql/ +│ ├── 01-schema.sql +│ └── 02-data.sql +├── queries/ +│ └── queries.sh +└── java/ + ├── pom.xml + └── src/main/java/ + └── com/arcadedb/examples/FraudDetection.java +``` + +## Docker Compose + +- Single service: `arcadedata/arcadedb:26.3.1-SNAPSHOT` +- HTTP API port exposed: `2480` +- Root credentials passed as environment variables (`JAVA_OPTS: -Darcadedb.server.rootPassword=arcadedb`) +- Healthcheck on `/api/v1/ready` + +## Schema (`sql/01-schema.sql`) + +Eight vertex types, six edge types, one document type. + +**Vertices:** +- `Account` — `id` (STRING), `name` (STRING), `full_name` (STRING), `ssn` (STRING), `credit_limit` (FLOAT), `balance` (FLOAT) +- `Customer` — `id` (STRING), `baseline_behavior` (STRING), `recent_behavior` (STRING), `profile_embedding` (LIST) +- `Device` — `id` (STRING), `fingerprint` (STRING) +- `Phone` — `number` (STRING) +- `Address` — `street` (STRING), `city` (STRING), `zip` (STRING) +- `Email` — `address` (STRING) +- `Beneficiary` — `id` (STRING), `name` (STRING) +- `Transaction` — `id` (STRING), `amount` (FLOAT), `merchant` (STRING), `behavior_embedding` (LIST), `ts` (DATETIME) + +**Edges:** +- `USES_DEVICE` — Account → Device +- `HAS_PHONE` — Account → Phone +- `HAS_ADDRESS` — Account → Address +- `HAS_EMAIL` — Account → Email +- `TRANSFERRED_TO` — Account → Account (properties: `amount` FLOAT, `ts` DATETIME) +- `BENEFICIARY_OF` — Account → Beneficiary + +**Document types:** +- `Deposit` — `account_id` (STRING), `amount` (FLOAT), `ts` (DATETIME) + +**Indexes:** +- `UNIQUE` on `Account(id)`, `Customer(id)`, `Transaction(id)` +- `LSM_VECTOR` on `Customer(profile_embedding)` — 8 dimensions, COSINE +- `LSM_VECTOR` on `Transaction(behavior_embedding)` — 8 dimensions, COSINE +- `FULL_TEXT` on `Account(full_name)` + +## Sample Data (`sql/02-data.sql`) + 
+Approximately 60–70 records across 11 accounts with distinct fraud patterns: + +**Fraud Ring (accounts A–E):** +- 5 accounts sharing one Device (`dev-shared`) and one Phone (`phone-shared`) +- Circular transfers A→B→C→D→E→A, amounts $8,000–$9,500, spread over 30 days +- Each account has its own unique Email +- 3+ deposits per day in the $8,000–$9,999 range (structuring pattern) + +**Synthetic Identity Pair (accounts F–G):** +- `acct-F` ("Robert J. Smith", SSN "123-45-6789") and `acct-G` ("Rob Smith Jr.", same SSN) +- Same Address, different Phones and Emails +- `full_name` similarity between 0.4–0.9 + +**Velocity Attacker (account H):** +- 10+ transactions in a 5-minute window +- `behavior_embedding` deviates significantly from Customer `profile_embedding` (vectorDistance > 0.7) + +**Legitimate Accounts (L1–L3):** +- Each has unique Device, Phone, Address, Email +- Normal transfer patterns, occasional deposits of varying amounts +- `behavior_embedding` close to `profile_embedding` (vectorDistance < 0.3) + +**Customers:** +- 11 Customer records (one per account) with 8-dimensional `profile_embedding` vectors +- Fraud ring members share similar embeddings; legitimate accounts have distinct profiles + +All embeddings use 8-dimensional float arrays. 
+ +## Queries + +Eight query patterns covering all four signal types: + +| # | Pattern | Language | Signal Type | +|---|---------|----------|-------------| +| 1 | Fraud Ring Detection | Cypher | Graph | +| 2 | Synthetic Identity Resolution | SQL | Document | +| 3 | Circular Money Flow | Cypher | Graph | +| 4 | Structuring Detection | SQL | Time-Series | +| 5 | Behavioral Anomaly | SQL | Vector | +| 6 | Velocity Attack Detection | SQL | Time-Series | +| 7 | Correlated Account Activity | SQL | Time-Series | +| 8 | Cross-Type Investigation | SQL | Combined | + +### Query 1: Fraud Ring Detection (Graph Traversal) + +Multi-hop traversal through shared identifiers to find accounts connected to a flagged account: + +```cypher +MATCH (flagged:Account {id: 'acct-A'}) + -[:USES_DEVICE|HAS_PHONE|HAS_ADDRESS*1..4]- + (connected:Account) +WHERE connected <> flagged +RETURN DISTINCT connected.id, connected.name +``` + +### Query 2: Synthetic Identity Resolution (Document) + +Find accounts sharing the same SSN (indicating synthetic identity fraud): + +```sql +SELECT id, full_name, ssn +FROM Account +WHERE ssn = '123-45-6789' +ORDER BY id +``` + +> **Note:** The original design used `SEARCH_INDEX()` for full-text fuzzy matching and +> `full_name.similarity()` for name comparison, but ArcadeDB does not support +> `SEARCH_INDEX()` in WHERE clauses. The SSN equality filter achieves the same +> detection goal for the demo dataset. 
+ +### Query 3: Circular Money Flow (Graph Cycles) + +Detect the A→B→C→D→E→A circular transfer path: + +```cypher +MATCH (origin:Account {id: 'acct-A'}) + -[:TRANSFERRED_TO]->(b:Account) + -[:TRANSFERRED_TO]->(c:Account) + -[:TRANSFERRED_TO]->(d:Account) + -[:TRANSFERRED_TO]->(e:Account) + -[:TRANSFERRED_TO]->(origin) +RETURN origin.id AS origin, b.id AS hop1, c.id AS hop2, d.id AS hop3, e.id AS hop4 +``` + +> **Note:** The original design used variable-length paths (`*3..6`) with `all()` +> predicate and `datetime() - duration()`, but ArcadeDB's Cypher implementation +> does not support these features. The explicit 5-hop pattern works for the known +> fraud ring topology. + +### Query 4: Structuring Detection (Time-Series Bucketing) + +Flag accounts making 3+ deposits per day in the $8,000–$9,999 range: + +```sql +SELECT time_bucket('1d', ts) AS day, account_id, count(*) AS deposit_count +FROM Deposit +WHERE amount BETWEEN 8000 AND 9999 +GROUP BY day, account_id +HAVING deposit_count >= 3 +``` + +### Query 5: Behavioral Anomaly (Vector Distance) + +Detect transactions whose behavioral embedding deviates from the customer's profile: + +```sql +SELECT t.id, t.amount, t.merchant, + vectorDistance(t.behavior_embedding, c.profile_embedding) AS deviation +FROM Transaction t +JOIN Customer c ON t.account_id = c.id +WHERE vectorDistance(t.behavior_embedding, c.profile_embedding) > 0.7 +ORDER BY deviation DESC +``` + +### Query 6: Velocity Attack Detection (Time-Series Rate) + +Detect accounts with abnormally high transaction rates over a 5-minute window: + +```sql +SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn +FROM Transaction +WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' +GROUP BY account_id +HAVING txn_count > 5 +``` + +### Query 7: Correlated Account Activity (Time-Series Correlation) + +Detect coordinated transfer activity between two accounts: + +```sql +SELECT a.account_id AS account_a, b.account_id AS 
account_b, + avg(a.amount) AS avg_a, avg(b.amount) AS avg_b, + count(*) AS matching_txns +FROM Transaction a, Transaction b +WHERE a.account_id = 'acct-A' AND b.account_id = 'acct-B' + AND a.ts >= '2026-02-01T00:00:00Z' + AND b.ts >= '2026-02-01T00:00:00Z' +``` + +### Query 8: Multi-Model Investigation (Combined) + +Composite risk score blending graph distance, temporal patterns, and behavioral deviation. Starts with graph traversal to find connected accounts, enriches with velocity and vector anomaly scores. + +## curl Queries (`queries/queries.sh`) + +Eight labeled sections, one per query pattern, each POSTing to `http://localhost:2480/api/v1/query/FraudDetection`. Same `query()` helper function as the recommendation-engine. + +All queries use hardcoded values matching `02-data.sql` (known account IDs, the shared device, the synthetic identity SSN) so the script works out-of-the-box after setup. + +## Java Program (`java/`) + +- **Build tool:** Maven (standalone `pom.xml`, no parent) +- **Dependency:** `com.arcadedb:arcadedb-network:26.3.1-SNAPSHOT` +- **Output:** executable fat JAR via `maven-assembly-plugin` (`mvn package` → `java -jar target/fraud-detection.jar`) +- **Entry point:** single `FraudDetection.java` with a `main` method that: + 1. Opens a `RemoteDatabase` connection to `localhost:2480` + 2. Runs all 8 queries sequentially, each wrapped in `tryRun()` + 3. Prints a header and formatted results for each query to stdout + 4. 
Closes the connection + +## Query Language Mapping + +| # | Pattern | Language | +|---|---------|----------| +| 1 | Fraud Ring Detection | Cypher | +| 2 | Synthetic Identity Resolution | SQL | +| 3 | Circular Money Flow | Cypher | +| 4 | Structuring Detection | SQL | +| 5 | Behavioral Anomaly | SQL | +| 6 | Velocity Attack Detection | SQL | +| 7 | Correlated Account Activity | SQL | +| 8 | Multi-Model Investigation | SQL | + +## Success Criteria + +- `docker compose up` starts ArcadeDB 26.3.1-SNAPSHOT successfully +- SQL files apply cleanly via `setup.sh` with no errors +- `queries.sh` runs all 8 queries and returns non-empty result sets +- `mvn package && java -jar target/fraud-detection.jar` runs all 8 queries and prints results to stdout +- Fraud ring query returns accounts B–E when investigating account A +- Synthetic identity query returns the F/G pair +- Circular flow query detects the A→B→C→D→E→A cycle +- Structuring query flags fraud ring accounts with 3+ sub-$10K deposits per day +- Behavioral anomaly query flags account H's transactions +- Velocity query flags account H diff --git a/fraud-detection/README.md b/fraud-detection/README.md new file mode 100644 index 0000000..570c384 --- /dev/null +++ b/fraud-detection/README.md @@ -0,0 +1,100 @@ +# Fraud Detection + +Demonstrates ArcadeDB's multi-model capabilities by implementing a fraud detection +system that unifies four signal types in a single database: + +- **Graph traversal** — fraud ring detection via shared identifier patterns +- **Vector similarity** — behavioral anomaly detection using embeddings +- **Time-series** — structuring and velocity attack detection via temporal analysis +- **Document queries** — synthetic identity resolution via shared SSN detection + +## Prerequisites + +- Docker and Docker Compose +- `curl` and `jq` +- Java 21+ and Maven 3.x (for the Java demo) + +## Quickstart + +### 1. Start ArcadeDB + +```bash +docker compose up -d +``` + +### 2. 
Create database and load data + +```bash +./setup.sh +``` + +This creates the `FraudDetection` database, applies the schema, and inserts sample data. + +### 3a. Run queries via curl + +```bash +./queries/queries.sh +``` + +### 3b. Run queries via Java + +```bash +cd java +mvn package -q +java -jar target/fraud-detection.jar +``` + +## Schema + +| Type | Kind | Key properties | +|------|------|----------------| +| `Account` | Vertex | `id`, `name`, `full_name`, `ssn`, `credit_limit`, `balance` | +| `Customer` | Vertex | `id`, `baseline_behavior`, `recent_behavior`, `profile_embedding` | +| `Device` | Vertex | `id`, `fingerprint` | +| `Phone` | Vertex | `number` | +| `Address` | Vertex | `street`, `city`, `zip` | +| `Email` | Vertex | `address` | +| `Beneficiary` | Vertex | `id`, `name` | +| `Transaction` | Vertex | `id`, `amount`, `merchant`, `behavior_embedding`, `ts` | +| `USES_DEVICE` | Edge | Account → Device | +| `HAS_PHONE` | Edge | Account → Phone | +| `HAS_ADDRESS` | Edge | Account → Address | +| `HAS_EMAIL` | Edge | Account → Email | +| `TRANSFERRED_TO` | Edge | Account → Account (`amount`, `ts`) | +| `BENEFICIARY_OF` | Edge | Account → Beneficiary | +| `Deposit` | Document | `account_id`, `amount`, `ts` | + +## Query Patterns + +| # | Pattern | Language | Signal type | +|---|---------|----------|-------------| +| 1 | Fraud Ring Detection | Cypher | Graph | +| 2 | Synthetic Identity Resolution | SQL | Document | +| 3 | Circular Money Flow | Cypher | Graph | +| 4 | Structuring Detection | SQL + subquery | Time-Series | +| 5 | Behavioral Anomaly | SQL + vectorCosineSimilarity() | Vector | +| 6 | Velocity Attack Detection | SQL | Time-Series | +| 7 | Correlated Account Activity | SQL | Time-Series | +| 8 | Cross-Type Investigation | SQL + subquery | Combined | + +## Sample Data + +- 11 accounts across four profiles: fraud ring (A–E), synthetic identity pair (F–G), + velocity attacker (H), and legitimate users (L1–L3) +- 11 customers with 8-dimensional profile 
---
# Single ArcadeDB server for the fraud-detection demo.
services:
  arcadedb:
    image: 'arcadedata/arcadedb:26.3.1-SNAPSHOT'
    environment:
      # Demo-only root password; it matches the ARCADEDB_PASS default used by
      # setup.sh, queries/queries.sh and the Java program.
      JAVA_OPTS: '-Darcadedb.server.rootPassword=arcadedb'
    # HTTP API port that the scripts and the Java client connect to.
    ports:
      - '2480:2480'
    healthcheck:
      # Probes the readiness endpoint from inside the container.
      # NOTE(review): assumes the image ships a `curl` binary — confirm.
      test:
        - 'CMD'
        - 'curl'
        - '-sf'
        - 'http://localhost:2480/api/v1/ready'
      start_period: 10s
      interval: 5s
      timeout: 3s
      retries: 20
make-assembly + package + + single + + + + + + + diff --git a/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java new file mode 100644 index 0000000..53d8f29 --- /dev/null +++ b/fraud-detection/java/src/main/java/com/arcadedb/examples/FraudDetection.java @@ -0,0 +1,237 @@ +package com.arcadedb.examples; + +import com.arcadedb.query.sql.executor.Result; +import com.arcadedb.query.sql.executor.ResultSet; +import com.arcadedb.remote.RemoteDatabase; + +public class FraudDetection { + + private static final String HOST = System.getenv().getOrDefault("ARCADEDB_HOST", "localhost"); + private static final int PORT = Integer.parseInt(System.getenv().getOrDefault("ARCADEDB_PORT", "2480")); + private static final String DB_NAME = "FraudDetection"; + private static final String USER = System.getenv().getOrDefault("ARCADEDB_USER", "root"); + private static final String PASSWORD = System.getenv().getOrDefault("ARCADEDB_PASS", "arcadedb"); + + public static void main(String[] args) { + try (RemoteDatabase db = new RemoteDatabase(HOST, PORT, DB_NAME, USER, PASSWORD)) { + tryRun(() -> runQuery1FraudRing(db), "Query 1"); + tryRun(() -> runQuery2SyntheticIdentity(db), "Query 2"); + tryRun(() -> runQuery3CircularFlow(db), "Query 3"); + tryRun(() -> runQuery4Structuring(db), "Query 4"); + tryRun(() -> runQuery5BehavioralAnomaly(db), "Query 5"); + tryRun(() -> runQuery6VelocityAttack(db), "Query 6"); + tryRun(() -> runQuery7CorrelatedActivity(db), "Query 7"); + tryRun(() -> runQuery8MultiModel(db), "Query 8"); + } + System.out.println("\nAll queries complete."); + } + + private static void tryRun(Runnable r, String name) { + try { + r.run(); + } catch (Exception e) { + System.err.println("[" + name + " FAILED] " + e.getMessage()); + } + } + + // Query 1: Fraud Ring Detection (Graph Traversal) + private static void runQuery1FraudRing(RemoteDatabase db) { + printHeader("Query 1: Fraud 
Ring Detection (Graph Traversal)", + "Find accounts connected to acct-A through shared identifiers."); + + String cypher = + """ + MATCH (flagged:Account {id: 'acct-A'}) + -[:USES_DEVICE|HAS_PHONE|HAS_ADDRESS*1..4]- + (connected:Account) + WHERE connected <> flagged + RETURN DISTINCT connected.id, connected.name"""; + + try (ResultSet rs = db.query("cypher", cypher)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-12s | %s%n", + r.getProperty("connected.id"), + r.getProperty("connected.name")); + } + } + } + + // Query 2: Synthetic Identity Resolution + private static void runQuery2SyntheticIdentity(RemoteDatabase db) { + printHeader("Query 2: Synthetic Identity Resolution", + "Find accounts sharing the same SSN (indicating synthetic identity fraud)."); + + String sql = + """ + SELECT id, full_name, ssn + FROM Account + WHERE ssn = '123-45-6789' + ORDER BY id"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-10s | %-20s | ssn: %s%n", + r.getProperty("id"), + r.getProperty("full_name"), + r.getProperty("ssn")); + } + } + } + + // Query 3: Circular Money Flow (Graph Cycles) + private static void runQuery3CircularFlow(RemoteDatabase db) { + printHeader("Query 3: Circular Money Flow (Graph Cycles)", + "Detect the A->B->C->D->E->A circular transfer path."); + + String cypher = + """ + MATCH (origin:Account {id: 'acct-A'}) + -[:TRANSFERRED_TO]->(b:Account) + -[:TRANSFERRED_TO]->(c:Account) + -[:TRANSFERRED_TO]->(d:Account) + -[:TRANSFERRED_TO]->(e:Account) + -[:TRANSFERRED_TO]->(origin) + RETURN origin.id AS origin, b.id AS hop1, c.id AS hop2, d.id AS hop3, e.id AS hop4"""; + + try (ResultSet rs = db.query("cypher", cypher)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" origin: %s -> %s -> %s -> %s -> %s -> (back to origin)%n", + r.getProperty("origin"), + r.getProperty("hop1"), + r.getProperty("hop2"), + r.getProperty("hop3"), + 
r.getProperty("hop4")); + } + } + } + + // Query 4: Structuring Detection (Time-Series) + private static void runQuery4Structuring(RemoteDatabase db) { + printHeader("Query 4: Structuring Detection (Time-Series)", + "Flag accounts making 3+ deposits in the $8,000-$9,999 range."); + + String sql = + """ + SELECT FROM ( + SELECT account_id, count(*) AS deposit_count + FROM Deposit + WHERE amount BETWEEN 8000 AND 9999 + GROUP BY account_id + ) WHERE deposit_count >= 3"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" account: %-10s | deposits: %s%n", + r.getProperty("account_id"), + r.getProperty("deposit_count")); + } + } + } + + // Query 5: Behavioral Anomaly (Vector Similarity) + private static void runQuery5BehavioralAnomaly(RemoteDatabase db) { + printHeader("Query 5: Behavioral Anomaly (Vector Similarity)", + "Detect acct-H transactions deviating from customer profile via cosine similarity."); + + String sql = + """ + SELECT id, amount, merchant, account_id, + vectorCosineSimilarity(behavior_embedding, [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) AS profile_similarity + FROM Transaction + WHERE account_id = 'acct-H' + ORDER BY profile_similarity"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-10s | $%-10.2f | %-15s | similarity: %s%n", + r.getProperty("id"), + ((Number) r.getProperty("amount")).doubleValue(), + r.getProperty("merchant"), + r.getProperty("profile_similarity")); + } + } + } + + // Query 6: Velocity Attack Detection (Time-Series) + private static void runQuery6VelocityAttack(RemoteDatabase db) { + printHeader("Query 6: Velocity Attack Detection (Time-Series)", + "Detect accounts with abnormally high transaction rates in a 5-minute window."); + + String sql = + """ + SELECT FROM ( + SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn + FROM Transaction + WHERE ts BETWEEN 
'2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z' + GROUP BY account_id + ) WHERE txn_count > 5"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" account: %-10s | txns: %-5s | from: %s | to: %s%n", + r.getProperty("account_id"), + r.getProperty("txn_count"), + r.getProperty("first_txn"), + r.getProperty("last_txn")); + } + } + } + + // Query 7: Correlated Account Activity (Time-Series) + private static void runQuery7CorrelatedActivity(RemoteDatabase db) { + printHeader("Query 7: Correlated Account Activity (Time-Series)", + "Compare transfer patterns between two accounts to detect coordination."); + + String sql = + """ + SELECT account_id, avg(amount) AS avg_amount, count(*) AS txn_count + FROM Transaction + WHERE account_id IN ['acct-A', 'acct-B'] + AND ts >= '2026-02-01T00:00:00Z' + GROUP BY account_id"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" account: %-10s | avg_amount: %-10s | txns: %s%n", + r.getProperty("account_id"), + r.getProperty("avg_amount"), + r.getProperty("txn_count")); + } + } + } + + // Query 8: Cross-Type Investigation (SQL Subquery) + private static void runQuery8MultiModel(RemoteDatabase db) { + printHeader("Query 8: Cross-Type Investigation (SQL Subquery)", + "Find accounts linked to customers flagged as suspicious or anomalous."); + + String sql = + """ + SELECT id, name + FROM Account + WHERE id IN (SELECT id FROM Customer WHERE recent_behavior IN ['suspicious', 'anomalous'])"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-10s | %s%n", + r.getProperty("id"), + r.getProperty("name")); + } + } + } + + private static void printHeader(String title, String description) { + System.out.println("\n" + "=".repeat(70)); + System.out.println(" " + title); + System.out.println(" " + description); + 
System.out.println("=".repeat(70));
+    }
+}
diff --git a/fraud-detection/queries/queries.sh b/fraud-detection/queries/queries.sh
new file mode 100755
index 0000000..f042030
--- /dev/null
+++ b/fraud-detection/queries/queries.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# Fraud Detection — all eight query patterns via curl
+# Prerequisites: ArcadeDB running, setup.sh already executed, jq installed
+# Usage: ./queries/queries.sh
+
+set -euo pipefail
+
+ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}"
+ARCADEDB_USER="${ARCADEDB_USER:-root}"
+ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}"
+AUTH="${ARCADEDB_USER}:${ARCADEDB_PASS}"
+DB="FraudDetection"
+QUERY_URL="${ARCADEDB_URL}/api/v1/query/${DB}"
+
+# query LANG COMMAND
+#   POSTs COMMAND to ${QUERY_URL} and prints the ".result" field of the JSON
+#   response.
+#     LANG     sent as "language" (this script uses "sql" and "cypher")
+#     COMMAND  statement text; jq builds the request body, so quotes and
+#              newlines inside the statement are JSON-escaped
+#   curl flags: -f makes HTTP errors fail the pipeline (fatal under
+#   `set -euo pipefail`), -s hides the progress meter, and -S still prints the
+#   error reason so a rejected query is not silent in CI logs.
+query() {
+  local lang="$1" cmd="$2"
+  jq -cn --arg l "$lang" --arg c "$cmd" '{"language":$l,"command":$c}' \
+    | curl -fsS -u "$AUTH" -X POST "$QUERY_URL" \
+        -H "Content-Type: application/json" -d @- \
+    | jq '.result'
+}
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo "=== Query 1: Fraud Ring Detection (Graph Traversal) ==="
+echo "Find accounts connected to a flagged account through shared identifiers."
+echo ""
+query "cypher" "
+MATCH (flagged:Account {id: 'acct-A'})
+      -[:USES_DEVICE|HAS_PHONE|HAS_ADDRESS*1..4]-
+      (connected:Account)
+WHERE connected <> flagged
+RETURN DISTINCT connected.id, connected.name
+"
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo ""
+echo "=== Query 2: Synthetic Identity Resolution ==="
+echo "Find accounts sharing the same SSN (indicating synthetic identity fraud)."
+echo ""
+query "sql" "
+SELECT id, full_name, ssn
+FROM Account
+WHERE ssn = '123-45-6789'
+ORDER BY id
+"
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo ""
+echo "=== Query 3: Circular Money Flow (Graph Cycles) ==="
+echo "Detect the A->B->C->D->E->A circular transfer path."
+echo ""
+query "cypher" "
+MATCH (origin:Account {id: 'acct-A'})
+      -[:TRANSFERRED_TO]->(b:Account)
+      -[:TRANSFERRED_TO]->(c:Account)
+      -[:TRANSFERRED_TO]->(d:Account)
+      -[:TRANSFERRED_TO]->(e:Account)
+      -[:TRANSFERRED_TO]->(origin)
+RETURN origin.id AS origin, b.id AS hop1, c.id AS hop2, d.id AS hop3, e.id AS hop4
+"
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo ""
+echo "=== Query 4: Structuring Detection (Time-Series) ==="
+echo "Flag accounts making 3+ deposits in the \$8,000–\$9,999 range."
+echo ""
+query "sql" "
+SELECT FROM (
+  SELECT account_id, count(*) AS deposit_count
+  FROM Deposit
+  WHERE amount BETWEEN 8000 AND 9999
+  GROUP BY account_id
+) WHERE deposit_count >= 3
+"
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo ""
+echo "=== Query 5: Behavioral Anomaly (Vector Similarity) ==="
+echo "Detect acct-H transactions deviating from customer profile via cosine similarity."
+echo ""
+query "sql" "
+SELECT id, amount, merchant, account_id,
+       vectorCosineSimilarity(behavior_embedding, [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) AS profile_similarity
+FROM Transaction
+WHERE account_id = 'acct-H'
+ORDER BY profile_similarity
+"
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo ""
+echo "=== Query 6: Velocity Attack Detection (Time-Series) ==="
+echo "Detect accounts with abnormally high transaction rates in a 5-minute window."
+echo ""
+query "sql" "
+SELECT FROM (
+  SELECT account_id, count(*) AS txn_count, min(ts) AS first_txn, max(ts) AS last_txn
+  FROM Transaction
+  WHERE ts BETWEEN '2026-03-01T13:00:00Z' AND '2026-03-01T13:05:00Z'
+  GROUP BY account_id
+) WHERE txn_count > 5
+"
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo ""
+echo "=== Query 7: Correlated Account Activity (Time-Series) ==="
+echo "Compare transfer patterns between two accounts to detect coordination."
+echo ""
+query "sql" "
+SELECT account_id, avg(amount) AS avg_amount, count(*) AS txn_count
+FROM Transaction
+WHERE account_id IN ['acct-A', 'acct-B']
+  AND ts >= '2026-02-01T00:00:00Z'
+GROUP BY account_id
+"
+
+# ─────────────────────────────────────────────────────────────────────────────
+echo ""
+echo "=== Query 8: Cross-Type Investigation (SQL Subquery) ==="
+echo "Find accounts linked to customers flagged as suspicious or anomalous."
+echo ""
+query "sql" "
+SELECT id, name
+FROM Account
+WHERE id IN (SELECT id FROM Customer WHERE recent_behavior IN ['suspicious', 'anomalous'])
+"
diff --git a/fraud-detection/setup.sh b/fraud-detection/setup.sh
new file mode 100755
index 0000000..0c1b5c9
--- /dev/null
+++ b/fraud-detection/setup.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# Fraud Detection — create the database, then load schema and sample data
+# Prerequisites: ArcadeDB reachable at ARCADEDB_URL, curl and jq installed
+# Usage: ./setup.sh (may be invoked from any directory)
+set -euo pipefail
+
+ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}"
+ARCADEDB_USER="${ARCADEDB_USER:-root}"
+ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}"
+# Maximum time in seconds to wait for the server before giving up
+ARCADEDB_WAIT_SECS="${ARCADEDB_WAIT_SECS:-120}"
+DB_NAME="FraudDetection"
+
+# The sql/ paths below are relative to this script, not to the caller's cwd
+cd -- "$(dirname -- "${BASH_SOURCE[0]}")"
+
+# ── Wait for ArcadeDB ─────────────────────────────────────────────────────────
+echo "Waiting for ArcadeDB at ${ARCADEDB_URL}..."
+waited=0
+until curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \
+    "${ARCADEDB_URL}/api/v1/ready" > /dev/null 2>&1; do
+  # Poll every 2s, but fail with a message instead of hanging forever
+  if (( waited >= ARCADEDB_WAIT_SECS )); then
+    echo "ArcadeDB not ready after ${ARCADEDB_WAIT_SECS}s, giving up." >&2
+    exit 1
+  fi
+  sleep 2
+  waited=$(( waited + 2 ))
+done
+echo "ArcadeDB is ready."
+
+# ── Create database ───────────────────────────────────────────────────────────
+# Errors are deliberately ignored (|| true), presumably so that re-running
+# against an already-existing database does not abort the script.
+echo "Creating database ${DB_NAME}..."
+curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \
+  -X POST "${ARCADEDB_URL}/api/v1/server" \
+  -H "Content-Type: application/json" \
+  -d "{\"command\": \"create database ${DB_NAME}\"}" > /dev/null || true
+echo "Database ready."
+
+# ── Helper: send one SQL statement ───────────────────────────────────────────
+# send_sql STATEMENT
+#   POSTs STATEMENT as language "sql" to /api/v1/command/${DB_NAME}; the
+#   response body is discarded. jq builds the JSON body so quotes inside the
+#   statement are escaped. Non-zero status if jq or curl fails (pipefail);
+#   -f makes HTTP errors count as failures and -S prints the reason to stderr.
+send_sql() {
+  local stmt="$1"
+  jq -cn --arg cmd "$stmt" '{"language":"sql","command":$cmd}' \
+    | curl -fsS -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \
+        -X POST "${ARCADEDB_URL}/api/v1/command/${DB_NAME}" \
+        -H "Content-Type: application/json" \
+        -d @- > /dev/null
+}
+
+# ── Apply a SQL file (one statement per line) ─────────────────────────────────
+# apply_file FILE
+#   Sends each line of FILE that is neither blank nor a "--" comment as one
+#   statement, without its trailing ';'. Exits the script on the first
+#   statement that fails, naming the statement on stderr.
+apply_file() {
+  local file="$1" line stmt
+  echo "Applying ${file}..."
+  while IFS= read -r line || [[ -n "$line" ]]; do
+    # skip blank lines and SQL comments
+    [[ -z "${line//[[:space:]]/}" || "$line" =~ ^[[:space:]]*-- ]] && continue
+    # trim trailing whitespace (including a CR from CRLF files) so the ';' is last
+    stmt="${line%"${line##*[![:space:]]}"}"
+    stmt="${stmt%;}"
+    if ! send_sql "$stmt"; then
+      echo "Failed statement in ${file}: ${stmt}" >&2
+      exit 1
+    fi
+  done < "$file"
+  echo "Done: ${file}"
+}
+
+apply_file "sql/01-schema.sql"
+apply_file "sql/02-data.sql"
+
+echo ""
+echo "Setup complete. ${DB_NAME} is ready."
diff --git a/fraud-detection/sql/01-schema.sql b/fraud-detection/sql/01-schema.sql
new file mode 100644
index 0000000..d1aae66
--- /dev/null
+++ b/fraud-detection/sql/01-schema.sql
@@ -0,0 +1,54 @@
+-- Vertex types
+CREATE VERTEX TYPE Account IF NOT EXISTS;
+CREATE PROPERTY Account.id IF NOT EXISTS STRING;
+CREATE PROPERTY Account.name IF NOT EXISTS STRING;
+CREATE PROPERTY Account.full_name IF NOT EXISTS STRING;
+CREATE PROPERTY Account.ssn IF NOT EXISTS STRING;
+CREATE PROPERTY Account.credit_limit IF NOT EXISTS FLOAT;
+CREATE PROPERTY Account.balance IF NOT EXISTS FLOAT;
+CREATE VERTEX TYPE Customer IF NOT EXISTS;
+CREATE PROPERTY Customer.id IF NOT EXISTS STRING;
+CREATE PROPERTY Customer.baseline_behavior IF NOT EXISTS STRING;
+CREATE PROPERTY Customer.recent_behavior IF NOT EXISTS STRING;
+CREATE PROPERTY Customer.profile_embedding IF NOT EXISTS LIST;
+CREATE VERTEX TYPE Device IF NOT EXISTS;
+CREATE PROPERTY Device.id IF NOT EXISTS STRING;
+CREATE PROPERTY Device.fingerprint IF NOT EXISTS STRING;
+CREATE VERTEX TYPE Phone IF NOT EXISTS;
+CREATE PROPERTY Phone.number IF NOT EXISTS STRING;
+CREATE VERTEX TYPE Address IF NOT
EXISTS;
+CREATE PROPERTY Address.street IF NOT EXISTS STRING;
+CREATE PROPERTY Address.city IF NOT EXISTS STRING;
+CREATE PROPERTY Address.zip IF NOT EXISTS STRING;
+CREATE VERTEX TYPE Email IF NOT EXISTS;
+CREATE PROPERTY Email.address IF NOT EXISTS STRING;
+CREATE VERTEX TYPE Beneficiary IF NOT EXISTS;
+CREATE PROPERTY Beneficiary.id IF NOT EXISTS STRING;
+CREATE PROPERTY Beneficiary.name IF NOT EXISTS STRING;
+CREATE VERTEX TYPE Transaction IF NOT EXISTS;
+CREATE PROPERTY Transaction.id IF NOT EXISTS STRING;
+CREATE PROPERTY Transaction.amount IF NOT EXISTS FLOAT;
+CREATE PROPERTY Transaction.merchant IF NOT EXISTS STRING;
+CREATE PROPERTY Transaction.behavior_embedding IF NOT EXISTS LIST;
+CREATE PROPERTY Transaction.ts IF NOT EXISTS DATETIME;
+CREATE PROPERTY Transaction.account_id IF NOT EXISTS STRING;
+-- Edge types: Account-to-identifier links, Account-to-Account transfers, Account-to-Beneficiary links
+CREATE EDGE TYPE USES_DEVICE IF NOT EXISTS;
+CREATE EDGE TYPE HAS_PHONE IF NOT EXISTS;
+CREATE EDGE TYPE HAS_ADDRESS IF NOT EXISTS;
+CREATE EDGE TYPE HAS_EMAIL IF NOT EXISTS;
+CREATE EDGE TYPE TRANSFERRED_TO IF NOT EXISTS;
+CREATE PROPERTY TRANSFERRED_TO.amount IF NOT EXISTS FLOAT;
+CREATE PROPERTY TRANSFERRED_TO.ts IF NOT EXISTS DATETIME;
+CREATE EDGE TYPE BENEFICIARY_OF IF NOT EXISTS;
+-- Document types: Deposit records carry an account_id property instead of an edge
+CREATE DOCUMENT TYPE Deposit IF NOT EXISTS;
+CREATE PROPERTY Deposit.account_id IF NOT EXISTS STRING;
+CREATE PROPERTY Deposit.amount IF NOT EXISTS FLOAT;
+CREATE PROPERTY Deposit.ts IF NOT EXISTS DATETIME;
+-- Indexes: unique id indexes, then 8-dimension cosine vector indexes on both embedding properties
+CREATE INDEX IF NOT EXISTS ON Account (id) UNIQUE;
+CREATE INDEX IF NOT EXISTS ON Customer (id) UNIQUE;
+CREATE INDEX IF NOT EXISTS ON Transaction (id) UNIQUE;
+CREATE INDEX IF NOT EXISTS ON Customer (profile_embedding) LSM_VECTOR METADATA { dimensions: 8, similarity: 'COSINE' };
+CREATE INDEX IF NOT EXISTS ON Transaction (behavior_embedding) LSM_VECTOR METADATA { dimensions: 8, similarity: 'COSINE' };
diff --git a/fraud-detection/sql/02-data.sql b/fraud-detection/sql/02-data.sql
new file mode 100644
index 0000000..0fafbdb
---
/dev/null +++ b/fraud-detection/sql/02-data.sql @@ -0,0 +1,165 @@ +-- ── Fraud Ring Accounts (A–E) ──────────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-A', name = 'Alice', full_name = 'Alice M. Johnson', ssn = '111-22-3333', credit_limit = 10000.0, balance = 8500.0; +INSERT INTO Account SET id = 'acct-B', name = 'Bob', full_name = 'Bob K. Williams', ssn = '222-33-4444', credit_limit = 10000.0, balance = 9200.0; +INSERT INTO Account SET id = 'acct-C', name = 'Carol', full_name = 'Carol P. Davis', ssn = '333-44-5555', credit_limit = 10000.0, balance = 8800.0; +INSERT INTO Account SET id = 'acct-D', name = 'Dan', full_name = 'Daniel R. Miller', ssn = '444-55-6666', credit_limit = 10000.0, balance = 9100.0; +INSERT INTO Account SET id = 'acct-E', name = 'Eve', full_name = 'Eve S. Wilson', ssn = '555-66-7777', credit_limit = 10000.0, balance = 8700.0; +-- ── Synthetic Identity Pair (F–G) ─────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-F', name = 'Robert', full_name = 'Robert J. Smith', ssn = '123-45-6789', credit_limit = 15000.0, balance = 12000.0; +INSERT INTO Account SET id = 'acct-G', name = 'Rob', full_name = 'Rob Smith Jr.', ssn = '123-45-6789', credit_limit = 8000.0, balance = 5000.0; +-- ── Velocity Attacker (H) ─────────────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-H', name = 'Hank', full_name = 'Hank T. Brown', ssn = '666-77-8888', credit_limit = 5000.0, balance = 200.0; +-- ── Legitimate Accounts (L1–L3) ───────────────────────────────────────────── +INSERT INTO Account SET id = 'acct-L1', name = 'Liam', full_name = 'Liam O. Garcia', ssn = '777-88-9999', credit_limit = 20000.0, balance = 15000.0; +INSERT INTO Account SET id = 'acct-L2', name = 'Lisa', full_name = 'Lisa N. Chen', ssn = '888-99-0000', credit_limit = 25000.0, balance = 22000.0; +INSERT INTO Account SET id = 'acct-L3', name = 'Luke', full_name = 'Luke W. 
Taylor', ssn = '999-00-1111', credit_limit = 18000.0, balance = 16500.0; +-- ── Customers (one per account) ───────────────────────────────────────────── +INSERT INTO Customer SET id = 'acct-A', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]; +INSERT INTO Customer SET id = 'acct-B', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.88, 0.82, 0.72, 0.58, 0.48, 0.42, 0.32, 0.22]; +INSERT INTO Customer SET id = 'acct-C', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.91, 0.79, 0.68, 0.62, 0.52, 0.38, 0.28, 0.18]; +INSERT INTO Customer SET id = 'acct-D', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.87, 0.83, 0.73, 0.57, 0.47, 0.43, 0.33, 0.23]; +INSERT INTO Customer SET id = 'acct-E', baseline_behavior = 'normal', recent_behavior = 'suspicious', profile_embedding = [0.92, 0.78, 0.69, 0.61, 0.51, 0.39, 0.29, 0.19]; +INSERT INTO Customer SET id = 'acct-F', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1]; +INSERT INTO Customer SET id = 'acct-G', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.32, 0.38, 0.52, 0.58, 0.72, 0.78, 0.88, 0.12]; +INSERT INTO Customer SET id = 'acct-H', baseline_behavior = 'normal', recent_behavior = 'anomalous', profile_embedding = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]; +INSERT INTO Customer SET id = 'acct-L1', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]; +INSERT INTO Customer SET id = 'acct-L2', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6]; +INSERT INTO Customer SET id = 'acct-L3', baseline_behavior = 'normal', recent_behavior = 'normal', profile_embedding = [0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4]; +-- ── 
Devices ───────────────────────────────────────────────────────────────── +INSERT INTO Device SET id = 'dev-shared', fingerprint = 'fp-AABBCCDD'; +INSERT INTO Device SET id = 'dev-F', fingerprint = 'fp-FF001122'; +INSERT INTO Device SET id = 'dev-G', fingerprint = 'fp-GG334455'; +INSERT INTO Device SET id = 'dev-H', fingerprint = 'fp-HH667788'; +INSERT INTO Device SET id = 'dev-L1', fingerprint = 'fp-L1AABB00'; +INSERT INTO Device SET id = 'dev-L2', fingerprint = 'fp-L2CCDD00'; +INSERT INTO Device SET id = 'dev-L3', fingerprint = 'fp-L3EEFF00'; +-- ── Phones ────────────────────────────────────────────────────────────────── +INSERT INTO Phone SET number = '555-000-RING'; +INSERT INTO Phone SET number = '555-111-FFFF'; +INSERT INTO Phone SET number = '555-222-GGGG'; +INSERT INTO Phone SET number = '555-333-HHHH'; +INSERT INTO Phone SET number = '555-444-LLL1'; +INSERT INTO Phone SET number = '555-555-LLL2'; +INSERT INTO Phone SET number = '555-666-LLL3'; +-- ── Addresses ─────────────────────────────────────────────────────────────── +INSERT INTO Address SET street = '100 Ring Road', city = 'Fraudville', zip = '00001'; +INSERT INTO Address SET street = '200 Synth Ave', city = 'Faketown', zip = '00002'; +INSERT INTO Address SET street = '300 Velocity Blvd', city = 'Speedcity', zip = '00003'; +INSERT INTO Address SET street = '400 Legit Lane', city = 'Realville', zip = '10001'; +INSERT INTO Address SET street = '500 Honest St', city = 'Trustburg', zip = '10002'; +INSERT INTO Address SET street = '600 Genuine Dr', city = 'Goodtown', zip = '10003'; +-- ── Emails ────────────────────────────────────────────────────────────────── +INSERT INTO Email SET address = 'alice@example.com'; +INSERT INTO Email SET address = 'bob@example.com'; +INSERT INTO Email SET address = 'carol@example.com'; +INSERT INTO Email SET address = 'dan@example.com'; +INSERT INTO Email SET address = 'eve@example.com'; +INSERT INTO Email SET address = 'robert@example.com'; +INSERT INTO Email SET 
address = 'rob@example.com'; +INSERT INTO Email SET address = 'hank@example.com'; +INSERT INTO Email SET address = 'liam@example.com'; +INSERT INTO Email SET address = 'lisa@example.com'; +INSERT INTO Email SET address = 'luke@example.com'; +-- ── Beneficiaries ─────────────────────────────────────────────────────────── +INSERT INTO Beneficiary SET id = 'ben-shell1', name = 'Shell Corp Alpha'; +INSERT INTO Beneficiary SET id = 'ben-shell2', name = 'Shell Corp Beta'; +INSERT INTO Beneficiary SET id = 'ben-legit1', name = 'Acme Supplies'; +-- ── USES_DEVICE edges (fraud ring shares dev-shared) ──────────────────────── +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Device WHERE id = 'dev-shared'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Device WHERE id = 'dev-F'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Device WHERE id = 'dev-G'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Device WHERE id = 'dev-H'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Device WHERE id = 'dev-L1'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Device WHERE id = 'dev-L2'); +CREATE EDGE USES_DEVICE FROM (SELECT FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Device WHERE id = 'dev-L3'); +-- ── HAS_PHONE edges (fraud ring shares phone-shared) 
──────────────────────── +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Phone WHERE number = '555-000-RING'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Phone WHERE number = '555-111-FFFF'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Phone WHERE number = '555-222-GGGG'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Phone WHERE number = '555-333-HHHH'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Phone WHERE number = '555-444-LLL1'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Phone WHERE number = '555-555-LLL2'); +CREATE EDGE HAS_PHONE FROM (SELECT FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Phone WHERE number = '555-666-LLL3'); +-- ── HAS_ADDRESS edges (F and G share same address) ────────────────────────── +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE 
EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Address WHERE street = '100 Ring Road'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Address WHERE street = '200 Synth Ave'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Address WHERE street = '200 Synth Ave'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Address WHERE street = '300 Velocity Blvd'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Address WHERE street = '400 Legit Lane'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Address WHERE street = '500 Honest St'); +CREATE EDGE HAS_ADDRESS FROM (SELECT FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Address WHERE street = '600 Genuine Dr'); +-- ── HAS_EMAIL edges ───────────────────────────────────────────────────────── +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Email WHERE address = 'alice@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Email WHERE address = 'bob@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Email WHERE address = 'carol@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Email WHERE address = 'dan@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Email WHERE address = 'eve@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-F') TO (SELECT FROM Email WHERE address = 'robert@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-G') TO (SELECT FROM Email WHERE address = 'rob@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-H') TO (SELECT FROM Email WHERE address = 
'hank@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Email WHERE address = 'liam@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Email WHERE address = 'lisa@example.com'); +CREATE EDGE HAS_EMAIL FROM (SELECT FROM Account WHERE id = 'acct-L3') TO (SELECT FROM Email WHERE address = 'luke@example.com'); +-- ── BENEFICIARY_OF edges ──────────────────────────────────────────────────── +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Beneficiary WHERE id = 'ben-shell1'); +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Beneficiary WHERE id = 'ben-shell1'); +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Beneficiary WHERE id = 'ben-shell2'); +CREATE EDGE BENEFICIARY_OF FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Beneficiary WHERE id = 'ben-legit1'); +-- ── TRANSFERRED_TO edges (circular: A→B→C→D→E→A) ─────────────────────────── +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-A') TO (SELECT FROM Account WHERE id = 'acct-B') SET amount = 9000.0, ts = '2026-02-05T10:00:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-B') TO (SELECT FROM Account WHERE id = 'acct-C') SET amount = 8500.0, ts = '2026-02-10T14:30:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-C') TO (SELECT FROM Account WHERE id = 'acct-D') SET amount = 9200.0, ts = '2026-02-15T09:15:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-D') TO (SELECT FROM Account WHERE id = 'acct-E') SET amount = 8800.0, ts = '2026-02-20T16:45:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-E') TO (SELECT FROM Account WHERE id = 'acct-A') SET amount = 9500.0, ts = '2026-02-25T11:20:00Z'; +-- Normal transfers for legitimate accounts +CREATE EDGE 
TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-L1') TO (SELECT FROM Account WHERE id = 'acct-L2') SET amount = 500.0, ts = '2026-02-18T08:00:00Z'; +CREATE EDGE TRANSFERRED_TO FROM (SELECT FROM Account WHERE id = 'acct-L2') TO (SELECT FROM Account WHERE id = 'acct-L3') SET amount = 250.0, ts = '2026-02-22T12:00:00Z'; +-- ── Transactions (velocity attack for H — 10 txns in 5 minutes) ───────────── +INSERT INTO Transaction SET id = 'txn-H01', account_id = 'acct-H', amount = 499.99, merchant = 'QuickMart', behavior_embedding = [0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9], ts = '2026-03-01T13:00:00Z'; +INSERT INTO Transaction SET id = 'txn-H02', account_id = 'acct-H', amount = 489.50, merchant = 'FastShop', behavior_embedding = [0.85, 0.15, 0.1, 0.1, 0.1, 0.1, 0.15, 0.85], ts = '2026-03-01T13:00:30Z'; +INSERT INTO Transaction SET id = 'txn-H03', account_id = 'acct-H', amount = 475.00, merchant = 'SpeedBuy', behavior_embedding = [0.88, 0.12, 0.1, 0.1, 0.1, 0.1, 0.12, 0.88], ts = '2026-03-01T13:01:00Z'; +INSERT INTO Transaction SET id = 'txn-H04', account_id = 'acct-H', amount = 450.00, merchant = 'RushStore', behavior_embedding = [0.92, 0.08, 0.1, 0.1, 0.1, 0.1, 0.08, 0.92], ts = '2026-03-01T13:01:30Z'; +INSERT INTO Transaction SET id = 'txn-H05', account_id = 'acct-H', amount = 510.00, merchant = 'QuickMart', behavior_embedding = [0.87, 0.13, 0.1, 0.1, 0.1, 0.1, 0.13, 0.87], ts = '2026-03-01T13:02:00Z'; +INSERT INTO Transaction SET id = 'txn-H06', account_id = 'acct-H', amount = 495.00, merchant = 'FastShop', behavior_embedding = [0.91, 0.09, 0.1, 0.1, 0.1, 0.1, 0.09, 0.91], ts = '2026-03-01T13:02:30Z'; +INSERT INTO Transaction SET id = 'txn-H07', account_id = 'acct-H', amount = 520.00, merchant = 'SpeedBuy', behavior_embedding = [0.86, 0.14, 0.1, 0.1, 0.1, 0.1, 0.14, 0.86], ts = '2026-03-01T13:03:00Z'; +INSERT INTO Transaction SET id = 'txn-H08', account_id = 'acct-H', amount = 480.00, merchant = 'RushStore', behavior_embedding = [0.93, 0.07, 0.1, 0.1, 0.1, 0.1, 
0.07, 0.93], ts = '2026-03-01T13:03:30Z'; +INSERT INTO Transaction SET id = 'txn-H09', account_id = 'acct-H', amount = 465.00, merchant = 'QuickMart', behavior_embedding = [0.89, 0.11, 0.1, 0.1, 0.1, 0.1, 0.11, 0.89], ts = '2026-03-01T13:04:00Z'; +INSERT INTO Transaction SET id = 'txn-H10', account_id = 'acct-H', amount = 505.00, merchant = 'FastShop', behavior_embedding = [0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9], ts = '2026-03-01T13:04:30Z'; +-- Normal transactions for legitimate accounts (behavior close to profile) +INSERT INTO Transaction SET id = 'txn-L1-01', account_id = 'acct-L1', amount = 45.00, merchant = 'Grocery Store', behavior_embedding = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], ts = '2026-02-28T09:00:00Z'; +INSERT INTO Transaction SET id = 'txn-L1-02', account_id = 'acct-L1', amount = 120.00, merchant = 'Gas Station', behavior_embedding = [0.48, 0.52, 0.5, 0.5, 0.5, 0.5, 0.48, 0.52], ts = '2026-02-28T14:00:00Z'; +INSERT INTO Transaction SET id = 'txn-L2-01', account_id = 'acct-L2', amount = 85.00, merchant = 'Restaurant', behavior_embedding = [0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6], ts = '2026-02-27T18:30:00Z'; +INSERT INTO Transaction SET id = 'txn-L3-01', account_id = 'acct-L3', amount = 200.00, merchant = 'Department Store', behavior_embedding = [0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4], ts = '2026-02-26T11:00:00Z'; +-- Transactions for fraud ring (amounts for correlation query) +INSERT INTO Transaction SET id = 'txn-A01', account_id = 'acct-A', amount = 9000.0, merchant = 'Transfer', behavior_embedding = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2], ts = '2026-02-05T10:00:00Z'; +INSERT INTO Transaction SET id = 'txn-A02', account_id = 'acct-A', amount = 8500.0, merchant = 'Transfer', behavior_embedding = [0.88, 0.82, 0.72, 0.58, 0.48, 0.42, 0.32, 0.22], ts = '2026-02-10T14:00:00Z'; +INSERT INTO Transaction SET id = 'txn-A03', account_id = 'acct-A', amount = 9200.0, merchant = 'Transfer', behavior_embedding = [0.91, 0.79, 0.68, 0.62, 0.52, 0.38, 0.28, 
0.18], ts = '2026-02-15T09:00:00Z'; +INSERT INTO Transaction SET id = 'txn-B01', account_id = 'acct-B', amount = 8500.0, merchant = 'Transfer', behavior_embedding = [0.88, 0.82, 0.72, 0.58, 0.48, 0.42, 0.32, 0.22], ts = '2026-02-10T14:30:00Z'; +INSERT INTO Transaction SET id = 'txn-B02', account_id = 'acct-B', amount = 9200.0, merchant = 'Transfer', behavior_embedding = [0.87, 0.83, 0.73, 0.57, 0.47, 0.43, 0.33, 0.23], ts = '2026-02-15T09:30:00Z'; +INSERT INTO Transaction SET id = 'txn-B03', account_id = 'acct-B', amount = 8800.0, merchant = 'Transfer', behavior_embedding = [0.92, 0.78, 0.69, 0.61, 0.51, 0.39, 0.29, 0.19], ts = '2026-02-20T16:00:00Z'; +-- ── Deposits (structuring pattern for fraud ring) ─────────────────────────── +INSERT INTO Deposit SET account_id = 'acct-A', amount = 9500.0, ts = '2026-02-05T08:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-A', amount = 9800.0, ts = '2026-02-05T10:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-A', amount = 9200.0, ts = '2026-02-05T14:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-B', amount = 8500.0, ts = '2026-02-06T09:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-B', amount = 9100.0, ts = '2026-02-06T11:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-B', amount = 8800.0, ts = '2026-02-06T15:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-C', amount = 9900.0, ts = '2026-02-07T08:30:00Z'; +INSERT INTO Deposit SET account_id = 'acct-C', amount = 9700.0, ts = '2026-02-07T12:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-C', amount = 8600.0, ts = '2026-02-07T16:30:00Z'; +-- Normal deposits for legitimate accounts +INSERT INTO Deposit SET account_id = 'acct-L1', amount = 3000.0, ts = '2026-02-15T09:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-L2', amount = 5500.0, ts = '2026-02-20T10:00:00Z'; +INSERT INTO Deposit SET account_id = 'acct-L3', amount = 1200.0, ts = '2026-02-25T11:00:00Z';