From bceea47f017de62a96d8f388a62c358ef1c58921 Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 23 Mar 2026 13:26:04 +0100 Subject: [PATCH 1/2] feat(feature-store): add AI/ML Feature Store use case Unified ML feature store serving three domains (fraud scoring, product recommendations, predictive maintenance) from a single ArcadeDB instance. Demonstrates graph, vector, time-series, and document features with 11 query patterns across curl, Java, and JavaScript (PostgreSQL protocol). Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/dependabot.yml | 37 + .github/workflows/feature-store.yml | 97 ++ CLAUDE.md | 1 + README.md | 1 + docs/plans/2026-03-23-feature-store-design.md | 294 +++++ docs/plans/2026-03-23-feature-store.md | 1010 +++++++++++++++++ feature-store/README.md | 135 +++ feature-store/docker-compose.yml | 16 + feature-store/java/pom.xml | 57 + .../com/arcadedb/examples/FeatureStore.java | 391 +++++++ feature-store/js/feature-store.js | 329 ++++++ feature-store/js/package.json | 10 + feature-store/queries/queries.sh | 234 ++++ feature-store/setup.sh | 51 + feature-store/sql/01-schema.sql | 68 ++ feature-store/sql/02-data.sql | 123 ++ 16 files changed, 2854 insertions(+) create mode 100644 .github/workflows/feature-store.yml create mode 100644 docs/plans/2026-03-23-feature-store-design.md create mode 100644 docs/plans/2026-03-23-feature-store.md create mode 100644 feature-store/README.md create mode 100644 feature-store/docker-compose.yml create mode 100644 feature-store/java/pom.xml create mode 100644 feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java create mode 100644 feature-store/js/feature-store.js create mode 100644 feature-store/js/package.json create mode 100755 feature-store/queries/queries.sh create mode 100755 feature-store/setup.sh create mode 100644 feature-store/sql/01-schema.sql create mode 100644 feature-store/sql/02-data.sql diff --git a/.github/dependabot.yml b/.github/dependabot.yml index cbfe22f..7ba0367 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -279,3 +279,40 @@ updates: arcadedb-docker: patterns: - "arcadedata/arcadedb" + + # ── Feature Store ────────────────────────────────────────────────────────── + - package-ecosystem: "maven" + directory: "/feature-store/java" + schedule: + interval: weekly + day: "sunday" + open-pull-requests-limit: 20 + groups: + arcadedb: + patterns: + - "com.arcadedb:*" + maven-plugins: + patterns: + - "org.apache.maven.plugins:*" + + - package-ecosystem: "npm" + directory: "/feature-store/js" + schedule: + interval: weekly + day: "sunday" + open-pull-requests-limit: 10 + groups: + node-pg: + patterns: + - "pg" + + - package-ecosystem: "docker-compose" + directory: "/feature-store" + schedule: + interval: weekly + day: "sunday" + open-pull-requests-limit: 10 + groups: + arcadedb-docker: + patterns: + - "arcadedata/arcadedb" diff --git a/.github/workflows/feature-store.yml b/.github/workflows/feature-store.yml new file mode 100644 index 0000000..932b8d4 --- /dev/null +++ b/.github/workflows/feature-store.yml @@ -0,0 +1,97 @@ +name: Feature Store CI + +on: + push: + paths: + - feature-store/** + - .github/workflows/feature-store.yml + pull_request: + paths: + - feature-store/** + - .github/workflows/feature-store.yml + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: read + strategy: + fail-fast: false + matrix: + runner: [curl, java, js] + + env: + ARCADEDB_URL: http://localhost:2480 + ARCADEDB_USER: root + ARCADEDB_PASS: arcadedb + + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + + - name: Set up Java + if: matrix.runner == 'java' + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: '21' + distribution: 'temurin' + + - name: Cache Maven repository + if: matrix.runner == 'java' + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-feature-store-${{ hashFiles('feature-store/java/pom.xml') }} + restore-keys: ${{ runner.os }}-m2-feature-store- + + - name: Set up Node.js + if: matrix.runner == 'js' + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: '22' + + - name: Cache npm + if: matrix.runner == 'js' + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 + with: + path: ~/.npm + key: ${{ runner.os }}-npm-feature-store-${{ hashFiles('feature-store/js/package.json') }} + restore-keys: ${{ runner.os }}-npm-feature-store- + + - name: Install JS dependencies + if: matrix.runner == 'js' + working-directory: feature-store/js + run: npm install + + - name: Start ArcadeDB + working-directory: feature-store + run: docker compose up -d + + - name: Setup database + working-directory: feature-store + run: ./setup.sh + + - name: Run curl queries + if: matrix.runner == 'curl' + working-directory: feature-store + run: ./queries/queries.sh + + - name: Build and run Java + if: matrix.runner == 'java' + working-directory: feature-store/java + run: | + mvn package --no-transfer-progress + java -jar target/feature-store.jar + + - name: Run JavaScript queries + if: matrix.runner == 'js' + working-directory: feature-store/js + run: node feature-store.js + + - name: Teardown + if: always() + working-directory: feature-store + run: docker compose down diff --git a/CLAUDE.md b/CLAUDE.md index 16c5e41..135c22c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,6 +16,7 @@ A collection of self-contained projects demonstrating [ArcadeDB](https://arcaded | `fraud-detection/` | `FraudDetection` | 26.3.1 | 21 | HTTP API (`arcadedb-network`) | | `supply-chain/` | `SupplyChain` | 26.3.1 | 21 | HTTP API (`arcadedb-network`) + PostgreSQL (`pg`) | | `iam/` | `IAM` | 26.3.1 | 21 | HTTP API (`arcadedb-network`) + PostgreSQL (`psycopg`) | +| `feature-store/` | `FeatureStore` | 26.3.2 | 21 | HTTP API (`arcadedb-network`) + PostgreSQL (`pg`) | ## Directory Structure (per use case) diff --git a/README.md b/README.md index 0b306e3..351debf 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ and runnable demos via both `curl` and a Java program. | [supply-chain](./supply-chain/) | Supply chain management with multi-tier visibility | Graph traversal, Vector similarity, Time-series, PostgreSQL protocol, JavaScript | | [iam](./iam/) | Identity & Access Management | Graph traversal, Time-series, Vector similarity, PostgreSQL protocol, Python | | [customer-360](./customer-360/) | Unified customer view with identity resolution and churn prediction | Graph traversal, Documents, Vectors, Full-text search, OpenCypher | +| [feature-store](./feature-store/) | Unified ML feature store for fraud, recommendations, and maintenance | Graph traversal, Vector similarity, Time-series, PostgreSQL protocol, JavaScript | ## Structure diff --git a/docs/plans/2026-03-23-feature-store-design.md b/docs/plans/2026-03-23-feature-store-design.md new file mode 100644 index 0000000..851e282 --- /dev/null +++ b/docs/plans/2026-03-23-feature-store-design.md @@ -0,0 +1,294 @@ +# AI/ML Feature Store Use Case — Design + +**Date:** 2026-03-23 +**Branch:** feat/feature-store +**ArcadeDB version:** 26.3.2 + +## Overview + +Implement the [ArcadeDB AI/ML Feature Store](https://arcadedb.com/ai-ml-feature-store.html) use case. The scenario demonstrates ArcadeDB as a unified feature store for production ML systems, replacing the typical scatter of 3+ specialized databases (graph DB, vector DB, time-series DB) with a single multi-model engine. + +Three ML teams share one ArcadeDB instance: + +| ML Team | Domain | Feature Types | +|---------|--------|--------------| +| Risk | Fraud scoring | Graph topology + behavior vectors + transaction velocity | +| Growth | Product recommendations | Collaborative filtering + product embeddings | +| Operations | Predictive maintenance | Equipment dependency graph + sensor aggregates | + +A cross-cutting **Feature Store infrastructure** layer records feature snapshots for audit/lineage, demonstrating training-serving consistency. + +## Repository Structure + +``` +feature-store/ +├── docker-compose.yml # ArcadeDB with PostgreSQL plugin (ports 2480 + 5432) +├── setup.sh +├── sql/ +│ ├── 01-schema.sql +│ └── 02-data.sql +├── queries/ +│ └── queries.sh +├── java/ +│ ├── pom.xml +│ └── src/main/java/com/arcadedb/examples/FeatureStore.java +├── js/ +│ ├── package.json +│ └── feature-store.js +└── README.md +``` + +## Docker Compose + +- Single service: `arcadedata/arcadedb:26.3.2` +- HTTP API port: `2480` (shell scripts + Java) +- PostgreSQL protocol port: `5432` (JavaScript) +- PostgreSQL plugin enabled via `JAVA_OPTS` +- Root password: `-Darcadedb.server.rootPassword=arcadedb` + +## Schema (`sql/01-schema.sql`) + +### Vertex Types (6) + +| Type | Properties | Purpose | +|------|-----------|---------| +| `Account` | `accountId` (STRING), `accountType` (STRING), `signupSource` (STRING), `flagged` (BOOLEAN), `behaviorVec` (LIST) | Fraud domain — financial accounts with behavior embeddings | +| `Merchant` | `merchantId` (STRING), `category` (STRING), `riskTier` (STRING) | Fraud domain — transaction counterparties | +| `User` | `userId` (STRING), `preferenceVec` (LIST) | Recommendation domain — platform users | +| `Product` | `productId` (STRING), `name` (STRING), `category` (STRING), `price` (FLOAT), `embedding` (LIST) | Recommendation domain — catalog items | +| `Equipment` | `equipmentId` (STRING), `name` (STRING), `specifications` (STRING), `failureRate` (FLOAT) | Maintenance domain — monitored assets | +| `Sensor` | `sensorId` (STRING), `sensorType` (STRING), `unit` (STRING) | Maintenance domain — sensor metadata | + +### Edge Types (6) + +| Type | From → To | Properties | Purpose | +|------|-----------|-----------|---------| +| `TRANSFERRED` | Account → Account | `amount` (FLOAT), `recordedAt` (DATETIME) | Money transfers between accounts | +| `LINKED_DEVICE` | Account → Account | `deviceId` (STRING) | Shared-device signal (fraud) | +| `TRANSACTED` | Account → Merchant | `amount` (FLOAT), `recordedAt` (DATETIME) | Account-to-merchant transactions | +| `PURCHASED` | User → Product | — | Purchase history (recommendations) | +| `DEPENDS_ON` | Equipment → Equipment | `criticality` (STRING) | Upstream dependency chain | +| `MONITORED_BY` | Equipment → Sensor | — | Sensor-to-equipment mapping | + +### Document Types (3) + +| Type | Properties | Purpose | +|------|-----------|---------| +| `TransactionMetric` | `accountId` (STRING), `txCount` (LONG), `totalAmount` (FLOAT), `recordedAt` (DATETIME) | Time-bucketed transaction aggregates for velocity features | +| `SensorReading` | `equipmentId` (STRING), `temperature` (FLOAT), `vibration` (FLOAT), `pressure` (FLOAT), `recordedAt` (DATETIME) | Time-series sensor data for anomaly detection | +| `FeatureSnapshot` | `entityId` (STRING), `entityType` (STRING), `featureVector` (LIST), `computedAt` (DATETIME), `modelVersion` (STRING) | Audit trail — persisted feature vectors for lineage | + +### Indexes + +| Index | Type | Purpose | +|-------|------|---------| +| `Account[behaviorVec]` | LSM_VECTOR (4d, COSINE) | Behavior similarity search | +| `Product[embedding]` | LSM_VECTOR (4d, COSINE) | Product embedding search | +| `Account(accountId)` | UNIQUE | Account lookup | +| `User(userId)` | UNIQUE | User lookup | +| `Equipment(equipmentId)` | UNIQUE | Equipment lookup | + +## Sample Data (`sql/02-data.sql`) + +### Fraud Domain +- 6 accounts (a1–a6): a1–a3 legit, a4–a5 suspicious, a6 flagged +- Behavior vectors: legit cluster near `[0.1, 0.2, 0.8, 0.9]`, fraud cluster near `[0.9, 0.8, 0.1, 0.2]` +- 4 merchants: grocery, electronics, gambling (high-risk), crypto (high-risk) +- ~12 TRANSFERRED edges (including circular patterns involving a6) +- ~8 TRANSACTED edges +- 4 LINKED_DEVICE edges (a4↔a6, a5↔a6 share devices) +- ~15 TransactionMetric documents across 3 time buckets + +### Recommendation Domain +- 5 users (u1–u5) with 4-d preference vectors +- 8 products across Electronics, Books, Sports categories with 4-d embeddings +- ~15 PURCHASED edges with deliberate overlap (u1+u2 share purchases → collab signal) + +### Maintenance Domain +- 5 equipment units (eq1–eq5) forming a dependency chain: eq1 → eq2 → eq3, eq1 → eq4, eq4 → eq5 +- 3 sensors monitoring eq1–eq3 +- ~12 SensorReading documents (eq1 showing anomalous readings) + +### Feature Store Infrastructure +- 3 FeatureSnapshot documents showing pre-computed feature vectors for a1, a4, a6 + +## Queries + +### Fraud Domain (5 queries) + +#### Query 1: Account Graph Features (SQL MATCH) +Compute graph topology features for account a4: in-degree, out-degree, distinct counterparty count. +```sql +SELECT inDeg, outDeg, counterparties +FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties +) +``` + +#### Query 2: Distance to Flagged Account (SQL MATCH) +Find the shortest path length from a4 to the nearest flagged account via TRANSFERRED edges (up to 4 hops). +```sql +SELECT flaggedId, depth +FROM ( + MATCH {type: Account, where: (accountId = 'a4')} + .both('TRANSFERRED'){while: ($depth < 4), as: hop} + {type: Account, where: (flagged = true), as: flagged} + RETURN flagged.accountId AS flaggedId, $depth AS depth +) +ORDER BY depth ASC +LIMIT 1 +``` + +#### Query 3: Behavior Similarity Search (SQL) +Find accounts with behavior vectors most similar to flagged account a6. +```sql +SELECT accountId, accountType, flagged +FROM Account +ORDER BY vectorNeighbors('Account[behaviorVec]', [0.9, 0.8, 0.1, 0.2], 10) DESC +LIMIT 5 +``` + +#### Query 4: Transaction Velocity (SQL) +Aggregate TransactionMetric documents for each account: total transactions, total amount, average amount per bucket. +```sql +SELECT accountId, + sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount +FROM TransactionMetric +GROUP BY accountId +ORDER BY totalTx DESC +``` + +#### Query 5: Shared Device Network (Cypher) +Find accounts sharing devices with flagged accounts — guilt-by-association. +```cypher +MATCH (flagged:Account {flagged: true}) + -[:LINKED_DEVICE]-(suspect:Account) +WHERE suspect.flagged = false +RETURN DISTINCT suspect.accountId, suspect.accountType, + flagged.accountId AS linkedToFlagged +``` + +### Recommendation Domain (3 queries) + +#### Query 6: Collaborative Filtering (Cypher) +Find products to recommend to u1 based on shared purchase history. +```cypher +MATCH (me:User {userId: 'u1'}) + -[:PURCHASED]->(p:Product) + <-[:PURCHASED]-(other:User) + -[:PURCHASED]->(rec:Product) +WHERE rec <> p + AND NOT (me)-[:PURCHASED]->(rec) +RETURN rec.name, rec.category, count(DISTINCT other) AS score +ORDER BY score DESC LIMIT 10 +``` + +#### Query 7: Product Embedding Search (SQL) +Find products similar to "Laptop" by embedding vector. +```sql +SELECT name, category, price +FROM Product +ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 10) DESC +LIMIT 5 +``` + +#### Query 8: Personalized Ranking (Cypher) +Rank Electronics products for u1 by preference vector similarity. +```cypher +MATCH (u:User {userId: 'u1'}) +MATCH (p:Product) +WHERE p.category = 'Electronics' +RETURN p.name, p.price +ORDER BY vectorNeighbors('Product[embedding]', u.preferenceVec, 20) DESC +LIMIT 10 +``` + +### Maintenance Domain (2 queries) + +#### Query 9: Equipment Dependency Chain (SQL MATCH) +Find all downstream equipment affected if eq1 fails. +```sql +SELECT name, failureRate, depth +FROM ( + MATCH {type: Equipment, where: (equipmentId = 'eq1')} + .in('DEPENDS_ON'){while: ($depth < 5), as: dep} + RETURN dep.name AS name, dep.failureRate AS failureRate, $depth AS depth +) +ORDER BY depth ASC +``` + +#### Query 10: Sensor Anomaly Detection (SQL) +Find equipment with anomalous sensor readings (high temperature or vibration). +```sql +SELECT equipmentId, + avg(temperature) AS avgTemp, + max(vibration) AS maxVibration, + avg(pressure) AS avgPressure +FROM SensorReading +GROUP BY equipmentId +ORDER BY avgTemp DESC +``` + +### Cross-Domain (1 multi-step query) + +#### Query 11: Feature Vector Assembly (Multi-step) +Assemble a complete fraud feature vector for account a4 by combining graph, vector, and time-series signals, then store as a FeatureSnapshot. + +**Step 1 — Graph features:** degree + distance-to-flagged (reuses queries 1 & 2 logic) +**Step 2 — Vector features:** similarity rank among known fraud accounts (reuses query 3 logic) +**Step 3 — Time-series features:** transaction velocity (reuses query 4 logic) +**Step 4 — Store snapshot:** INSERT INTO FeatureSnapshot with assembled vector + +## Query Language Mapping + +| # | Pattern | Language | Signal | Shell | Java | JS (pg) | +|---|---------|----------|--------|-------|------|---------| +| 1 | Account Graph Features | SQL MATCH | Graph | sql | sql | sql | +| 2 | Distance to Flagged | SQL MATCH | Graph | sql | sql | sql | +| 3 | Behavior Similarity | SQL | Vector | sql | sql | sql | +| 4 | Transaction Velocity | SQL | Time-series | sql | sql | sql | +| 5 | Shared Device Network | Cypher | Graph | cypher | cypher | {cypher} prefix | +| 6 | Collaborative Filtering | Cypher | Graph | cypher | cypher | {cypher} prefix | +| 7 | Product Embedding Search | SQL | Vector | sql | sql | sql | +| 8 | Personalized Ranking | Cypher | Graph + Vector | cypher | cypher | {cypher} prefix | +| 9 | Equipment Dependency Chain | SQL MATCH | Graph | sql | sql | sql | +| 10 | Sensor Anomaly Detection | SQL | Time-series | sql | sql | sql | +| 11 | Feature Vector Assembly | SQL (multi-step) | All | sql | sql | sql | + +## JavaScript Module (`js/`) + +- Uses the `pg` npm package to connect via ArcadeDB's PostgreSQL wire protocol on port 5432 +- Cypher queries prefixed with `{cypher}` (e.g., `{cypher} MATCH ...`) +- Same `printHeader()` / `tryRun()` pattern as `supply-chain/js/` +- All 11 queries implemented + +## Dependabot + +Three new entries in `.github/dependabot.yml`: + +1. **Maven** — `/feature-store/java` (groups: `arcadedb`, `maven-plugins`) +2. **npm** — `/feature-store/js` (groups: `node-pg`) +3. **Docker Compose** — `/feature-store` (groups: `arcadedb-docker`) + +## CI Workflow + +`.github/workflows/feature-store.yml` — matrix `[curl, java, js]`, same pattern as `supply-chain.yml`. + +## Success Criteria + +- `docker compose up` starts ArcadeDB with PostgreSQL plugin successfully +- SQL files apply cleanly via `setup.sh` +- `queries.sh` runs all 11 queries and returns non-empty result sets +- `mvn package && java -jar ...` runs all 11 queries and prints results +- `node feature-store.js` runs all 11 queries via PostgreSQL protocol +- Dependabot entries pass validation + +## Reference + +- [ArcadeDB AI/ML Feature Store](https://arcadedb.com/ai-ml-feature-store.html) +- [ArcadeDB PostgreSQL Protocol](https://docs.arcadedb.com/#Postgres-Driver) diff --git a/docs/plans/2026-03-23-feature-store.md b/docs/plans/2026-03-23-feature-store.md new file mode 100644 index 0000000..03e23dc --- /dev/null +++ b/docs/plans/2026-03-23-feature-store.md @@ -0,0 +1,1010 @@ +# AI/ML Feature Store Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Build a fully self-contained `feature-store/` directory demonstrating ArcadeDB as a unified ML feature store serving three domains (fraud, recommendations, maintenance) via 11 query patterns, runnable with `curl`, Java, and JavaScript (PostgreSQL protocol). + +**Architecture:** Self-contained directory per the design doc. Docker Compose brings up ArcadeDB 26.3.2 with PostgreSQL plugin. A `setup.sh` creates the database and applies SQL files. Eleven queries are demonstrated via `queries/queries.sh` (curl), `java/` (Maven fat JAR using `arcadedb-network`), and `js/` (Node.js using `pg` driver). + +**Tech Stack:** ArcadeDB 26.3.2, Docker Compose, Maven 3.x, Java 21, `com.arcadedb:arcadedb-network:26.3.2`, Node.js 22, `pg` npm package, `jq` (for setup/query scripts) + +--- + +### Task 1: Scaffold the directory structure + +**Files:** +- Create: `feature-store/` (directory) +- Create: `feature-store/sql/` (directory) +- Create: `feature-store/queries/` (directory) +- Create: `feature-store/java/src/main/java/com/arcadedb/examples/` (directory) +- Create: `feature-store/js/` (directory) + +**Step 1: Create all directories** + +```bash +mkdir -p feature-store/sql +mkdir -p feature-store/queries +mkdir -p feature-store/java/src/main/java/com/arcadedb/examples +mkdir -p feature-store/js +``` + +**Step 2: Verify** + +```bash +find feature-store -type d +``` + +**Step 3: Commit** + +```bash +git add feature-store/ +git commit -m "chore: scaffold feature-store directory structure" +``` + +--- + +### Task 2: Write docker-compose.yml + +**Files:** +- Create: `feature-store/docker-compose.yml` + +**Step 1: Write the file** + +```yaml +services: + arcadedb: + image: arcadedata/arcadedb:26.3.2 + ports: + - "2480:2480" + - "5432:5432" + environment: + JAVA_OPTS: >- + -Darcadedb.server.rootPassword=arcadedb + -Darcadedb.server.plugins=Postgres:com.arcadedb.postgres.PostgresProtocolPlugin + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:2480/api/v1/ready"] + interval: 5s + timeout: 3s + retries: 20 + start_period: 10s +``` + +**Step 2: Verify the container starts** + +```bash +cd feature-store +docker compose up -d +docker compose ps +``` + +Expected: `arcadedb` service shows `healthy` after ~30 seconds. Both ports 2480 and 5432 mapped. + +**Step 3: Verify PostgreSQL protocol is reachable** + +```bash +curl -sf -u root:arcadedb http://localhost:2480/api/v1/ready +``` + +**Step 4: Commit** + +```bash +git add feature-store/docker-compose.yml +git commit -m "feat(feature-store): add docker-compose with PostgreSQL plugin" +``` + +--- + +### Task 3: Write the SQL schema + +**Files:** +- Create: `feature-store/sql/01-schema.sql` + +**Step 1: Write the schema file** + +One statement per line. 6 vertex types, 6 edge types, 3 document types, 5 indexes. + +```sql +-- Fraud domain +CREATE VERTEX TYPE Account IF NOT EXISTS; +CREATE PROPERTY Account.accountId IF NOT EXISTS STRING; +CREATE PROPERTY Account.accountType IF NOT EXISTS STRING; +CREATE PROPERTY Account.signupSource IF NOT EXISTS STRING; +CREATE PROPERTY Account.flagged IF NOT EXISTS BOOLEAN; +CREATE PROPERTY Account.behaviorVec IF NOT EXISTS LIST; +CREATE INDEX IF NOT EXISTS ON Account (accountId) UNIQUE; +CREATE VERTEX TYPE Merchant IF NOT EXISTS; +CREATE PROPERTY Merchant.merchantId IF NOT EXISTS STRING; +CREATE PROPERTY Merchant.category IF NOT EXISTS STRING; +CREATE PROPERTY Merchant.riskTier IF NOT EXISTS STRING; +-- Recommendation domain +CREATE VERTEX TYPE User IF NOT EXISTS; +CREATE PROPERTY User.userId IF NOT EXISTS STRING; +CREATE PROPERTY User.preferenceVec IF NOT EXISTS LIST; +CREATE INDEX IF NOT EXISTS ON User (userId) UNIQUE; +CREATE VERTEX TYPE Product IF NOT EXISTS; +CREATE PROPERTY Product.productId IF NOT EXISTS STRING; +CREATE PROPERTY Product.name IF NOT EXISTS STRING; +CREATE PROPERTY Product.category IF NOT EXISTS STRING; +CREATE PROPERTY Product.price IF NOT EXISTS FLOAT; +CREATE PROPERTY Product.embedding IF NOT EXISTS LIST; +-- Maintenance domain +CREATE VERTEX TYPE Equipment IF NOT EXISTS; +CREATE PROPERTY Equipment.equipmentId IF NOT EXISTS STRING; +CREATE PROPERTY Equipment.name IF NOT EXISTS STRING; +CREATE PROPERTY Equipment.specifications IF NOT EXISTS STRING; +CREATE PROPERTY Equipment.failureRate IF NOT EXISTS FLOAT; +CREATE INDEX IF NOT EXISTS ON Equipment (equipmentId) UNIQUE; +CREATE VERTEX TYPE Sensor IF NOT EXISTS; +CREATE PROPERTY Sensor.sensorId IF NOT EXISTS STRING; +CREATE PROPERTY Sensor.sensorType IF NOT EXISTS STRING; +CREATE PROPERTY Sensor.unit IF NOT EXISTS STRING; +-- Edge types +CREATE EDGE TYPE TRANSFERRED IF NOT EXISTS; +CREATE PROPERTY TRANSFERRED.amount IF NOT EXISTS FLOAT; +CREATE PROPERTY TRANSFERRED.recordedAt IF NOT EXISTS DATETIME; +CREATE EDGE TYPE LINKED_DEVICE IF NOT EXISTS; +CREATE PROPERTY LINKED_DEVICE.deviceId IF NOT EXISTS STRING; +CREATE EDGE TYPE TRANSACTED IF NOT EXISTS; +CREATE PROPERTY TRANSACTED.amount IF NOT EXISTS FLOAT; +CREATE PROPERTY TRANSACTED.recordedAt IF NOT EXISTS DATETIME; +CREATE EDGE TYPE PURCHASED IF NOT EXISTS; +CREATE EDGE TYPE DEPENDS_ON IF NOT EXISTS; +CREATE PROPERTY DEPENDS_ON.criticality IF NOT EXISTS STRING; +CREATE EDGE TYPE MONITORED_BY IF NOT EXISTS; +-- Document types +CREATE DOCUMENT TYPE TransactionMetric IF NOT EXISTS; +CREATE PROPERTY TransactionMetric.accountId IF NOT EXISTS STRING; +CREATE PROPERTY TransactionMetric.txCount IF NOT EXISTS LONG; +CREATE PROPERTY TransactionMetric.totalAmount IF NOT EXISTS FLOAT; +CREATE PROPERTY TransactionMetric.recordedAt IF NOT EXISTS DATETIME; +CREATE DOCUMENT TYPE SensorReading IF NOT EXISTS; +CREATE PROPERTY SensorReading.equipmentId IF NOT EXISTS STRING; +CREATE PROPERTY SensorReading.temperature IF NOT EXISTS FLOAT; +CREATE PROPERTY SensorReading.vibration IF NOT EXISTS FLOAT; +CREATE PROPERTY SensorReading.pressure IF NOT EXISTS FLOAT; +CREATE PROPERTY SensorReading.recordedAt IF NOT EXISTS DATETIME; +CREATE DOCUMENT TYPE FeatureSnapshot IF NOT EXISTS; +CREATE PROPERTY FeatureSnapshot.entityId IF NOT EXISTS STRING; +CREATE PROPERTY FeatureSnapshot.entityType IF NOT EXISTS STRING; +CREATE PROPERTY FeatureSnapshot.featureVector IF NOT EXISTS LIST; +CREATE PROPERTY FeatureSnapshot.computedAt IF NOT EXISTS DATETIME; +CREATE PROPERTY FeatureSnapshot.modelVersion IF NOT EXISTS STRING; +-- Vector indexes +CREATE INDEX IF NOT EXISTS ON Account (behaviorVec) LSM_VECTOR METADATA { dimensions: 4, similarity: 'COSINE' }; +CREATE INDEX IF NOT EXISTS ON Product (embedding) LSM_VECTOR METADATA { dimensions: 4, similarity: 'COSINE' }; +``` + +**Step 2: Commit** + +```bash +git add feature-store/sql/01-schema.sql +git commit -m "feat(feature-store): add multi-domain schema SQL" +``` + +--- + +### Task 4: Write the sample data + +**Files:** +- Create: `feature-store/sql/02-data.sql` + +Behavior vectors: legit accounts cluster near `[0.1, 0.2, 0.8, 0.9]`, fraud accounts near `[0.9, 0.8, 0.1, 0.2]`. Product embeddings: Electronics near `[0.9, 0.1, 0.1, 0.1]`, Books near `[0.1, 0.9, 0.1, 0.1]`, Sports near `[0.1, 0.1, 0.9, 0.1]`. + +**Step 1: Write the data file** + +```sql +-- === Fraud Domain: Accounts === +-- a1-a3: legit, a4-a5: suspicious, a6: flagged +INSERT INTO Account SET accountId = 'a1', accountType = 'personal', signupSource = 'organic', flagged = false, behaviorVec = [0.1, 0.2, 0.8, 0.9]; +INSERT INTO Account SET accountId = 'a2', accountType = 'personal', signupSource = 'referral', flagged = false, behaviorVec = [0.2, 0.1, 0.9, 0.8]; +INSERT INTO Account SET accountId = 'a3', accountType = 'business', signupSource = 'organic', flagged = false, behaviorVec = [0.1, 0.3, 0.7, 0.9]; +INSERT INTO Account SET accountId = 'a4', accountType = 'personal', signupSource = 'ad_campaign', flagged = false, behaviorVec = [0.7, 0.6, 0.2, 0.3]; +INSERT INTO Account SET accountId = 'a5', accountType = 'personal', signupSource = 'ad_campaign', flagged = false, behaviorVec = [0.8, 0.7, 0.2, 0.1]; +INSERT INTO Account SET accountId = 'a6', accountType = 'personal', signupSource = 'unknown', flagged = true, behaviorVec = [0.9, 0.8, 0.1, 0.2]; +-- Merchants +INSERT INTO Merchant SET merchantId = 'm1', category = 'grocery', riskTier = 'low'; +INSERT INTO Merchant SET merchantId = 'm2', category = 'electronics', riskTier = 'low'; +INSERT INTO Merchant SET merchantId = 'm3', category = 'gambling', riskTier = 'high'; +INSERT INTO Merchant SET merchantId = 'm4', category = 'crypto', riskTier = 'high'; +-- TRANSFERRED edges (money flows; a4-a6 form a circular pattern) +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a2') SET amount = 500.00, recordedAt = '2026-03-01 10:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a2') TO (SELECT FROM Account WHERE accountId = 'a3') SET amount = 200.00, recordedAt = '2026-03-02 11:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a3') SET amount = 150.00, recordedAt = '2026-03-03 09:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a6') SET amount = 3000.00, recordedAt = '2026-03-10 02:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a6') TO (SELECT FROM Account WHERE accountId = 'a5') SET amount = 2800.00, recordedAt = '2026-03-10 02:30:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Account WHERE accountId = 'a4') SET amount = 2500.00, recordedAt = '2026-03-10 03:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a5') SET amount = 1500.00, recordedAt = '2026-03-11 04:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a6') TO (SELECT FROM Account WHERE accountId = 'a4') SET amount = 4000.00, recordedAt = '2026-03-12 01:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a3') TO (SELECT FROM Account WHERE accountId = 'a1') SET amount = 100.00, recordedAt = '2026-03-05 14:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a2') TO (SELECT FROM Account WHERE accountId = 'a1') SET amount = 250.00, recordedAt = '2026-03-06 16:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Account WHERE accountId = 'a6') SET amount = 1800.00, recordedAt = '2026-03-13 05:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a4') SET amount = 300.00, recordedAt = '2026-03-08 12:00:00'; +-- LINKED_DEVICE edges (a4 and a5 share devices with flagged a6) +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a6') SET deviceId = 'dev-001'; +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Account WHERE accountId = 'a6') SET deviceId = 'dev-002'; +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a5') SET deviceId = 'dev-003'; +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a2') SET deviceId = 'dev-004'; +-- TRANSACTED edges (account -> merchant) +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Merchant WHERE merchantId = 'm1') SET amount = 85.50, recordedAt = '2026-03-01 09:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Merchant WHERE merchantId = 'm2') SET amount = 450.00, recordedAt = '2026-03-02 14:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a2') TO (SELECT FROM Merchant WHERE merchantId = 'm1') SET amount = 62.30, recordedAt = '2026-03-03 10:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Merchant WHERE merchantId = 'm3') SET amount = 5000.00, recordedAt = '2026-03-10 22:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Merchant WHERE merchantId = 'm4') SET amount = 8000.00, recordedAt = '2026-03-11 01:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Merchant WHERE merchantId = 'm3') SET amount = 3500.00, recordedAt = '2026-03-12 23:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a6') TO (SELECT FROM Merchant WHERE merchantId = 'm4') SET amount = 12000.00, recordedAt = '2026-03-10 00:30:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a3') TO (SELECT FROM Merchant WHERE merchantId = 'm2') SET amount = 320.00, recordedAt = '2026-03-04 15:00:00'; +-- TransactionMetric documents (time-bucketed velocity data) +INSERT INTO TransactionMetric SET accountId = 'a1', txCount = 5, totalAmount = 1200.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a1', txCount = 3, totalAmount = 800.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a1', txCount = 4, totalAmount = 950.00, recordedAt = '2026-03-15 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a2', txCount = 2, totalAmount = 400.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a2', txCount = 3, totalAmount = 600.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a3', txCount = 4, totalAmount = 1500.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a3', txCount = 2, totalAmount = 700.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a4', txCount = 15, totalAmount = 25000.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a4', txCount = 22, totalAmount = 48000.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a4', txCount = 30, totalAmount = 72000.00, recordedAt = '2026-03-15 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a5', txCount = 8, totalAmount = 12000.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a5', txCount = 18, totalAmount = 35000.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a6', txCount = 25, totalAmount = 60000.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a6', txCount = 35, totalAmount = 95000.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a6', txCount = 40, totalAmount = 120000.00, recordedAt = '2026-03-15 00:00:00'; +-- === Recommendation Domain: Users === +INSERT INTO User SET userId = 'u1', preferenceVec = [0.9, 0.1, 0.1, 0.1]; +INSERT INTO User SET userId = 'u2', preferenceVec = [0.7, 0.3, 0.1, 0.1]; +INSERT INTO User SET userId = 'u3', preferenceVec = [0.1, 0.9, 0.1, 0.1]; +INSERT INTO User SET userId = 'u4', preferenceVec = [0.1, 0.1, 0.9, 0.1]; +INSERT INTO User SET userId = 'u5', preferenceVec = [0.4, 0.4, 0.2, 0.1]; +-- Products +INSERT INTO Product SET productId = 'p1', name = 'Laptop', category = 'Electronics', price = 999.99, embedding = [0.9, 0.1, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p2', name = 'Phone', category = 'Electronics', price = 699.99, embedding = [0.8, 0.2, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p3', name = 'Headphones', category = 'Electronics', price = 199.99, embedding = [0.7, 0.2, 0.2, 0.1]; +INSERT INTO Product SET productId = 'p4', name = 'ML Textbook', category = 'Books', price = 79.99, embedding = [0.1, 0.9, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p5', name = 'Data Science Guide', category = 'Books', price = 49.99, embedding = [0.2, 0.8, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p6', name = 'Python Cookbook', category = 'Books', price = 39.99, embedding = [0.1, 0.8, 0.2, 0.1]; +INSERT INTO Product SET productId = 'p7', name = 'Running Shoes', category = 'Sports', price = 89.99, embedding = [0.1, 0.1, 0.9, 0.1]; +INSERT INTO Product SET productId = 'p8', name = 'Yoga Mat', category = 'Sports', price = 29.99, embedding = [0.1, 0.1, 0.8, 0.2]; +-- PURCHASED edges (u1+u2 share Laptop and Phone -> collab recommends ML Textbook to u1) +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u1') TO (SELECT FROM Product WHERE productId = 'p1'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u1') TO (SELECT FROM Product WHERE productId = 'p2'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u1') TO (SELECT FROM Product WHERE productId = 'p3'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u2') TO (SELECT FROM Product WHERE productId = 'p1'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u2') TO (SELECT FROM Product WHERE productId = 'p2'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u2') TO (SELECT FROM Product WHERE productId = 'p4'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u3') TO (SELECT FROM Product WHERE productId = 'p4'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u3') TO (SELECT FROM Product WHERE productId = 'p5'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u3') TO (SELECT FROM Product WHERE productId = 'p6'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u4') TO (SELECT FROM Product WHERE productId = 'p7'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u4') TO (SELECT FROM Product WHERE productId = 'p8'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u5') TO (SELECT FROM Product WHERE productId = 'p1'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u5') TO (SELECT FROM Product WHERE productId = 'p5'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u5') TO (SELECT FROM Product WHERE productId = 'p7'); +-- === Maintenance Domain: Equipment === +INSERT INTO Equipment SET equipmentId = 'eq1', name = 'Main Compressor', specifications = 'Industrial 500HP', failureRate = 0.02; +INSERT INTO Equipment SET equipmentId = 'eq2', name = 'Cooling Unit A', specifications = 'Glycol cooling 200kW', failureRate = 0.05; +INSERT INTO Equipment SET equipmentId = 'eq3', name = 'Pump Station B', specifications = 'Centrifugal 150GPM', failureRate = 0.03; +INSERT INTO Equipment SET equipmentId = 'eq4', name = 'Generator Alpha', specifications = 'Diesel 800kVA', failureRate = 0.01; +INSERT INTO Equipment SET equipmentId = 'eq5', name = 'Control Panel C', specifications = 'PLC-based HMI', failureRate = 0.08; +-- DEPENDS_ON edges (eq2,eq3 depend on eq1; eq5 depends on eq4; eq4 depends on eq1) +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq2') TO (SELECT FROM Equipment WHERE equipmentId = 'eq1') SET criticality = 'high'; +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq3') TO (SELECT FROM Equipment WHERE equipmentId = 'eq1') SET criticality = 'medium'; +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq4') TO (SELECT FROM Equipment WHERE equipmentId = 'eq1') SET criticality = 'high'; +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq5') TO (SELECT FROM Equipment WHERE equipmentId = 'eq4') SET criticality = 'high'; +-- Sensors +INSERT INTO Sensor SET sensorId = 's1', sensorType = 'temperature', unit = 'celsius'; +INSERT INTO Sensor SET sensorId = 's2', sensorType = 'vibration', unit = 'mm_per_sec'; +INSERT INTO Sensor SET sensorId = 's3', sensorType = 'pressure', unit = 'bar'; +-- MONITORED_BY edges +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq1') TO (SELECT FROM Sensor WHERE sensorId = 's1'); +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq1') TO (SELECT FROM Sensor WHERE sensorId = 's2'); +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq2') TO (SELECT FROM Sensor WHERE sensorId = 's1'); +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq3') TO (SELECT FROM Sensor WHERE sensorId = 's3'); +-- SensorReading documents (eq1 showing anomalous high temperature and vibration) +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 85.2, vibration = 4.5, pressure = 6.1, recordedAt = '2026-03-15 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 92.1, vibration = 5.8, pressure = 6.3, recordedAt = '2026-03-16 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 98.5, vibration = 7.2, pressure = 6.5, recordedAt = '2026-03-17 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 105.3, vibration = 9.1, pressure = 6.8, recordedAt = '2026-03-18 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 42.1, vibration = 1.2, pressure = 4.5, recordedAt = '2026-03-15 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 43.0, vibration = 1.3, pressure = 4.6, recordedAt = '2026-03-16 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 41.8, vibration = 1.1, pressure = 4.4, recordedAt = '2026-03-17 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 42.5, vibration = 1.2, pressure = 4.5, recordedAt = '2026-03-18 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 55.0, vibration = 2.0, pressure = 8.1, recordedAt = '2026-03-15 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 54.5, vibration = 2.1, pressure = 8.0, recordedAt = '2026-03-16 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 56.2, vibration = 1.9, pressure = 8.2, recordedAt = '2026-03-17 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 55.8, vibration = 2.0, pressure = 8.1, recordedAt = '2026-03-18 08:00:00'; +-- === Feature Store Infrastructure: FeatureSnapshots === +INSERT INTO FeatureSnapshot SET entityId = 'a1', entityType = 'Account', featureVector = [4, 3, 5, 12, 2950, 0.15], computedAt = '2026-03-15 00:00:00', modelVersion = 'fraud-v2.1'; +INSERT INTO FeatureSnapshot SET entityId = 'a4', entityType = 'Account', featureVector = [8, 6, 3, 67, 145000, 0.87], computedAt = '2026-03-15 00:00:00', modelVersion = 'fraud-v2.1'; +INSERT INTO FeatureSnapshot SET entityId = 'a6', entityType = 'Account', featureVector = [11, 5, 1, 100, 275000, 0.99], computedAt = '2026-03-15 00:00:00', modelVersion = 'fraud-v2.1'; +``` + +**Step 2: Commit** + +```bash +git add feature-store/sql/02-data.sql +git commit -m "feat(feature-store): add multi-domain sample data" +``` + +--- + +### Task 5: Write setup.sh + +**Files:** +- Create: `feature-store/setup.sh` + +Copy the pattern from recommendation-engine: wait for ArcadeDB, create database `FeatureStore`, apply sql files line-by-line. + +**Step 1: Write the script** + +```bash +#!/usr/bin/env bash +set -euo pipefail + +ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}" +ARCADEDB_USER="${ARCADEDB_USER:-root}" +ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}" +DB_NAME="FeatureStore" + +# ── Wait for ArcadeDB ───────────────────────────────────────────────────────── +echo "Waiting for ArcadeDB at ${ARCADEDB_URL}..." +until curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + "${ARCADEDB_URL}/api/v1/ready" > /dev/null 2>&1; do + sleep 2 +done +echo "ArcadeDB is ready." + +# ── Create database ─────────────────────────────────────────────────────────── +echo "Creating database ${DB_NAME}..." +curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + -X POST "${ARCADEDB_URL}/api/v1/server" \ + -H "Content-Type: application/json" \ + -d "{\"command\": \"create database ${DB_NAME}\"}" > /dev/null || true +echo "Database ready." + +# ── Helper: send one SQL statement ─────────────────────────────────────────── +send_sql() { + local stmt="$1" + jq -cn --arg cmd "$stmt" '{"language":"sql","command":$cmd}' \ + | curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + -X POST "${ARCADEDB_URL}/api/v1/command/${DB_NAME}" \ + -H "Content-Type: application/json" \ + -d @- > /dev/null +} + +# ── Apply a SQL file (one statement per line) ───────────────────────────────── +apply_file() { + local file="$1" + echo "Applying ${file}..." + while IFS= read -r line || [[ -n "$line" ]]; do + # skip blank lines and SQL comments + [[ -z "${line//[[:space:]]/}" || "$line" =~ ^[[:space:]]*-- ]] && continue + send_sql "${line%%;}" + done < "$file" + echo "Done: ${file}" +} + +apply_file "sql/01-schema.sql" +apply_file "sql/02-data.sql" + +echo "" +echo "Setup complete. ${DB_NAME} is ready." +``` + +**Step 2: Make executable** + +```bash +chmod +x feature-store/setup.sh +``` + +**Step 3: Run it (ArcadeDB must be up from Task 2)** + +```bash +cd feature-store +./setup.sh +``` + +Expected: no errors, ends with "Setup complete." + +**Step 4: Smoke-test** + +```bash +curl -s -u root:arcadedb \ + -X POST "http://localhost:2480/api/v1/query/FeatureStore" \ + -H "Content-Type: application/json" \ + -d '{"language":"sql","command":"SELECT count(*) FROM Account"}' | jq . +``` + +Expected: count = 6. + +**Step 5: Commit** + +```bash +git add feature-store/setup.sh +git commit -m "feat(feature-store): add database setup script" +``` + +--- + +### Task 6: Write queries/queries.sh + +**Files:** +- Create: `feature-store/queries/queries.sh` + +All 11 query patterns as curl calls. Uses `query()` helper for both SQL and Cypher. + +**Step 1: Write the script** + +```bash +#!/usr/bin/env bash +# AI/ML Feature Store — all 11 query patterns via curl +# Prerequisites: ArcadeDB running, setup.sh already executed, jq installed +# Usage: ./queries/queries.sh + +set -euo pipefail + +ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}" +ARCADEDB_USER="${ARCADEDB_USER:-root}" +ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}" +AUTH="${ARCADEDB_USER}:${ARCADEDB_PASS}" +DB="FeatureStore" +QUERY_URL="${ARCADEDB_URL}/api/v1/query/${DB}" +COMMAND_URL="${ARCADEDB_URL}/api/v1/command/${DB}" + +query() { + local lang="$1" cmd="$2" + jq -cn --arg l "$lang" --arg c "$cmd" '{"language":$l,"command":$c}' \ + | curl -sf -u "$AUTH" -X POST "$QUERY_URL" \ + -H "Content-Type: application/json" -d @- \ + | jq '.result' +} + +command() { + local lang="$1" cmd="$2" + jq -cn --arg l "$lang" --arg c "$cmd" '{"language":$l,"command":$c}' \ + | curl -sf -u "$AUTH" -X POST "$COMMAND_URL" \ + -H "Content-Type: application/json" -d @- \ + | jq '.result' +} + +# ───────────────────────────────────────────────────────────────────────────── +echo "========== FRAUD DOMAIN ==========" +echo "" +echo "=== Query 1: Account Graph Features (SQL MATCH) ===" +echo "Compute graph topology features for account a4." +echo "" +query "sql" " +SELECT inDeg, outDeg, counterparties +FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties +) +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 2: Distance to Flagged Account (SQL MATCH) ===" +echo "Find shortest path from a4 to nearest flagged account via transfers." +echo "" +query "sql" " +SELECT flaggedId, depth +FROM ( + MATCH {type: Account, where: (accountId = 'a4')} + .both('TRANSFERRED'){while: (\$depth < 4), as: hop} + {type: Account, where: (flagged = true), as: flagged} + RETURN flagged.accountId AS flaggedId, \$depth AS depth +) +ORDER BY depth ASC +LIMIT 1 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 3: Behavior Similarity Search (SQL) ===" +echo "Find accounts with behavior vectors similar to flagged a6 [0.9,0.8,0.1,0.2]." +echo "" +query "sql" " +SELECT accountId, accountType, flagged +FROM Account +ORDER BY vectorNeighbors('Account[behaviorVec]', [0.9, 0.8, 0.1, 0.2], 10) DESC +LIMIT 5 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 4: Transaction Velocity (SQL) ===" +echo "Aggregate TransactionMetric for velocity features per account." +echo "" +query "sql" " +SELECT accountId, + sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount +FROM TransactionMetric +GROUP BY accountId +ORDER BY totalTx DESC +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 5: Shared Device Network (Cypher) ===" +echo "Find accounts sharing devices with flagged accounts." +echo "" +query "cypher" " +MATCH (flagged:Account {flagged: true}) + -[:LINKED_DEVICE]-(suspect:Account) +WHERE suspect.flagged = false +RETURN DISTINCT suspect.accountId, suspect.accountType, + flagged.accountId AS linkedToFlagged +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "========== RECOMMENDATION DOMAIN ==========" +echo "" +echo "=== Query 6: Collaborative Filtering (Cypher) ===" +echo "Find products to recommend to u1 based on shared purchases." +echo "" +query "cypher" " +MATCH (me:User {userId: 'u1'}) + -[:PURCHASED]->(p:Product) + <-[:PURCHASED]-(other:User) + -[:PURCHASED]->(rec:Product) +WHERE rec <> p + AND NOT (me)-[:PURCHASED]->(rec) +RETURN rec.name, rec.category, count(DISTINCT other) AS score +ORDER BY score DESC LIMIT 10 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 7: Product Embedding Search (SQL) ===" +echo "Find products similar to Laptop embedding [0.9,0.1,0.1,0.1]." +echo "" +query "sql" " +SELECT name, category, price +FROM Product +ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 10) DESC +LIMIT 5 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 8: Personalized Ranking (SQL) ===" +echo "Rank Electronics products for u1 by preference vector similarity." +echo "" +query "sql" " +SELECT name, price +FROM Product +WHERE category = 'Electronics' +ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 20) DESC +LIMIT 10 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "========== MAINTENANCE DOMAIN ==========" +echo "" +echo "=== Query 9: Equipment Dependency Chain (SQL MATCH) ===" +echo "Find all downstream equipment affected if eq1 fails." +echo "" +query "sql" " +SELECT name, failureRate, criticality +FROM ( + MATCH {type: Equipment, where: (equipmentId = 'eq1')} + .in('DEPENDS_ON'){as: dep} + RETURN dep.name AS name, dep.failureRate AS failureRate, + dep.out('DEPENDS_ON')[0].criticality AS criticality +) +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 10: Sensor Anomaly Detection (SQL) ===" +echo "Find equipment with anomalous sensor readings." +echo "" +query "sql" " +SELECT equipmentId, + avg(temperature) AS avgTemp, + max(vibration) AS maxVibration, + avg(pressure) AS avgPressure +FROM SensorReading +GROUP BY equipmentId +ORDER BY avgTemp DESC +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "========== CROSS-DOMAIN ==========" +echo "" +echo "=== Query 11: Feature Vector Assembly (Multi-step) ===" +echo "Assemble a fraud feature vector for account a4." +echo "" + +echo "--- Step 1: Graph features (degree + counterparties) ---" +query "sql" " +SELECT inDeg, outDeg, counterparties +FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties +) +" + +echo "" +echo "--- Step 2: Vector features (similarity rank to known fraud) ---" +query "sql" " +SELECT accountId, flagged +FROM Account +ORDER BY vectorNeighbors('Account[behaviorVec]', [0.7, 0.6, 0.2, 0.3], 10) DESC +LIMIT 5 +" + +echo "" +echo "--- Step 3: Time-series features (transaction velocity) ---" +query "sql" " +SELECT sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount +FROM TransactionMetric +WHERE accountId = 'a4' +" + +echo "" +echo "--- Step 4: Store feature snapshot ---" +command "sql" " +INSERT INTO FeatureSnapshot SET entityId = 'a4', entityType = 'Account', + featureVector = [8, 6, 3, 67, 145000, 0.87], + computedAt = '2026-03-23 00:00:00', modelVersion = 'fraud-v2.2' +" +echo "(Snapshot stored)" + +echo "" +echo "--- Verify: Feature snapshots for a4 ---" +query "sql" " +SELECT entityId, modelVersion, computedAt +FROM FeatureSnapshot +WHERE entityId = 'a4' +ORDER BY computedAt DESC +" +``` + +**Step 2: Make executable** + +```bash +chmod +x feature-store/queries/queries.sh +``` + +**Step 3: Run and verify all 11 queries return results** + +```bash +cd feature-store +./queries/queries.sh +``` + +Expected for key queries: +- Query 1: inDeg/outDeg/counterparties for a4 +- Query 3: a6, a5, a4 near top (fraud-like vectors) +- Query 5: a4 and a5 as suspects linked to a6 +- Query 6: ML Textbook recommended to u1 (via shared purchases with u2) +- Query 10: eq1 with highest avgTemp (~95) + +**Step 4: Commit** + +```bash +git add feature-store/queries/queries.sh +git commit -m "feat(feature-store): add 11 curl query demonstrations" +``` + +--- + +### Task 7: Write java/pom.xml + +**Files:** +- Create: `feature-store/java/pom.xml` + +**Step 1: Write the pom.xml** + +```xml + + + 4.0.0 + + com.arcadedb.examples + feature-store + 1.0-SNAPSHOT + jar + + + 21 + 21 + UTF-8 + 26.3.2 + + + + + com.arcadedb + arcadedb-network + ${arcadedb.version} + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.7.1 + + + + com.arcadedb.examples.FeatureStore + + + + jar-with-dependencies + + feature-store + false + + + + make-assembly + package + + single + + + + + + + +``` + +**Step 2: Verify Maven resolves dependencies** + +```bash +cd feature-store/java +mvn dependency:resolve -q +``` + +**Step 3: Commit** + +```bash +git add feature-store/java/pom.xml +git commit -m "feat(feature-store): add Maven project for Java demo" +``` + +--- + +### Task 8: Write FeatureStore.java + +**Files:** +- Create: `feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java` + +Follows the `tryRun()`/`printHeader()` pattern. Implements all 11 queries. Uses `RemoteDatabase` with `query()` for reads and `command()` for writes. Cypher queries use `"cypher"` language. + +Note: For Cypher queries returning properties like `rec.name`, use `getProperty("rec.name")`. Be aware of the `ClassCastException` issue with `collect()` — avoid `collect()` in Cypher. + +**Step 1: Write the Java class** + +The class should: +1. Open a `RemoteDatabase` connection +2. Run all 11 queries sequentially wrapped in `tryRun()` +3. Print formatted output for each +4. For Query 11, use `db.command()` for the INSERT + +**Step 2: Build and run** + +```bash +cd feature-store/java +mvn package -q +java -jar target/feature-store.jar +``` + +**Step 3: If a query throws an exception**, note the error and adjust: +- If `$depth` variable causes issues in MATCH: try alternative syntax +- If Cypher `RETURN DISTINCT` fails: remove DISTINCT +- If `vectorNeighbors` ordering seems wrong: verify the index name format `TypeName[propertyName]` + +**Step 4: Commit once all queries run successfully** + +```bash +git add feature-store/java/ +git commit -m "feat(feature-store): add Java FeatureStore main class" +``` + +--- + +### Task 9: Write js/package.json + +**Files:** +- Create: `feature-store/js/package.json` + +```json +{ + "name": "feature-store", + "version": "1.0.0", + "private": true, + "description": "ArcadeDB Feature Store queries via PostgreSQL protocol", + "main": "feature-store.js", + "dependencies": { + "pg": "^8.13.0" + } +} +``` + +**Step 1: Install dependencies** + +```bash +cd feature-store/js +npm install +``` + +**Step 2: Commit** (include package.json only, not package-lock.json — it will be gitignored or committed separately) + +```bash +git add feature-store/js/package.json +git commit -m "feat(feature-store): add Node.js package for PostgreSQL protocol" +``` + +--- + +### Task 10: Write js/feature-store.js + +**Files:** +- Create: `feature-store/js/feature-store.js` + +Follows supply-chain/js pattern: `pg.Client`, `printHeader()`, `tryRun()`, sequential queries. Cypher queries use `{cypher}` prefix. All 11 queries. + +**Step 1: Write the script** + +Connection config from env vars: `ARCADEDB_HOST`, `ARCADEDB_PG_PORT` (default 5432), `ARCADEDB_USER`, `ARCADEDB_PASS`. Database: `FeatureStore`. + +For Cypher queries (5, 6, 8), prefix with `{cypher}`: +```javascript +const sql = `{cypher} MATCH (flagged:Account {flagged: true}) ...`; +``` + +For Query 11 Step 4 (INSERT), use `client.query()` with the SQL INSERT statement. + +**Step 2: Run and verify** + +```bash +cd feature-store/js +node feature-store.js +``` + +**Step 3: Commit** + +```bash +git add feature-store/js/feature-store.js +git commit -m "feat(feature-store): add JavaScript queries via PostgreSQL protocol" +``` + +--- + +### Task 11: Write feature-store/README.md + +**Files:** +- Create: `feature-store/README.md` + +Follow the recommendation-engine README format. Include: +- Overview (3 ML teams, unified feature store) +- Prerequisites (Docker, curl, jq, Java 21, Maven, Node.js 22) +- Quickstart (docker compose up, setup.sh, queries.sh, Java, JS) +- Schema table (all 15 types) +- Query patterns table (all 11) +- Sample data summary +- ArcadeDB version notes +- Reference link + +**Step 1: Write the README** + +**Step 2: Commit** + +```bash +git add feature-store/README.md +git commit -m "docs(feature-store): add README with quickstart guide" +``` + +--- + +### Task 12: Create CI workflow + +**Files:** +- Create: `.github/workflows/feature-store.yml` + +Copy from `supply-chain.yml`, change 5 values: +1. `name: Feature Store CI` +2. `paths: feature-store/**` and `.github/workflows/feature-store.yml` +3. Cache keys: `feature-store` instead of `supply-chain` +4. `working-directory: feature-store` (and `feature-store/java`, `feature-store/js`) +5. JAR filename: `feature-store.jar` + +Matrix: `[curl, java, js]` — same as supply-chain. + +Use the same pinned action SHAs from supply-chain.yml. + +**Step 1: Write the workflow file** + +**Step 2: Commit** + +```bash +git add .github/workflows/feature-store.yml +git commit -m "ci: add Feature Store CI workflow" +``` + +--- + +### Task 13: Update dependabot.yml + +**Files:** +- Modify: `.github/dependabot.yml` + +Add three new entries before the closing of the file, following existing patterns: + +1. Maven entry for `/feature-store/java` with `arcadedb` and `maven-plugins` groups +2. npm entry for `/feature-store/js` with `node-pg` group +3. Docker Compose entry for `/feature-store` with `arcadedb-docker` group + +**Step 1: Read current file and append entries** + +**Step 2: Commit** + +```bash +git add .github/dependabot.yml +git commit -m "chore: add dependabot entries for feature-store module" +``` + +--- + +### Task 14: Update root README.md + +**Files:** +- Modify: `README.md` + +Add a row to the use cases table: + +```markdown +| [feature-store](./feature-store/) | Unified ML feature store for fraud, recommendations, and maintenance | Graph traversal, Vector similarity, Time-series, PostgreSQL protocol, JavaScript | +``` + +**Step 1: Insert row into table** + +**Step 2: Commit** + +```bash +git add README.md +git commit -m "docs: add feature-store to root README use cases table" +``` + +--- + +### Task 15: Final cleanup and push + +**Step 1: Stop Docker** + +```bash +cd feature-store +docker compose down +``` + +**Step 2: Verify clean git state** + +```bash +git status +git log --oneline -15 +``` + +**Step 3: Push branch** + +```bash +git push origin feat/feature-store +``` diff --git a/feature-store/README.md b/feature-store/README.md new file mode 100644 index 0000000..1b22204 --- /dev/null +++ b/feature-store/README.md @@ -0,0 +1,135 @@ +# AI/ML Feature Store + +Demonstrates ArcadeDB as a unified feature store for production ML systems, +replacing the typical scatter of 3+ specialized databases (graph DB, vector DB, +time-series DB) with a single multi-model engine serving three ML teams: + +- **Fraud scoring** — graph topology + behavior vectors + transaction velocity +- **Product recommendations** — collaborative filtering + product embeddings +- **Predictive maintenance** — equipment dependency graph + sensor aggregates + +A cross-cutting Feature Store infrastructure layer records feature snapshots +for audit and lineage, demonstrating training-serving consistency. + +## Prerequisites + +- Docker and Docker Compose +- `curl` and `jq` +- Java 21+ and Maven 3.x (for the Java demo) +- Node.js 22+ (for the JavaScript demo) + +## Quickstart + +### 1. Start ArcadeDB + +```bash +docker compose up -d +``` + +### 2. Create database and load data + +```bash +./setup.sh +``` + +This creates the `FeatureStore` database, applies the schema, and inserts sample data. + +### 3a. Run queries via curl + +```bash +./queries/queries.sh +``` + +### 3b. Run queries via Java + +```bash +cd java +mvn package -q +java -jar target/feature-store.jar +``` + +### 3c. Run queries via JavaScript (PostgreSQL protocol) + +```bash +cd js +npm install +node feature-store.js +``` + +## Schema + +| Type | Kind | Key properties | +|------|------|----------------| +| `Account` | Vertex | `accountId`, `accountType`, `signupSource`, `flagged`, `behaviorVec` | +| `Merchant` | Vertex | `merchantId`, `category`, `riskTier` | +| `User` | Vertex | `userId`, `preferenceVec` | +| `Product` | Vertex | `productId`, `name`, `category`, `price`, `embedding` | +| `Equipment` | Vertex | `equipmentId`, `name`, `specifications`, `failureRate` | +| `Sensor` | Vertex | `sensorId`, `sensorType`, `unit` | +| `TRANSFERRED` | Edge | Account → Account (`amount`, `recordedAt`) | +| `LINKED_DEVICE` | Edge | Account → Account (`deviceId`) | +| `TRANSACTED` | Edge | Account → Merchant (`amount`, `recordedAt`) | +| `PURCHASED` | Edge | User → Product | +| `DEPENDS_ON` | Edge | Equipment → Equipment (`criticality`) | +| `MONITORED_BY` | Edge | Equipment → Sensor | +| `TransactionMetric` | Document | `accountId`, `txCount`, `totalAmount`, `recordedAt` | +| `SensorReading` | Document | `equipmentId`, `temperature`, `vibration`, `pressure`, `recordedAt` | +| `FeatureSnapshot` | Document | `entityId`, `entityType`, `featureVector`, `computedAt`, `modelVersion` | + +## Query Patterns + +| # | Pattern | Language | Signal type | Domain | +|---|---------|----------|-------------|--------| +| 1 | Account Graph Features | SQL MATCH | Graph | Fraud | +| 2 | Distance to Flagged Account | SQL MATCH | Graph | Fraud | +| 3 | Behavior Similarity Search | SQL + vectorNeighbors | Vector | Fraud | +| 4 | Transaction Velocity | SQL | Time-series | Fraud | +| 5 | Shared Device Network | Cypher | Graph | Fraud | +| 6 | Collaborative Filtering | Cypher | Graph | Recommendations | +| 7 | Product Embedding Search | SQL + vectorNeighbors | Vector | Recommendations | +| 8 | Personalized Ranking | SQL + vectorNeighbors | Vector | Recommendations | +| 9 | Equipment Dependency Chain | SQL MATCH | Graph | Maintenance | +| 10 | Sensor Anomaly Detection | SQL | Time-series | Maintenance | +| 11 | Feature Vector Assembly | SQL (multi-step) | All | Cross-domain | + +## Sample Data + +### Fraud Domain +- 6 accounts (a1–a3 legit, a4–a5 suspicious, a6 flagged) with 4-d behavior vectors +- 4 merchants (grocery, electronics, gambling, crypto) +- ~12 TRANSFERRED edges with circular money flow pattern (a4↔a5↔a6) +- 4 LINKED_DEVICE edges (a4, a5 share devices with flagged a6) +- 15 TransactionMetric documents across 3 time buckets + +### Recommendation Domain +- 5 users with 4-d preference vectors +- 8 products across Electronics, Books, Sports with 4-d embeddings +- 14 PURCHASED edges with deliberate overlap (u1+u2 share purchases → collab signal) + +### Maintenance Domain +- 5 equipment units with dependency chain (eq2, eq3, eq4 depend on eq1) +- 3 sensors monitoring equipment +- 12 SensorReading documents (eq1 showing anomalous high temperature/vibration) + +### Feature Store Infrastructure +- 3 FeatureSnapshot documents for audit trail + +## Connectivity + +| Runner | Protocol | Port | +|--------|----------|------| +| curl / shell | HTTP API | 2480 | +| Java (`arcadedb-network`) | HTTP API | 2480 | +| JavaScript (`pg`) | PostgreSQL wire protocol | 5432 | + +The JavaScript module uses the `{cypher}` prefix for Cypher queries over PostgreSQL protocol. + +## ArcadeDB Version Notes + +This use case targets ArcadeDB **26.3.2**. Vector similarity queries use +`vectorNeighbors('TypeName[property]', vector, k)` with an `LSM_VECTOR` +index. + +## Reference + +[ArcadeDB AI/ML Feature Store use case](https://arcadedb.com/ai-ml-feature-store.html) diff --git a/feature-store/docker-compose.yml b/feature-store/docker-compose.yml new file mode 100644 index 0000000..9b6647a --- /dev/null +++ b/feature-store/docker-compose.yml @@ -0,0 +1,16 @@ +services: + arcadedb: + image: arcadedata/arcadedb:26.3.2 + ports: + - "2480:2480" + - "5432:5432" + environment: + JAVA_OPTS: >- + -Darcadedb.server.rootPassword=arcadedb + -Darcadedb.server.plugins=Postgres:com.arcadedb.postgres.PostgresProtocolPlugin + healthcheck: + test: ["CMD", "curl", "-sf", "http://localhost:2480/api/v1/ready"] + interval: 5s + timeout: 3s + retries: 20 + start_period: 10s diff --git a/feature-store/java/pom.xml b/feature-store/java/pom.xml new file mode 100644 index 0000000..57292d1 --- /dev/null +++ b/feature-store/java/pom.xml @@ -0,0 +1,57 @@ + + + 4.0.0 + + com.arcadedb.examples + feature-store + 1.0-SNAPSHOT + jar + + + 21 + 21 + UTF-8 + 26.3.2 + + + + + com.arcadedb + arcadedb-network + ${arcadedb.version} + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.7.1 + + + + com.arcadedb.examples.FeatureStore + + + + jar-with-dependencies + + feature-store + false + + + + make-assembly + package + + single + + + + + + + diff --git a/feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java b/feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java new file mode 100644 index 0000000..7152984 --- /dev/null +++ b/feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java @@ -0,0 +1,391 @@ +package com.arcadedb.examples; + +import com.arcadedb.query.sql.executor.Result; +import com.arcadedb.query.sql.executor.ResultSet; +import com.arcadedb.remote.RemoteDatabase; + +public class FeatureStore { + + private static final String HOST = System.getenv().getOrDefault("ARCADEDB_HOST", "localhost"); + private static final int PORT = Integer.parseInt(System.getenv().getOrDefault("ARCADEDB_PORT", "2480")); + private static final String DB_NAME = "FeatureStore"; + private static final String USER = System.getenv().getOrDefault("ARCADEDB_USER", "root"); + private static final String PASSWORD = System.getenv().getOrDefault("ARCADEDB_PASS", "arcadedb"); + + public static void main(String[] args) { + try (RemoteDatabase db = new RemoteDatabase(HOST, PORT, DB_NAME, USER, PASSWORD)) { + System.out.println("========== FRAUD DOMAIN =========="); + tryRun(() -> runQuery1AccountGraphFeatures(db), "Query 1"); + tryRun(() -> runQuery2DistanceToFlagged(db), "Query 2"); + tryRun(() -> runQuery3BehaviorSimilarity(db), "Query 3"); + tryRun(() -> runQuery4TransactionVelocity(db), "Query 4"); + tryRun(() -> runQuery5SharedDeviceNetwork(db), "Query 5"); + + System.out.println("\n========== RECOMMENDATION DOMAIN =========="); + tryRun(() -> runQuery6CollaborativeFiltering(db), "Query 6"); + tryRun(() -> runQuery7ProductEmbeddingSearch(db), "Query 7"); + tryRun(() -> runQuery8PersonalizedRanking(db), "Query 8"); + + System.out.println("\n========== MAINTENANCE DOMAIN =========="); + tryRun(() -> runQuery9EquipmentDependencyChain(db), "Query 9"); + tryRun(() -> runQuery10SensorAnomalyDetection(db), "Query 10"); + + System.out.println("\n========== CROSS-DOMAIN =========="); + tryRun(() -> runQuery11FeatureVectorAssembly(db), "Query 11"); + } + System.out.println("\nAll queries complete."); + } + + private static void tryRun(Runnable r, String name) { + try { + r.run(); + } catch (Exception e) { + System.err.println("[" + name + " FAILED] " + e.getMessage()); + } + } + + // ── Query 1: Account Graph Features (SQL MATCH) ──────────────────────────── + private static void runQuery1AccountGraphFeatures(RemoteDatabase db) { + printHeader("Query 1: Account Graph Features (SQL MATCH)", + "Compute graph topology features for account a4."); + + String sql = + """ + SELECT inDeg, outDeg, counterparties + FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties + )"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" inDeg: " + r.getProperty("inDeg") + + " | outDeg: " + r.getProperty("outDeg") + + " | counterparties: " + r.getProperty("counterparties")); + } + } + } + + // ── Query 2: Distance to Flagged Account (SQL MATCH) ─────────────────────── + private static void runQuery2DistanceToFlagged(RemoteDatabase db) { + printHeader("Query 2: Distance to Flagged Account (SQL MATCH)", + "Find shortest path from a4 to nearest flagged account via transfers."); + + String sql = + """ + SELECT flaggedId, depth + FROM ( + MATCH {type: Account, where: (accountId = 'a4')} + .both('TRANSFERRED'){while: ($depth < 4), as: hop} + {type: Account, where: (flagged = true), as: flagged} + RETURN flagged.accountId AS flaggedId, $depth AS depth + ) + ORDER BY depth ASC + LIMIT 1"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" flagged: " + r.getProperty("flaggedId") + + " | depth: " + r.getProperty("depth")); + } + } + } + + // ── Query 3: Behavior Similarity Search (SQL) ────────────────────────────── + private static void runQuery3BehaviorSimilarity(RemoteDatabase db) { + printHeader("Query 3: Behavior Similarity Search (SQL)", + "Find accounts with behavior vectors similar to flagged a6 [0.9,0.8,0.1,0.2]."); + + String sql = + """ + SELECT accountId, accountType, flagged + FROM Account + ORDER BY vectorNeighbors('Account[behaviorVec]', [0.9, 0.8, 0.1, 0.2], 10) DESC + LIMIT 5"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" " + r.getProperty("accountId") + + " | type: " + r.getProperty("accountType") + + " | flagged: " + r.getProperty("flagged")); + } + } + } + + // ── Query 4: Transaction Velocity (SQL) ──────────────────────────────────── + private static void runQuery4TransactionVelocity(RemoteDatabase db) { + printHeader("Query 4: Transaction Velocity (SQL)", + "Aggregate TransactionMetric for velocity features per account."); + + String sql = + """ + SELECT accountId, + sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount + FROM TransactionMetric + GROUP BY accountId + ORDER BY totalTx DESC"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" " + r.getProperty("accountId") + + " | totalTx: " + r.getProperty("totalTx") + + " | totalAmount: " + r.getProperty("totalAmount") + + " | avgBucket: " + r.getProperty("avgBucketAmount")); + } + } + } + + // ── Query 5: Shared Device Network (Cypher) ──────────────────────────────── + private static void runQuery5SharedDeviceNetwork(RemoteDatabase db) { + printHeader("Query 5: Shared Device Network (Cypher)", + "Find accounts sharing devices with flagged accounts."); + + String cypher = + """ + MATCH (flagged:Account {flagged: true}) + -[:LINKED_DEVICE]-(suspect:Account) + WHERE suspect.flagged = false + RETURN DISTINCT suspect.accountId, suspect.accountType, + flagged.accountId AS linkedToFlagged"""; + + try (ResultSet rs = db.query("cypher", cypher)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" " + r.getProperty("suspect.accountId") + + " | type: " + r.getProperty("suspect.accountType") + + " | linked to: " + r.getProperty("linkedToFlagged")); + } + } + } + + // ── Query 6: Collaborative Filtering (Cypher) ────────────────────────────── + private static void runQuery6CollaborativeFiltering(RemoteDatabase db) { + printHeader("Query 6: Collaborative Filtering (Cypher)", + "Find products to recommend to u1 based on shared purchases."); + + String cypher = + """ + MATCH (me:User {userId: 'u1'}) + -[:PURCHASED]->(p:Product) + <-[:PURCHASED]-(other:User) + -[:PURCHASED]->(rec:Product) + WHERE rec <> p + AND NOT (me)-[:PURCHASED]->(rec) + RETURN rec.name, rec.category, count(DISTINCT other) AS score + ORDER BY score DESC LIMIT 10"""; + + try (ResultSet rs = db.query("cypher", cypher)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" " + r.getProperty("rec.name") + + " | " + r.getProperty("rec.category") + + " | score: " + r.getProperty("score")); + } + } + } + + // ── Query 7: Product Embedding Search (SQL) ──────────────────────────────── + private static void runQuery7ProductEmbeddingSearch(RemoteDatabase db) { + printHeader("Query 7: Product Embedding Search (SQL)", + "Find products similar to Laptop embedding [0.9,0.1,0.1,0.1]."); + + String sql = + """ + SELECT name, category, price + FROM Product + ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 10) DESC + LIMIT 5"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-20s | %-12s | $%-8.2f%n", + r.getProperty("name"), + r.getProperty("category"), + ((Number) r.getProperty("price")).doubleValue()); + } + } + } + + // ── Query 8: Personalized Ranking (SQL) ──────────────────────────────────── + private static void runQuery8PersonalizedRanking(RemoteDatabase db) { + printHeader("Query 8: Personalized Ranking (SQL)", + "Rank Electronics products for u1 by preference vector similarity."); + + String sql = + """ + SELECT name, price + FROM Product + WHERE category = 'Electronics' + ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 20) DESC + LIMIT 10"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-20s | $%-8.2f%n", + r.getProperty("name"), + ((Number) r.getProperty("price")).doubleValue()); + } + } + } + + // ── Query 9: Equipment Dependency Chain (SQL MATCH) ──────────────────────── + private static void runQuery9EquipmentDependencyChain(RemoteDatabase db) { + printHeader("Query 9: Equipment Dependency Chain (SQL MATCH)", + "Find all downstream equipment affected if eq1 fails."); + + String sql = + """ + SELECT name, failureRate, criticality + FROM ( + MATCH {type: Equipment, where: (equipmentId = 'eq1')} + .in('DEPENDS_ON'){as: dep} + RETURN dep.name AS name, dep.failureRate AS failureRate, + dep.out('DEPENDS_ON')[0].criticality AS criticality + )"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" " + r.getProperty("name") + + " | failureRate: " + r.getProperty("failureRate") + + " | criticality: " + r.getProperty("criticality")); + } + } + } + + // ── Query 10: Sensor Anomaly Detection (SQL) ────────────────────────────── + private static void runQuery10SensorAnomalyDetection(RemoteDatabase db) { + printHeader("Query 10: Sensor Anomaly Detection (SQL)", + "Find equipment with anomalous sensor readings."); + + String sql = + """ + SELECT equipmentId, + avg(temperature) AS avgTemp, + max(vibration) AS maxVibration, + avg(pressure) AS avgPressure + FROM SensorReading + GROUP BY equipmentId + ORDER BY avgTemp DESC"""; + + try (ResultSet rs = db.query("sql", sql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.printf(" %-15s | avgTemp: %6.1f | maxVib: %4.1f | avgPressure: %4.1f%n", + r.getProperty("equipmentId"), + ((Number) r.getProperty("avgTemp")).doubleValue(), + ((Number) r.getProperty("maxVibration")).doubleValue(), + ((Number) r.getProperty("avgPressure")).doubleValue()); + } + } + } + + // ── Query 11: Feature Vector Assembly (Multi-step) ───────────────────────── + private static void runQuery11FeatureVectorAssembly(RemoteDatabase db) { + printHeader("Query 11: Feature Vector Assembly (Multi-step)", + "Assemble a fraud feature vector for account a4."); + + // Step 1: Graph features + System.out.println(" --- Step 1: Graph features (degree + counterparties) ---"); + String graphSql = + """ + SELECT inDeg, outDeg, counterparties + FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties + )"""; + + try (ResultSet rs = db.query("sql", graphSql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" inDeg: " + r.getProperty("inDeg") + + " | outDeg: " + r.getProperty("outDeg") + + " | counterparties: " + r.getProperty("counterparties")); + } + } + + // Step 2: Vector features + System.out.println(" --- Step 2: Vector features (similarity rank to known fraud) ---"); + String vectorSql = + """ + SELECT accountId, flagged + FROM Account + ORDER BY vectorNeighbors('Account[behaviorVec]', [0.7, 0.6, 0.2, 0.3], 10) DESC + LIMIT 5"""; + + try (ResultSet rs = db.query("sql", vectorSql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" " + r.getProperty("accountId") + + " | flagged: " + r.getProperty("flagged")); + } + } + + // Step 3: Time-series features + System.out.println(" --- Step 3: Time-series features (transaction velocity) ---"); + String tsSql = + """ + SELECT sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount + FROM TransactionMetric + WHERE accountId = 'a4'"""; + + try (ResultSet rs = db.query("sql", tsSql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" totalTx: " + r.getProperty("totalTx") + + " | totalAmount: " + r.getProperty("totalAmount") + + " | avgBucket: " + r.getProperty("avgBucketAmount")); + } + } + + // Step 4: Store feature snapshot + System.out.println(" --- Step 4: Store feature snapshot ---"); + String insertSql = + """ + INSERT INTO FeatureSnapshot SET entityId = 'a4', entityType = 'Account', + featureVector = [8, 6, 3, 67, 145000, 0.87], + computedAt = '2026-03-23 00:00:00', modelVersion = 'fraud-v2.2'"""; + + db.command("sql", insertSql); + System.out.println(" (Snapshot stored)"); + + // Verify + System.out.println(" --- Verify: Feature snapshots for a4 ---"); + String verifySql = + """ + SELECT entityId, modelVersion, computedAt + FROM FeatureSnapshot + WHERE entityId = 'a4' + ORDER BY computedAt DESC"""; + + try (ResultSet rs = db.query("sql", verifySql)) { + while (rs.hasNext()) { + Result r = rs.next(); + System.out.println(" " + r.getProperty("entityId") + + " | version: " + r.getProperty("modelVersion") + + " | computed: " + r.getProperty("computedAt")); + } + } + } + + private static void printHeader(String title, String description) { + System.out.println("\n" + "=".repeat(70)); + System.out.println(" " + title); + System.out.println(" " + description); + System.out.println("=".repeat(70)); + } +} diff --git a/feature-store/js/feature-store.js b/feature-store/js/feature-store.js new file mode 100644 index 0000000..cafea76 --- /dev/null +++ b/feature-store/js/feature-store.js @@ -0,0 +1,329 @@ +const { Client } = require('pg'); + +const HOST = process.env.ARCADEDB_HOST || 'localhost'; +const PG_PORT = process.env.ARCADEDB_PG_PORT || '5432'; +const DB_NAME = 'FeatureStore'; +const USER = process.env.ARCADEDB_USER || 'root'; +const PASSWORD = process.env.ARCADEDB_PASS || 'arcadedb'; + +function printHeader(title, description) { + console.log('\n' + '='.repeat(70)); + console.log(' ' + title); + console.log(' ' + description); + console.log('='.repeat(70)); +} + +async function tryRun(fn, name) { + try { + await fn(); + } catch (e) { + console.error('[' + name + ' FAILED] ' + e.message); + } +} + +// ── Query 1: Account Graph Features (SQL MATCH) ───────────────────────────── +async function runQuery1(client) { + printHeader('Query 1: Account Graph Features (SQL MATCH)', + 'Compute graph topology features for account a4.'); + + const sql = ` + SELECT inDeg, outDeg, counterparties + FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties + )`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` inDeg: ${row.indeg} | outDeg: ${row.outdeg} | counterparties: ${row.counterparties}`); + } +} + +// ── Query 2: Distance to Flagged Account (SQL MATCH) ──────────────────────── +async function runQuery2(client) { + printHeader('Query 2: Distance to Flagged Account (SQL MATCH)', + 'Find shortest path from a4 to nearest flagged account via transfers.'); + + const sql = ` + SELECT flaggedId, depth + FROM ( + MATCH {type: Account, where: (accountId = 'a4')} + .both('TRANSFERRED'){while: ($depth < 4), as: hop} + {type: Account, where: (flagged = true), as: flagged} + RETURN flagged.accountId AS flaggedId, $depth AS depth + ) + ORDER BY depth ASC + LIMIT 1`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` flagged: ${row.flaggedid} | depth: ${row.depth}`); + } +} + +// ── Query 3: Behavior Similarity Search (SQL) ─────────────────────────────── +async function runQuery3(client) { + printHeader('Query 3: Behavior Similarity Search (SQL)', + 'Find accounts with behavior vectors similar to flagged a6 [0.9,0.8,0.1,0.2].'); + + const sql = ` + SELECT accountId, accountType, flagged + FROM Account + ORDER BY vectorNeighbors('Account[behaviorVec]', [0.9, 0.8, 0.1, 0.2], 10) DESC + LIMIT 5`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` ${String(row.accountid).padEnd(5)} | type: ${String(row.accounttype).padEnd(10)} | flagged: ${row.flagged}`); + } +} + +// ── Query 4: Transaction Velocity (SQL) ───────────────────────────────────── +async function runQuery4(client) { + printHeader('Query 4: Transaction Velocity (SQL)', + 'Aggregate TransactionMetric for velocity features per account.'); + + const sql = ` + SELECT accountId, + sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount + FROM TransactionMetric + GROUP BY accountId + ORDER BY totalTx DESC`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` ${String(row.accountid).padEnd(5)} | totalTx: ${String(row.totaltx).padStart(4)} | totalAmount: ${String(row.totalamount).padStart(10)} | avgBucket: ${row.avgbucketamount}`); + } +} + +// ── Query 5: Shared Device Network (Cypher via {cypher} prefix) ───────────── +async function runQuery5(client) { + printHeader('Query 5: Shared Device Network (Cypher)', + 'Find accounts sharing devices with flagged accounts.'); + + const sql = `{cypher} MATCH (flagged:Account {flagged: true}) + -[:LINKED_DEVICE]-(suspect:Account) + WHERE suspect.flagged = false + RETURN DISTINCT suspect.accountId, suspect.accountType, + flagged.accountId AS linkedToFlagged`; + + const res = await client.query(sql); + for (const row of res.rows) { + const accountId = row['suspect.accountid'] || row['suspect.accountId']; + const accountType = row['suspect.accounttype'] || row['suspect.accountType']; + const linked = row['linkedtoflagged'] || row['linkedToFlagged']; + console.log(` ${accountId} | type: ${accountType} | linked to: ${linked}`); + } +} + +// ── Query 6: Collaborative Filtering (Cypher via {cypher} prefix) ─────────── +async function runQuery6(client) { + printHeader('Query 6: Collaborative Filtering (Cypher)', + 'Find products to recommend to u1 based on shared purchases.'); + + const sql = `{cypher} MATCH (me:User {userId: 'u1'}) + -[:PURCHASED]->(p:Product) + <-[:PURCHASED]-(other:User) + -[:PURCHASED]->(rec:Product) + WHERE rec <> p + AND NOT (me)-[:PURCHASED]->(rec) + RETURN rec.name, rec.category, count(DISTINCT other) AS score + ORDER BY score DESC LIMIT 10`; + + const res = await client.query(sql); + for (const row of res.rows) { + const name = row['rec.name']; + const category = row['rec.category']; + console.log(` ${String(name).padEnd(20)} | ${String(category).padEnd(12)} | score: ${row.score}`); + } +} + +// ── Query 7: Product Embedding Search (SQL) ───────────────────────────────── +async function runQuery7(client) { + printHeader('Query 7: Product Embedding Search (SQL)', + 'Find products similar to Laptop embedding [0.9,0.1,0.1,0.1].'); + + const sql = ` + SELECT name, category, price + FROM Product + ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 10) DESC + LIMIT 5`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` ${String(row.name).padEnd(20)} | ${String(row.category).padEnd(12)} | $${row.price}`); + } +} + +// ── Query 8: Personalized Ranking (Cypher via {cypher} prefix) ────────────── +async function runQuery8(client) { + printHeader('Query 8: Personalized Ranking (SQL)', + 'Rank Electronics products for u1 by preference vector similarity.'); + + const sql = ` + SELECT name, price + FROM Product + WHERE category = 'Electronics' + ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 20) DESC + LIMIT 10`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` ${String(row.name).padEnd(20)} | $${row.price}`); + } +} + +// ── Query 9: Equipment Dependency Chain (SQL MATCH) ───────────────────────── +async function runQuery9(client) { + printHeader('Query 9: Equipment Dependency Chain (SQL MATCH)', + 'Find all downstream equipment affected if eq1 fails.'); + + const sql = ` + SELECT name, failureRate, criticality + FROM ( + MATCH {type: Equipment, where: (equipmentId = 'eq1')} + .in('DEPENDS_ON'){as: dep} + RETURN dep.name AS name, dep.failureRate AS failureRate, + dep.out('DEPENDS_ON')[0].criticality AS criticality + )`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` ${String(row.name).padEnd(20)} | failureRate: ${row.failurerate} | criticality: ${row.criticality}`); + } +} + +// ── Query 10: Sensor Anomaly Detection (SQL) ──────────────────────────────── +async function runQuery10(client) { + printHeader('Query 10: Sensor Anomaly Detection (SQL)', + 'Find equipment with anomalous sensor readings.'); + + const sql = ` + SELECT equipmentId, + avg(temperature) AS avgTemp, + max(vibration) AS maxVibration, + avg(pressure) AS avgPressure + FROM SensorReading + GROUP BY equipmentId + ORDER BY avgTemp DESC`; + + const res = await client.query(sql); + for (const row of res.rows) { + console.log(` ${String(row.equipmentid).padEnd(5)} | avgTemp: ${Number(row.avgtemp).toFixed(1).padStart(6)} | maxVib: ${Number(row.maxvibration).toFixed(1).padStart(4)} | avgPressure: ${Number(row.avgpressure).toFixed(1)}`); + } +} + +// ── Query 11: Feature Vector Assembly (Multi-step) ────────────────────────── +async function runQuery11(client) { + printHeader('Query 11: Feature Vector Assembly (Multi-step)', + 'Assemble a fraud feature vector for account a4.'); + + // Step 1: Graph features + console.log(' --- Step 1: Graph features (degree + counterparties) ---'); + const graphSql = ` + SELECT inDeg, outDeg, counterparties + FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties + )`; + + const graphRes = await client.query(graphSql); + for (const row of graphRes.rows) { + console.log(` inDeg: ${row.indeg} | outDeg: ${row.outdeg} | counterparties: ${row.counterparties}`); + } + + // Step 2: Vector features + console.log(' --- Step 2: Vector features (similarity rank to known fraud) ---'); + const vectorSql = ` + SELECT accountId, flagged + FROM Account + ORDER BY vectorNeighbors('Account[behaviorVec]', [0.7, 0.6, 0.2, 0.3], 10) DESC + LIMIT 5`; + + const vectorRes = await client.query(vectorSql); + for (const row of vectorRes.rows) { + console.log(` ${row.accountid} | flagged: ${row.flagged}`); + } + + // Step 3: Time-series features + console.log(' --- Step 3: Time-series features (transaction velocity) ---'); + const tsSql = ` + SELECT sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount + FROM TransactionMetric + WHERE accountId = 'a4'`; + + const tsRes = await client.query(tsSql); + for (const row of tsRes.rows) { + console.log(` totalTx: ${row.totaltx} | totalAmount: ${row.totalamount} | avgBucket: ${row.avgbucketamount}`); + } + + // Step 4: Store feature snapshot + console.log(' --- Step 4: Store feature snapshot ---'); + const insertSql = ` + INSERT INTO FeatureSnapshot SET entityId = 'a4', entityType = 'Account', + featureVector = [8, 6, 3, 67, 145000, 0.87], + computedAt = '2026-03-23 00:00:00', modelVersion = 'fraud-v2.2'`; + + await client.query(insertSql); + console.log(' (Snapshot stored)'); + + // Verify + console.log(' --- Verify: Feature snapshots for a4 ---'); + const verifySql = ` + SELECT entityId, modelVersion, computedAt + FROM FeatureSnapshot + WHERE entityId = 'a4' + ORDER BY computedAt DESC`; + + const verifyRes = await client.query(verifySql); + for (const row of verifyRes.rows) { + console.log(` ${row.entityid} | version: ${row.modelversion} | computed: ${row.computedat}`); + } +} + +async function main() { + const client = new Client({ + host: HOST, + port: parseInt(PG_PORT), + database: DB_NAME, + user: USER, + password: PASSWORD, + }); + + await client.connect(); + console.log('Connected to ArcadeDB via PostgreSQL protocol on port ' + PG_PORT); + + try { + console.log('========== FRAUD DOMAIN =========='); + await tryRun(() => runQuery1(client), 'Query 1'); + await tryRun(() => runQuery2(client), 'Query 2'); + await tryRun(() => runQuery3(client), 'Query 3'); + await tryRun(() => runQuery4(client), 'Query 4'); + await tryRun(() => runQuery5(client), 'Query 5'); + + console.log('\n========== RECOMMENDATION DOMAIN =========='); + await tryRun(() => runQuery6(client), 'Query 6'); + await tryRun(() => runQuery7(client), 'Query 7'); + await tryRun(() => runQuery8(client), 'Query 8'); + + console.log('\n========== MAINTENANCE DOMAIN =========='); + await tryRun(() => runQuery9(client), 'Query 9'); + await tryRun(() => runQuery10(client), 'Query 10'); + + console.log('\n========== CROSS-DOMAIN =========='); + await tryRun(() => runQuery11(client), 'Query 11'); + } finally { + await client.end(); + } + console.log('\nAll queries complete.'); +} + +main(); diff --git a/feature-store/js/package.json b/feature-store/js/package.json new file mode 100644 index 0000000..c6dc913 --- /dev/null +++ b/feature-store/js/package.json @@ -0,0 +1,10 @@ +{ + "name": "feature-store", + "version": "1.0.0", + "private": true, + "description": "ArcadeDB Feature Store queries via PostgreSQL protocol", + "main": "feature-store.js", + "dependencies": { + "pg": "^8.13.0" + } +} diff --git a/feature-store/queries/queries.sh b/feature-store/queries/queries.sh new file mode 100755 index 0000000..58ec64d --- /dev/null +++ b/feature-store/queries/queries.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# AI/ML Feature Store — all 11 query patterns via curl +# Prerequisites: ArcadeDB running, setup.sh already executed, jq installed +# Usage: ./queries/queries.sh + +set -euo pipefail + +ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}" +ARCADEDB_USER="${ARCADEDB_USER:-root}" +ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}" +AUTH="${ARCADEDB_USER}:${ARCADEDB_PASS}" +DB="FeatureStore" +QUERY_URL="${ARCADEDB_URL}/api/v1/query/${DB}" +COMMAND_URL="${ARCADEDB_URL}/api/v1/command/${DB}" + +query() { + local lang="$1" cmd="$2" + jq -cn --arg l "$lang" --arg c "$cmd" '{"language":$l,"command":$c}' \ + | curl -sf -u "$AUTH" -X POST "$QUERY_URL" \ + -H "Content-Type: application/json" -d @- \ + | jq '.result' +} + +command() { + local lang="$1" cmd="$2" + jq -cn --arg l "$lang" --arg c "$cmd" '{"language":$l,"command":$c}' \ + | curl -sf -u "$AUTH" -X POST "$COMMAND_URL" \ + -H "Content-Type: application/json" -d @- \ + | jq '.result' +} + +# ───────────────────────────────────────────────────────────────────────────── +echo "========== FRAUD DOMAIN ==========" +echo "" +echo "=== Query 1: Account Graph Features (SQL MATCH) ===" +echo "Compute graph topology features for account a4." +echo "" +query "sql" " +SELECT inDeg, outDeg, counterparties +FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties +) +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 2: Distance to Flagged Account (SQL MATCH) ===" +echo "Find shortest path from a4 to nearest flagged account via transfers." +echo "" +query "sql" " +SELECT flaggedId, depth +FROM ( + MATCH {type: Account, where: (accountId = 'a4')} + .both('TRANSFERRED'){while: (\$depth < 4), as: hop} + {type: Account, where: (flagged = true), as: flagged} + RETURN flagged.accountId AS flaggedId, \$depth AS depth +) +ORDER BY depth ASC +LIMIT 1 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 3: Behavior Similarity Search (SQL) ===" +echo "Find accounts with behavior vectors similar to flagged a6 [0.9,0.8,0.1,0.2]." +echo "" +query "sql" " +SELECT accountId, accountType, flagged +FROM Account +ORDER BY vectorNeighbors('Account[behaviorVec]', [0.9, 0.8, 0.1, 0.2], 10) DESC +LIMIT 5 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 4: Transaction Velocity (SQL) ===" +echo "Aggregate TransactionMetric for velocity features per account." +echo "" +query "sql" " +SELECT accountId, + sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount +FROM TransactionMetric +GROUP BY accountId +ORDER BY totalTx DESC +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 5: Shared Device Network (Cypher) ===" +echo "Find accounts sharing devices with flagged accounts." +echo "" +query "cypher" " +MATCH (flagged:Account {flagged: true}) + -[:LINKED_DEVICE]-(suspect:Account) +WHERE suspect.flagged = false +RETURN DISTINCT suspect.accountId, suspect.accountType, + flagged.accountId AS linkedToFlagged +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "========== RECOMMENDATION DOMAIN ==========" +echo "" +echo "=== Query 6: Collaborative Filtering (Cypher) ===" +echo "Find products to recommend to u1 based on shared purchases." +echo "" +query "cypher" " +MATCH (me:User {userId: 'u1'}) + -[:PURCHASED]->(p:Product) + <-[:PURCHASED]-(other:User) + -[:PURCHASED]->(rec:Product) +WHERE rec <> p + AND NOT (me)-[:PURCHASED]->(rec) +RETURN rec.name, rec.category, count(DISTINCT other) AS score +ORDER BY score DESC LIMIT 10 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 7: Product Embedding Search (SQL) ===" +echo "Find products similar to Laptop embedding [0.9,0.1,0.1,0.1]." +echo "" +query "sql" " +SELECT name, category, price +FROM Product +ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 10) DESC +LIMIT 5 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 8: Personalized Ranking (SQL) ===" +echo "Rank Electronics products for u1 by preference vector similarity." +echo "" +query "sql" " +SELECT name, price +FROM Product +WHERE category = 'Electronics' +ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 20) DESC +LIMIT 10 +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "========== MAINTENANCE DOMAIN ==========" +echo "" +echo "=== Query 9: Equipment Dependency Chain (SQL MATCH) ===" +echo "Find all downstream equipment affected if eq1 fails." +echo "" +query "sql" " +SELECT name, failureRate, criticality +FROM ( + MATCH {type: Equipment, where: (equipmentId = 'eq1')} + .in('DEPENDS_ON'){as: dep} + RETURN dep.name AS name, dep.failureRate AS failureRate, + dep.out('DEPENDS_ON')[0].criticality AS criticality +) +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "=== Query 10: Sensor Anomaly Detection (SQL) ===" +echo "Find equipment with anomalous sensor readings." +echo "" +query "sql" " +SELECT equipmentId, + avg(temperature) AS avgTemp, + max(vibration) AS maxVibration, + avg(pressure) AS avgPressure +FROM SensorReading +GROUP BY equipmentId +ORDER BY avgTemp DESC +" + +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "========== CROSS-DOMAIN ==========" +echo "" +echo "=== Query 11: Feature Vector Assembly (Multi-step) ===" +echo "Assemble a fraud feature vector for account a4." +echo "" + +echo "--- Step 1: Graph features (degree + counterparties) ---" +query "sql" " +SELECT inDeg, outDeg, counterparties +FROM ( + MATCH {type: Account, where: (accountId = 'a4'), as: acct} + RETURN acct.in('TRANSFERRED').size() AS inDeg, + acct.out('TRANSFERRED').size() AS outDeg, + acct.both('TRANSFERRED').size() AS counterparties +) +" + +echo "" +echo "--- Step 2: Vector features (similarity rank to known fraud) ---" +query "sql" " +SELECT accountId, flagged +FROM Account +ORDER BY vectorNeighbors('Account[behaviorVec]', [0.7, 0.6, 0.2, 0.3], 10) DESC +LIMIT 5 +" + +echo "" +echo "--- Step 3: Time-series features (transaction velocity) ---" +query "sql" " +SELECT sum(txCount) AS totalTx, + sum(totalAmount) AS totalAmount, + avg(totalAmount) AS avgBucketAmount +FROM TransactionMetric +WHERE accountId = 'a4' +" + +echo "" +echo "--- Step 4: Store feature snapshot ---" +command "sql" " +INSERT INTO FeatureSnapshot SET entityId = 'a4', entityType = 'Account', + featureVector = [8, 6, 3, 67, 145000, 0.87], + computedAt = '2026-03-23 00:00:00', modelVersion = 'fraud-v2.2' +" +echo "(Snapshot stored)" + +echo "" +echo "--- Verify: Feature snapshots for a4 ---" +query "sql" " +SELECT entityId, modelVersion, computedAt +FROM FeatureSnapshot +WHERE entityId = 'a4' +ORDER BY computedAt DESC +" diff --git a/feature-store/setup.sh b/feature-store/setup.sh new file mode 100755 index 0000000..67deb9f --- /dev/null +++ b/feature-store/setup.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -euo pipefail + +ARCADEDB_URL="${ARCADEDB_URL:-http://localhost:2480}" +ARCADEDB_USER="${ARCADEDB_USER:-root}" +ARCADEDB_PASS="${ARCADEDB_PASS:-arcadedb}" +DB_NAME="FeatureStore" + +# ── Wait for ArcadeDB ───────────────────────────────────────────────────────── +echo "Waiting for ArcadeDB at ${ARCADEDB_URL}..." +until curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + "${ARCADEDB_URL}/api/v1/ready" > /dev/null 2>&1; do + sleep 2 +done +echo "ArcadeDB is ready." + +# ── Create database ─────────────────────────────────────────────────────────── +echo "Creating database ${DB_NAME}..." +curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + -X POST "${ARCADEDB_URL}/api/v1/server" \ + -H "Content-Type: application/json" \ + -d "{\"command\": \"create database ${DB_NAME}\"}" > /dev/null || true +echo "Database ready." + +# ── Helper: send one SQL statement ─────────────────────────────────────────── +send_sql() { + local stmt="$1" + jq -cn --arg cmd "$stmt" '{"language":"sql","command":$cmd}' \ + | curl -sf -u "${ARCADEDB_USER}:${ARCADEDB_PASS}" \ + -X POST "${ARCADEDB_URL}/api/v1/command/${DB_NAME}" \ + -H "Content-Type: application/json" \ + -d @- > /dev/null +} + +# ── Apply a SQL file (one statement per line) ───────────────────────────────── +apply_file() { + local file="$1" + echo "Applying ${file}..." + while IFS= read -r line || [[ -n "$line" ]]; do + # skip blank lines and SQL comments + [[ -z "${line//[[:space:]]/}" || "$line" =~ ^[[:space:]]*-- ]] && continue + send_sql "${line%%;}" + done < "$file" + echo "Done: ${file}" +} + +apply_file "sql/01-schema.sql" +apply_file "sql/02-data.sql" + +echo "" +echo "Setup complete. ${DB_NAME} is ready." diff --git a/feature-store/sql/01-schema.sql b/feature-store/sql/01-schema.sql new file mode 100644 index 0000000..46ee4ed --- /dev/null +++ b/feature-store/sql/01-schema.sql @@ -0,0 +1,68 @@ +-- Fraud domain +CREATE VERTEX TYPE Account IF NOT EXISTS; +CREATE PROPERTY Account.accountId IF NOT EXISTS STRING; +CREATE PROPERTY Account.accountType IF NOT EXISTS STRING; +CREATE PROPERTY Account.signupSource IF NOT EXISTS STRING; +CREATE PROPERTY Account.flagged IF NOT EXISTS BOOLEAN; +CREATE PROPERTY Account.behaviorVec IF NOT EXISTS LIST; +CREATE INDEX IF NOT EXISTS ON Account (accountId) UNIQUE; +CREATE VERTEX TYPE Merchant IF NOT EXISTS; +CREATE PROPERTY Merchant.merchantId IF NOT EXISTS STRING; +CREATE PROPERTY Merchant.category IF NOT EXISTS STRING; +CREATE PROPERTY Merchant.riskTier IF NOT EXISTS STRING; +-- Recommendation domain +CREATE VERTEX TYPE User IF NOT EXISTS; +CREATE PROPERTY User.userId IF NOT EXISTS STRING; +CREATE PROPERTY User.preferenceVec IF NOT EXISTS LIST; +CREATE INDEX IF NOT EXISTS ON User (userId) UNIQUE; +CREATE VERTEX TYPE Product IF NOT EXISTS; +CREATE PROPERTY Product.productId IF NOT EXISTS STRING; +CREATE PROPERTY Product.name IF NOT EXISTS STRING; +CREATE PROPERTY Product.category IF NOT EXISTS STRING; +CREATE PROPERTY Product.price IF NOT EXISTS FLOAT; +CREATE PROPERTY Product.embedding IF NOT EXISTS LIST; +-- Maintenance domain +CREATE VERTEX TYPE Equipment IF NOT EXISTS; +CREATE PROPERTY Equipment.equipmentId IF NOT EXISTS STRING; +CREATE PROPERTY Equipment.name IF NOT EXISTS STRING; +CREATE PROPERTY Equipment.specifications IF NOT EXISTS STRING; +CREATE PROPERTY Equipment.failureRate IF NOT EXISTS FLOAT; +CREATE INDEX IF NOT EXISTS ON Equipment (equipmentId) UNIQUE; +CREATE VERTEX TYPE Sensor IF NOT EXISTS; +CREATE PROPERTY Sensor.sensorId IF NOT EXISTS STRING; +CREATE PROPERTY Sensor.sensorType IF NOT EXISTS STRING; +CREATE PROPERTY Sensor.unit IF NOT EXISTS STRING; +-- Edge types +CREATE EDGE TYPE TRANSFERRED IF NOT EXISTS; +CREATE PROPERTY TRANSFERRED.amount IF NOT EXISTS FLOAT; +CREATE PROPERTY TRANSFERRED.recordedAt IF NOT EXISTS DATETIME; +CREATE EDGE TYPE LINKED_DEVICE IF NOT EXISTS; +CREATE PROPERTY LINKED_DEVICE.deviceId IF NOT EXISTS STRING; +CREATE EDGE TYPE TRANSACTED IF NOT EXISTS; +CREATE PROPERTY TRANSACTED.amount IF NOT EXISTS FLOAT; +CREATE PROPERTY TRANSACTED.recordedAt IF NOT EXISTS DATETIME; +CREATE EDGE TYPE PURCHASED IF NOT EXISTS; +CREATE EDGE TYPE DEPENDS_ON IF NOT EXISTS; +CREATE PROPERTY DEPENDS_ON.criticality IF NOT EXISTS STRING; +CREATE EDGE TYPE MONITORED_BY IF NOT EXISTS; +-- Document types +CREATE DOCUMENT TYPE TransactionMetric IF NOT EXISTS; +CREATE PROPERTY TransactionMetric.accountId IF NOT EXISTS STRING; +CREATE PROPERTY TransactionMetric.txCount IF NOT EXISTS LONG; +CREATE PROPERTY TransactionMetric.totalAmount IF NOT EXISTS FLOAT; +CREATE PROPERTY TransactionMetric.recordedAt IF NOT EXISTS DATETIME; +CREATE DOCUMENT TYPE SensorReading IF NOT EXISTS; +CREATE PROPERTY SensorReading.equipmentId IF NOT EXISTS STRING; +CREATE PROPERTY SensorReading.temperature IF NOT EXISTS FLOAT; +CREATE PROPERTY SensorReading.vibration IF NOT EXISTS FLOAT; +CREATE PROPERTY SensorReading.pressure IF NOT EXISTS FLOAT; +CREATE PROPERTY SensorReading.recordedAt IF NOT EXISTS DATETIME; +CREATE DOCUMENT TYPE FeatureSnapshot IF NOT EXISTS; +CREATE PROPERTY FeatureSnapshot.entityId IF NOT EXISTS STRING; +CREATE PROPERTY FeatureSnapshot.entityType IF NOT EXISTS STRING; +CREATE PROPERTY FeatureSnapshot.featureVector IF NOT EXISTS LIST; +CREATE PROPERTY FeatureSnapshot.computedAt IF NOT EXISTS DATETIME; +CREATE PROPERTY FeatureSnapshot.modelVersion IF NOT EXISTS STRING; +-- Vector indexes +CREATE INDEX IF NOT EXISTS ON Account (behaviorVec) LSM_VECTOR METADATA { dimensions: 4, similarity: 'COSINE' }; +CREATE INDEX IF NOT EXISTS ON Product (embedding) LSM_VECTOR METADATA { dimensions: 4, similarity: 'COSINE' }; diff --git a/feature-store/sql/02-data.sql b/feature-store/sql/02-data.sql new file mode 100644 index 0000000..169991e --- /dev/null +++ b/feature-store/sql/02-data.sql @@ -0,0 +1,123 @@ +-- === Fraud Domain: Accounts === +-- a1-a3: legit, a4-a5: suspicious, a6: flagged +INSERT INTO Account SET accountId = 'a1', accountType = 'personal', signupSource = 'organic', flagged = false, behaviorVec = [0.1, 0.2, 0.8, 0.9]; +INSERT INTO Account SET accountId = 'a2', accountType = 'personal', signupSource = 'referral', flagged = false, behaviorVec = [0.2, 0.1, 0.9, 0.8]; +INSERT INTO Account SET accountId = 'a3', accountType = 'business', signupSource = 'organic', flagged = false, behaviorVec = [0.1, 0.3, 0.7, 0.9]; +INSERT INTO Account SET accountId = 'a4', accountType = 'personal', signupSource = 'ad_campaign', flagged = false, behaviorVec = [0.7, 0.6, 0.2, 0.3]; +INSERT INTO Account SET accountId = 'a5', accountType = 'personal', signupSource = 'ad_campaign', flagged = false, behaviorVec = [0.8, 0.7, 0.2, 0.1]; +INSERT INTO Account SET accountId = 'a6', accountType = 'personal', signupSource = 'unknown', flagged = true, behaviorVec = [0.9, 0.8, 0.1, 0.2]; +-- Merchants +INSERT INTO Merchant SET merchantId = 'm1', category = 'grocery', riskTier = 'low'; +INSERT INTO Merchant SET merchantId = 'm2', category = 'electronics', riskTier = 'low'; +INSERT INTO Merchant SET merchantId = 'm3', category = 'gambling', riskTier = 'high'; +INSERT INTO Merchant SET merchantId = 'm4', category = 'crypto', riskTier = 'high'; +-- TRANSFERRED edges (money flows; a4-a6 form a circular pattern) +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a2') SET amount = 500.00, recordedAt = '2026-03-01 10:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a2') TO (SELECT FROM Account WHERE accountId = 'a3') SET amount = 200.00, recordedAt = '2026-03-02 11:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a3') SET amount = 150.00, recordedAt = '2026-03-03 09:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a6') SET amount = 3000.00, recordedAt = '2026-03-10 02:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a6') TO (SELECT FROM Account WHERE accountId = 'a5') SET amount = 2800.00, recordedAt = '2026-03-10 02:30:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Account WHERE accountId = 'a4') SET amount = 2500.00, recordedAt = '2026-03-10 03:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a5') SET amount = 1500.00, recordedAt = '2026-03-11 04:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a6') TO (SELECT FROM Account WHERE accountId = 'a4') SET amount = 4000.00, recordedAt = '2026-03-12 01:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a3') TO (SELECT FROM Account WHERE accountId = 'a1') SET amount = 100.00, recordedAt = '2026-03-05 14:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a2') TO (SELECT FROM Account WHERE accountId = 'a1') SET amount = 250.00, recordedAt = '2026-03-06 16:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Account WHERE accountId = 'a6') SET amount = 1800.00, recordedAt = '2026-03-13 05:00:00'; +CREATE EDGE TRANSFERRED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a4') SET amount = 300.00, recordedAt = '2026-03-08 12:00:00'; +-- LINKED_DEVICE edges (a4 and a5 share devices with flagged a6) +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a6') SET deviceId = 'dev-001'; +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Account WHERE accountId = 'a6') SET deviceId = 'dev-002'; +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Account WHERE accountId = 'a5') SET deviceId = 'dev-003'; +CREATE EDGE LINKED_DEVICE FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Account WHERE accountId = 'a2') SET deviceId = 'dev-004'; +-- TRANSACTED edges (account -> merchant) +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Merchant WHERE merchantId = 'm1') SET amount = 85.50, recordedAt = '2026-03-01 09:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a1') TO (SELECT FROM Merchant WHERE merchantId = 'm2') SET amount = 450.00, recordedAt = '2026-03-02 14:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a2') TO (SELECT FROM Merchant WHERE merchantId = 'm1') SET amount = 62.30, recordedAt = '2026-03-03 10:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Merchant WHERE merchantId = 'm3') SET amount = 5000.00, recordedAt = '2026-03-10 22:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a4') TO (SELECT FROM Merchant WHERE merchantId = 'm4') SET amount = 8000.00, recordedAt = '2026-03-11 01:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a5') TO (SELECT FROM Merchant WHERE merchantId = 'm3') SET amount = 3500.00, recordedAt = '2026-03-12 23:00:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a6') TO (SELECT FROM Merchant WHERE merchantId = 'm4') SET amount = 12000.00, recordedAt = '2026-03-10 00:30:00'; +CREATE EDGE TRANSACTED FROM (SELECT FROM Account WHERE accountId = 'a3') TO (SELECT FROM Merchant WHERE merchantId = 'm2') SET amount = 320.00, recordedAt = '2026-03-04 15:00:00'; +-- TransactionMetric documents (time-bucketed velocity data) +INSERT INTO TransactionMetric SET accountId = 'a1', txCount = 5, totalAmount = 1200.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a1', txCount = 3, totalAmount = 800.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a1', txCount = 4, totalAmount = 950.00, recordedAt = '2026-03-15 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a2', txCount = 2, totalAmount = 400.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a2', txCount = 3, totalAmount = 600.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a3', txCount = 4, totalAmount = 1500.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a3', txCount = 2, totalAmount = 700.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a4', txCount = 15, totalAmount = 25000.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a4', txCount = 22, totalAmount = 48000.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a4', txCount = 30, totalAmount = 72000.00, recordedAt = '2026-03-15 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a5', txCount = 8, totalAmount = 12000.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a5', txCount = 18, totalAmount = 35000.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a6', txCount = 25, totalAmount = 60000.00, recordedAt = '2026-03-01 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a6', txCount = 35, totalAmount = 95000.00, recordedAt = '2026-03-08 00:00:00'; +INSERT INTO TransactionMetric SET accountId = 'a6', txCount = 40, totalAmount = 120000.00, recordedAt = '2026-03-15 00:00:00'; +-- === Recommendation Domain: Users === +INSERT INTO User SET userId = 'u1', preferenceVec = [0.9, 0.1, 0.1, 0.1]; +INSERT INTO User SET userId = 'u2', preferenceVec = [0.7, 0.3, 0.1, 0.1]; +INSERT INTO User SET userId = 'u3', preferenceVec = [0.1, 0.9, 0.1, 0.1]; +INSERT INTO User SET userId = 'u4', preferenceVec = [0.1, 0.1, 0.9, 0.1]; +INSERT INTO User SET userId = 'u5', preferenceVec = [0.4, 0.4, 0.2, 0.1]; +-- Products +INSERT INTO Product SET productId = 'p1', name = 'Laptop', category = 'Electronics', price = 999.99, embedding = [0.9, 0.1, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p2', name = 'Phone', category = 'Electronics', price = 699.99, embedding = [0.8, 0.2, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p3', name = 'Headphones', category = 'Electronics', price = 199.99, embedding = [0.7, 0.2, 0.2, 0.1]; +INSERT INTO Product SET productId = 'p4', name = 'ML Textbook', category = 'Books', price = 79.99, embedding = [0.1, 0.9, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p5', name = 'Data Science Guide', category = 'Books', price = 49.99, embedding = [0.2, 0.8, 0.1, 0.1]; +INSERT INTO Product SET productId = 'p6', name = 'Python Cookbook', category = 'Books', price = 39.99, embedding = [0.1, 0.8, 0.2, 0.1]; +INSERT INTO Product SET productId = 'p7', name = 'Running Shoes', category = 'Sports', price = 89.99, embedding = [0.1, 0.1, 0.9, 0.1]; +INSERT INTO Product SET productId = 'p8', name = 'Yoga Mat', category = 'Sports', price = 29.99, embedding = [0.1, 0.1, 0.8, 0.2]; +-- PURCHASED edges (u1+u2 share Laptop and Phone -> collab recommends ML Textbook to u1) +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u1') TO (SELECT FROM Product WHERE productId = 'p1'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u1') TO (SELECT FROM Product WHERE productId = 'p2'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u1') TO (SELECT FROM Product WHERE productId = 'p3'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u2') TO (SELECT FROM Product WHERE productId = 'p1'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u2') TO (SELECT FROM Product WHERE productId = 'p2'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u2') TO (SELECT FROM Product WHERE productId = 'p4'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u3') TO (SELECT FROM Product WHERE productId = 'p4'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u3') TO (SELECT FROM Product WHERE productId = 'p5'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u3') TO (SELECT FROM Product WHERE productId = 'p6'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u4') TO (SELECT FROM Product WHERE productId = 'p7'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u4') TO (SELECT FROM Product WHERE productId = 'p8'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u5') TO (SELECT FROM Product WHERE productId = 'p1'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u5') TO (SELECT FROM Product WHERE productId = 'p5'); +CREATE EDGE PURCHASED FROM (SELECT FROM User WHERE userId = 'u5') TO (SELECT FROM Product WHERE productId = 'p7'); +-- === Maintenance Domain: Equipment === +INSERT INTO Equipment SET equipmentId = 'eq1', name = 'Main Compressor', specifications = 'Industrial 500HP', failureRate = 0.02; +INSERT INTO Equipment SET equipmentId = 'eq2', name = 'Cooling Unit A', specifications = 'Glycol cooling 200kW', failureRate = 0.05; +INSERT INTO Equipment SET equipmentId = 'eq3', name = 'Pump Station B', specifications = 'Centrifugal 150GPM', failureRate = 0.03; +INSERT INTO Equipment SET equipmentId = 'eq4', name = 'Generator Alpha', specifications = 'Diesel 800kVA', failureRate = 0.01; +INSERT INTO Equipment SET equipmentId = 'eq5', name = 'Control Panel C', specifications = 'PLC-based HMI', failureRate = 0.08; +-- DEPENDS_ON edges (eq2,eq3 depend on eq1; eq5 depends on eq4; eq4 depends on eq1) +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq2') TO (SELECT FROM Equipment WHERE equipmentId = 'eq1') SET criticality = 'high'; +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq3') TO (SELECT FROM Equipment WHERE equipmentId = 'eq1') SET criticality = 'medium'; +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq4') TO (SELECT FROM Equipment WHERE equipmentId = 'eq1') SET criticality = 'high'; +CREATE EDGE DEPENDS_ON FROM (SELECT FROM Equipment WHERE equipmentId = 'eq5') TO (SELECT FROM Equipment WHERE equipmentId = 'eq4') SET criticality = 'high'; +-- Sensors +INSERT INTO Sensor SET sensorId = 's1', sensorType = 'temperature', unit = 'celsius'; +INSERT INTO Sensor SET sensorId = 's2', sensorType = 'vibration', unit = 'mm_per_sec'; +INSERT INTO Sensor SET sensorId = 's3', sensorType = 'pressure', unit = 'bar'; +-- MONITORED_BY edges +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq1') TO (SELECT FROM Sensor WHERE sensorId = 's1'); +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq1') TO (SELECT FROM Sensor WHERE sensorId = 's2'); +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq2') TO (SELECT FROM Sensor WHERE sensorId = 's1'); +CREATE EDGE MONITORED_BY FROM (SELECT FROM Equipment WHERE equipmentId = 'eq3') TO (SELECT FROM Sensor WHERE sensorId = 's3'); +-- SensorReading documents (eq1 showing anomalous high temperature and vibration) +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 85.2, vibration = 4.5, pressure = 6.1, recordedAt = '2026-03-15 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 92.1, vibration = 5.8, pressure = 6.3, recordedAt = '2026-03-16 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 98.5, vibration = 7.2, pressure = 6.5, recordedAt = '2026-03-17 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq1', temperature = 105.3, vibration = 9.1, pressure = 6.8, recordedAt = '2026-03-18 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 42.1, vibration = 1.2, pressure = 4.5, recordedAt = '2026-03-15 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 43.0, vibration = 1.3, pressure = 4.6, recordedAt = '2026-03-16 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 41.8, vibration = 1.1, pressure = 4.4, recordedAt = '2026-03-17 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq2', temperature = 42.5, vibration = 1.2, pressure = 4.5, recordedAt = '2026-03-18 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 55.0, vibration = 2.0, pressure = 8.1, recordedAt = '2026-03-15 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 54.5, vibration = 2.1, pressure = 8.0, recordedAt = '2026-03-16 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 56.2, vibration = 1.9, pressure = 8.2, recordedAt = '2026-03-17 08:00:00'; +INSERT INTO SensorReading SET equipmentId = 'eq3', temperature = 55.8, vibration = 2.0, pressure = 8.1, recordedAt = '2026-03-18 08:00:00'; +-- === Feature Store Infrastructure: FeatureSnapshots === +INSERT INTO FeatureSnapshot SET entityId = 'a1', entityType = 'Account', featureVector = [4, 3, 5, 12, 2950, 0.15], computedAt = '2026-03-15 00:00:00', modelVersion = 'fraud-v2.1'; +INSERT INTO FeatureSnapshot SET entityId = 'a4', entityType = 'Account', featureVector = [8, 6, 3, 67, 145000, 0.87], computedAt = '2026-03-15 00:00:00', modelVersion = 'fraud-v2.1'; +INSERT INTO FeatureSnapshot SET entityId = 'a6', entityType = 'Account', featureVector = [11, 5, 1, 100, 275000, 0.99], computedAt = '2026-03-15 00:00:00', modelVersion = 'fraud-v2.1'; From 327cd20d79f719b0072ed462b18d45fb3bd6fbbb Mon Sep 17 00:00:00 2001 From: robfrank Date: Mon, 23 Mar 2026 14:17:00 +0100 Subject: [PATCH 2/2] fix(feature-store): address code review findings - Query 2: restructure MATCH to filter flagged in outer WHERE (terminal node after while: traversal is invalid ArcadeDB syntax) - Query 9: add while: for recursive traversal, use .inE()/.outV() to access edge criticality (was reading from vertex, returning null) - Query 8: rename "Personalized Ranking" to "Category Vector Search" (vector is hardcoded for u1, not dynamically read) - Rename command() to send_command() in queries.sh (shadows bash built-in) - Add package-lock.json, switch CI to npm ci for reproducible builds Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/feature-store.yml | 4 +- feature-store/README.md | 2 +- .../com/arcadedb/examples/FeatureStore.java | 29 ++-- feature-store/js/feature-store.js | 24 +-- feature-store/js/package-lock.json | 161 ++++++++++++++++++ feature-store/queries/queries.sh | 22 +-- 6 files changed, 205 insertions(+), 37 deletions(-) create mode 100644 feature-store/js/package-lock.json diff --git a/.github/workflows/feature-store.yml b/.github/workflows/feature-store.yml index 932b8d4..d3dcd47 100644 --- a/.github/workflows/feature-store.yml +++ b/.github/workflows/feature-store.yml @@ -58,13 +58,13 @@ jobs: uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 with: path: ~/.npm - key: ${{ runner.os }}-npm-feature-store-${{ hashFiles('feature-store/js/package.json') }} + key: ${{ runner.os }}-npm-feature-store-${{ hashFiles('feature-store/js/package-lock.json') }} restore-keys: ${{ runner.os }}-npm-feature-store- - name: Install JS dependencies if: matrix.runner == 'js' working-directory: feature-store/js - run: npm install + run: npm ci - name: Start ArcadeDB working-directory: feature-store diff --git a/feature-store/README.md b/feature-store/README.md index 1b22204..f1fd94f 100644 --- a/feature-store/README.md +++ b/feature-store/README.md @@ -87,7 +87,7 @@ node feature-store.js | 5 | Shared Device Network | Cypher | Graph | Fraud | | 6 | Collaborative Filtering | Cypher | Graph | Recommendations | | 7 | Product Embedding Search | SQL + vectorNeighbors | Vector | Recommendations | -| 8 | Personalized Ranking | SQL + vectorNeighbors | Vector | Recommendations | +| 8 | Category Vector Search | SQL + vectorNeighbors | Vector | Recommendations | | 9 | Equipment Dependency Chain | SQL MATCH | Graph | Maintenance | | 10 | Sensor Anomaly Detection | SQL | Time-series | Maintenance | | 11 | Feature Vector Assembly | SQL (multi-step) | All | Cross-domain | diff --git a/feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java b/feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java index 7152984..8d9a08e 100644 --- a/feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java +++ b/feature-store/java/src/main/java/com/arcadedb/examples/FeatureStore.java @@ -24,7 +24,7 @@ public static void main(String[] args) { System.out.println("\n========== RECOMMENDATION DOMAIN =========="); tryRun(() -> runQuery6CollaborativeFiltering(db), "Query 6"); tryRun(() -> runQuery7ProductEmbeddingSearch(db), "Query 7"); - tryRun(() -> runQuery8PersonalizedRanking(db), "Query 8"); + tryRun(() -> runQuery8CategoryVectorSearch(db), "Query 8"); System.out.println("\n========== MAINTENANCE DOMAIN =========="); tryRun(() -> runQuery9EquipmentDependencyChain(db), "Query 9"); @@ -76,13 +76,13 @@ private static void runQuery2DistanceToFlagged(RemoteDatabase db) { String sql = """ - SELECT flaggedId, depth + SELECT accountId AS flaggedId, depth FROM ( MATCH {type: Account, where: (accountId = 'a4')} .both('TRANSFERRED'){while: ($depth < 4), as: hop} - {type: Account, where: (flagged = true), as: flagged} - RETURN flagged.accountId AS flaggedId, $depth AS depth + RETURN hop.accountId AS accountId, hop.flagged AS flagged, $depth AS depth ) + WHERE flagged = true ORDER BY depth ASC LIMIT 1"""; @@ -215,10 +215,10 @@ ORDER BY vectorNeighbors('Product[embedding]', [0.9, 0.1, 0.1, 0.1], 10) DESC } } - // ── Query 8: Personalized Ranking (SQL) ──────────────────────────────────── - private static void runQuery8PersonalizedRanking(RemoteDatabase db) { - printHeader("Query 8: Personalized Ranking (SQL)", - "Rank Electronics products for u1 by preference vector similarity."); + // ── Query 8: Category Vector Search (SQL) ─────────────────────────────────── + private static void runQuery8CategoryVectorSearch(RemoteDatabase db) { + printHeader("Query 8: Category Vector Search (SQL)", + "Rank Electronics products by similarity to u1 preference [0.9,0.1,0.1,0.1]."); String sql = """ @@ -245,20 +245,23 @@ private static void runQuery9EquipmentDependencyChain(RemoteDatabase db) { String sql = """ - SELECT name, failureRate, criticality + SELECT name, failureRate, criticality, depth FROM ( MATCH {type: Equipment, where: (equipmentId = 'eq1')} - .in('DEPENDS_ON'){as: dep} + .inE('DEPENDS_ON'){while: ($depth < 5), as: e} + .outV(){as: dep} RETURN dep.name AS name, dep.failureRate AS failureRate, - dep.out('DEPENDS_ON')[0].criticality AS criticality - )"""; + e.criticality AS criticality, $depth AS depth + ) + ORDER BY depth ASC"""; try (ResultSet rs = db.query("sql", sql)) { while (rs.hasNext()) { Result r = rs.next(); System.out.println(" " + r.getProperty("name") + " | failureRate: " + r.getProperty("failureRate") - + " | criticality: " + r.getProperty("criticality")); + + " | criticality: " + r.getProperty("criticality") + + " | depth: " + r.getProperty("depth")); } } } diff --git a/feature-store/js/feature-store.js b/feature-store/js/feature-store.js index cafea76..ad66ba3 100644 --- a/feature-store/js/feature-store.js +++ b/feature-store/js/feature-store.js @@ -47,13 +47,13 @@ async function runQuery2(client) { 'Find shortest path from a4 to nearest flagged account via transfers.'); const sql = ` - SELECT flaggedId, depth + SELECT accountId AS flaggedId, depth FROM ( MATCH {type: Account, where: (accountId = 'a4')} .both('TRANSFERRED'){while: ($depth < 4), as: hop} - {type: Account, where: (flagged = true), as: flagged} - RETURN flagged.accountId AS flaggedId, $depth AS depth + RETURN hop.accountId AS accountId, hop.flagged AS flagged, $depth AS depth ) + WHERE flagged = true ORDER BY depth ASC LIMIT 1`; @@ -159,10 +159,10 @@ async function runQuery7(client) { } } -// ── Query 8: Personalized Ranking (Cypher via {cypher} prefix) ────────────── +// ── Query 8: Category Vector Search (SQL) ─────────────────────────────────── async function runQuery8(client) { - printHeader('Query 8: Personalized Ranking (SQL)', - 'Rank Electronics products for u1 by preference vector similarity.'); + printHeader('Query 8: Category Vector Search (SQL)', + 'Rank Electronics products by similarity to u1 preference [0.9,0.1,0.1,0.1].'); const sql = ` SELECT name, price @@ -183,17 +183,19 @@ async function runQuery9(client) { 'Find all downstream equipment affected if eq1 fails.'); const sql = ` - SELECT name, failureRate, criticality + SELECT name, failureRate, criticality, depth FROM ( MATCH {type: Equipment, where: (equipmentId = 'eq1')} - .in('DEPENDS_ON'){as: dep} + .inE('DEPENDS_ON'){while: ($depth < 5), as: e} + .outV(){as: dep} RETURN dep.name AS name, dep.failureRate AS failureRate, - dep.out('DEPENDS_ON')[0].criticality AS criticality - )`; + e.criticality AS criticality, $depth AS depth + ) + ORDER BY depth ASC`; const res = await client.query(sql); for (const row of res.rows) { - console.log(` ${String(row.name).padEnd(20)} | failureRate: ${row.failurerate} | criticality: ${row.criticality}`); + console.log(` ${String(row.name).padEnd(20)} | failureRate: ${row.failurerate} | criticality: ${row.criticality} | depth: ${row.depth}`); } } diff --git a/feature-store/js/package-lock.json b/feature-store/js/package-lock.json new file mode 100644 index 0000000..231fc85 --- /dev/null +++ b/feature-store/js/package-lock.json @@ -0,0 +1,161 @@ +{ + "name": "feature-store", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "feature-store", + "version": "1.0.0", + "dependencies": { + "pg": "^8.13.0" + } + }, + "node_modules/pg": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/pg/-/pg-8.20.0.tgz", + "integrity": "sha512-ldhMxz2r8fl/6QkXnBD3CR9/xg694oT6DZQ2s6c/RI28OjtSOpxnPrUCGOBJ46RCUxcWdx3p6kw/xnDHjKvaRA==", + "license": "MIT", + "dependencies": { + "pg-connection-string": "^2.12.0", + "pg-pool": "^3.13.0", + "pg-protocol": "^1.13.0", + "pg-types": "2.2.0", + "pgpass": "1.0.5" + }, + "engines": { + "node": ">= 16.0.0" + }, + "optionalDependencies": { + "pg-cloudflare": "^1.3.0" + }, + "peerDependencies": { + "pg-native": ">=3.0.1" + }, + "peerDependenciesMeta": { + "pg-native": { + "optional": true + } + } + }, + "node_modules/pg-cloudflare": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/pg-cloudflare/-/pg-cloudflare-1.3.0.tgz", + "integrity": "sha512-6lswVVSztmHiRtD6I8hw4qP/nDm1EJbKMRhf3HCYaqud7frGysPv7FYJ5noZQdhQtN2xJnimfMtvQq21pdbzyQ==", + "license": "MIT", + "optional": true + }, + "node_modules/pg-connection-string": { + "version": "2.12.0", + "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.12.0.tgz", + "integrity": "sha512-U7qg+bpswf3Cs5xLzRqbXbQl85ng0mfSV/J0nnA31MCLgvEaAo7CIhmeyrmJpOr7o+zm0rXK+hNnT5l9RHkCkQ==", + "license": "MIT" + }, + "node_modules/pg-int8": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", + "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==", + "license": "ISC", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/pg-pool": { + "version": "3.13.0", + "resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-3.13.0.tgz", + "integrity": "sha512-gB+R+Xud1gLFuRD/QgOIgGOBE2KCQPaPwkzBBGC9oG69pHTkhQeIuejVIk3/cnDyX39av2AxomQiyPT13WKHQA==", + "license": "MIT", + "peerDependencies": { + "pg": ">=8.0" + } + }, + "node_modules/pg-protocol": { + "version": "1.13.0", + "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.13.0.tgz", + "integrity": "sha512-zzdvXfS6v89r6v7OcFCHfHlyG/wvry1ALxZo4LqgUoy7W9xhBDMaqOuMiF3qEV45VqsN6rdlcehHrfDtlCPc8w==", + "license": "MIT" + }, + "node_modules/pg-types": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", + "license": "MIT", + "dependencies": { + "pg-int8": "1.0.1", + "postgres-array": "~2.0.0", + "postgres-bytea": "~1.0.0", + "postgres-date": "~1.0.4", + "postgres-interval": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/pgpass": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/pgpass/-/pgpass-1.0.5.tgz", + "integrity": "sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==", + "license": "MIT", + "dependencies": { + "split2": "^4.1.0" + } + }, + "node_modules/postgres-array": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", + "integrity": "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/postgres-bytea": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.1.tgz", + "integrity": "sha512-5+5HqXnsZPE65IJZSMkZtURARZelel2oXUEO8rH83VS/hxH5vv1uHquPg5wZs8yMAfdv971IU+kcPUczi7NVBQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-date": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.7.tgz", + "integrity": "sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-interval": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.2.0.tgz", + "integrity": "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==", + "license": "MIT", + "dependencies": { + "xtend": "^4.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/split2": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", + "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", + "license": "ISC", + "engines": { + "node": ">= 10.x" + } + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "license": "MIT", + "engines": { + "node": ">=0.4" + } + } + } +} diff --git a/feature-store/queries/queries.sh b/feature-store/queries/queries.sh index 58ec64d..d989634 100755 --- a/feature-store/queries/queries.sh +++ b/feature-store/queries/queries.sh @@ -21,7 +21,7 @@ query() { | jq '.result' } -command() { +send_command() { local lang="$1" cmd="$2" jq -cn --arg l "$lang" --arg c "$cmd" '{"language":$l,"command":$c}' \ | curl -sf -u "$AUTH" -X POST "$COMMAND_URL" \ @@ -51,13 +51,13 @@ echo "=== Query 2: Distance to Flagged Account (SQL MATCH) ===" echo "Find shortest path from a4 to nearest flagged account via transfers." echo "" query "sql" " -SELECT flaggedId, depth +SELECT accountId AS flaggedId, depth FROM ( MATCH {type: Account, where: (accountId = 'a4')} .both('TRANSFERRED'){while: (\$depth < 4), as: hop} - {type: Account, where: (flagged = true), as: flagged} - RETURN flagged.accountId AS flaggedId, \$depth AS depth + RETURN hop.accountId AS accountId, hop.flagged AS flagged, \$depth AS depth ) +WHERE flagged = true ORDER BY depth ASC LIMIT 1 " @@ -134,8 +134,8 @@ LIMIT 5 # ───────────────────────────────────────────────────────────────────────────── echo "" -echo "=== Query 8: Personalized Ranking (SQL) ===" -echo "Rank Electronics products for u1 by preference vector similarity." +echo "=== Query 8: Category Vector Search (SQL) ===" +echo "Rank Electronics products by similarity to u1 preference [0.9,0.1,0.1,0.1]." echo "" query "sql" " SELECT name, price @@ -153,13 +153,15 @@ echo "=== Query 9: Equipment Dependency Chain (SQL MATCH) ===" echo "Find all downstream equipment affected if eq1 fails." echo "" query "sql" " -SELECT name, failureRate, criticality +SELECT name, failureRate, criticality, depth FROM ( MATCH {type: Equipment, where: (equipmentId = 'eq1')} - .in('DEPENDS_ON'){as: dep} + .inE('DEPENDS_ON'){while: (\$depth < 5), as: e} + .outV(){as: dep} RETURN dep.name AS name, dep.failureRate AS failureRate, - dep.out('DEPENDS_ON')[0].criticality AS criticality + e.criticality AS criticality, \$depth AS depth ) +ORDER BY depth ASC " # ───────────────────────────────────────────────────────────────────────────── @@ -217,7 +219,7 @@ WHERE accountId = 'a4' echo "" echo "--- Step 4: Store feature snapshot ---" -command "sql" " +send_command "sql" " INSERT INTO FeatureSnapshot SET entityId = 'a4', entityType = 'Account', featureVector = [8, 6, 3, 67, 145000, 0.87], computedAt = '2026-03-23 00:00:00', modelVersion = 'fraud-v2.2'