diff --git a/benchmarking/public/data-sets/fhvhv_tripdata_2023-01.parquet b/benchmarking/public/data-sets/fhvhv_tripdata_2023-01.parquet index 3e7d61d5..b755fc28 100644 Binary files a/benchmarking/public/data-sets/fhvhv_tripdata_2023-01.parquet and b/benchmarking/public/data-sets/fhvhv_tripdata_2023-01.parquet differ diff --git a/benchmarking/src/app/constants.ts b/benchmarking/src/app/constants.ts index d32603d0..a6b5f10f 100644 --- a/benchmarking/src/app/constants.ts +++ b/benchmarking/src/app/constants.ts @@ -1,102 +1,40 @@ -export const TEST_QUERIES = [ - ` - WITH group_by_query AS ( - SELECT - hvfhs_license_num, - COUNT(*) - FROM - taxi - GROUP BY - hvfhs_license_num - ), +import { tableName } from './query-benchmarking/dummy-data'; - full_query AS ( - SELECT - * - FROM - taxi - ) +const QUERY_1 = `SELECT * FROM taxi limit 50`; - SELECT - COUNT(*) - FROM - group_by_query - LEFT JOIN - full_query - ON - group_by_query.hvfhs_license_num = full_query.hvfhs_license_num - LIMIT 1 - `, - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi', - "SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100", - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num', - 'SELECT * FROM taxi ORDER BY bcf LIMIT 100', - ` - WITH group_by_query AS ( - SELECT - hvfhs_license_num, - COUNT(*) - FROM - taxi - GROUP BY - hvfhs_license_num - ), +const QUERY_2 = `(SELECT * +FROM taxi t1 +WHERE t1.PULocationID NOT IN ( + SELECT PULocationID + FROM in_memory_taxi_trips +)) +UNION +(SELECT * +FROM in_memory_taxi_trips) limit 50`; - full_query AS ( - SELECT - * - FROM - taxi - ) +const QUERY_3 = `(SELECT * + FROM taxi t1 + WHERE NOT EXISTS ( + SELECT 1 + FROM in_memory_taxi_trips t2 + WHERE t2.PULocationID = t1.PULocationID + )) + UNION ALL + (SELECT * FROM in_memory_taxi_trips) limit 50`; - SELECT - COUNT(*) - FROM - group_by_query - LEFT JOIN - full_query - ON - group_by_query.hvfhs_license_num = full_query.hvfhs_license_num - LIMIT 1 - `, - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi', - "SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100", - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num', - 'SELECT * FROM taxi ORDER BY bcf LIMIT 100', - ` - WITH group_by_query AS ( - SELECT - hvfhs_license_num, - COUNT(*) - FROM - taxi - GROUP BY - hvfhs_license_num - ), +const QUERY_4 = `SELECT t1.*, +FROM taxi t1 +LEFT ANTI JOIN ${tableName} t2 +ON t1.PULocationID = t2.PULocationID +UNION ALL +SELECT *, +FROM ${tableName} limit 50`; - full_query AS ( - SELECT - * - FROM - taxi - ) +const BASE_QUERY = `(SELECT * FROM taxi LIMIT 50) AS taxi`; - SELECT - COUNT(*) - FROM - group_by_query - LEFT JOIN - full_query - ON - group_by_query.hvfhs_license_num = full_query.hvfhs_license_num - LIMIT 1 - `, - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi', - "SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100", - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num', - 'SELECT * FROM taxi ORDER BY bcf LIMIT 100', - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi_json', - 'SELECT * FROM taxi_json WHERE price >= 1.0005812645 LIMIT 100', - 'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi_json GROUP BY order_count', - 'SELECT * FROM taxi_json ORDER BY seconds_in_bucket LIMIT 100', +export const TEST_QUERIES = [ + `SELECT count(*) FROM taxi`, + // `select * from ( + // SELECT * FROM ${BASE_QUERY} WHERE taxi.PULocationID NOT IN (SELECT ${tableName}.PULocationID FROM ${tableName}) + // UNION ALL (SELECT * FROM ${tableName}))`, ]; diff --git a/benchmarking/src/app/query-benchmarking/dummy-data.ts b/benchmarking/src/app/query-benchmarking/dummy-data.ts new file mode 100644 index 00000000..f7989e53 --- /dev/null +++ b/benchmarking/src/app/query-benchmarking/dummy-data.ts @@ -0,0 +1,151 @@ +interface TaxiTrip { + Airport_fee: number; + DOLocationID: number; + PULocationID: number; + RatecodeID: number; + VendorID: number; + congestion_surcharge: number; + extra: number; + fare_amount: number; + improvement_surcharge: number; + mta_tax: number; + passenger_count: number; + payment_type: number; + store_and_fwd_flag: 'Y' | 'N'; + tip_amount: number; + tolls_amount: number; + total_amount: number; + tpep_dropoff_datetime: string; // Unix timestamp in milliseconds + tpep_pickup_datetime: string; // Unix timestamp in milliseconds + trip_distance: number; +} + +// For a collection of trips +interface TaxiTripsData { + taxi_trips: TaxiTrip[]; +} + +export const tableName = 'in_memory_taxi_trips'; + +export const DUMMY_DATA: TaxiTripsData = { + taxi_trips: [ + { + Airport_fee: 0, + DOLocationID: 79, + PULocationID: 99, + RatecodeID: 1, + VendorID: 2, + congestion_surcharge: 2.5, + extra: 1, + fare_amount: 17.7, + improvement_surcharge: 1, + mta_tax: 0.5, + passenger_count: 1, + payment_type: 2, + store_and_fwd_flag: 'N', + tip_amount: 0, + tolls_amount: 0, + total_amount: 22.7, + tpep_dropoff_datetime: `'2023-01-01 14:30:00'`, + tpep_pickup_datetime: `'2023-01-01 14:30:00'`, + trip_distance: 1.72, + }, + { + Airport_fee: 0, + DOLocationID: 236, + PULocationID: 10, + RatecodeID: 1, + VendorID: 1, + congestion_surcharge: 2.5, + extra: 3.5, + fare_amount: 10, + improvement_surcharge: 1, + mta_tax: 0.5, + passenger_count: 1, + payment_type: 1, + store_and_fwd_flag: 'N', + tip_amount: 3.75, + tolls_amount: 0, + total_amount: 18.75, + tpep_dropoff_datetime: `'2023-01-01 14:30:00'`, + tpep_pickup_datetime: `'2023-01-01 14:30:00'`, + trip_distance: 1.8, + }, + ], +}; + +export const createTableSQL = () => { + return `CREATE TABLE IF NOT EXISTS ${tableName} ( + VendorID INTEGER, + tpep_pickup_datetime TIMESTAMP, + tpep_dropoff_datetime TIMESTAMP, + passenger_count INTEGER, + trip_distance DECIMAL(10,2), + RatecodeID INTEGER, + store_and_fwd_flag VARCHAR(1), + PULocationID INTEGER, + DOLocationID INTEGER, + payment_type INTEGER, + fare_amount DECIMAL(10,2), + extra DECIMAL(10,2), + mta_tax DECIMAL(10,2), + tip_amount DECIMAL(10,2), + tolls_amount DECIMAL(10,2), + improvement_surcharge DECIMAL(10,2), + total_amount DECIMAL(10,2), + congestion_surcharge DECIMAL(10,2), + Airport_fee DECIMAL(10,2), + )`; +}; + +export const insertDataSQL = (taxiTrips: TaxiTrip[]) => { + const values = taxiTrips + .map( + (trip) => `( + ${trip.Airport_fee}, + ${trip.DOLocationID}, + ${trip.PULocationID}, + ${trip.RatecodeID}, + ${trip.VendorID}, + ${trip.congestion_surcharge}, + ${trip.extra}, + ${trip.fare_amount}, + ${trip.improvement_surcharge}, + ${trip.mta_tax}, + ${trip.passenger_count}, + ${trip.payment_type}, + '${trip.store_and_fwd_flag}', + ${trip.tip_amount}, + ${trip.tolls_amount}, + ${trip.total_amount}, + ${trip.tpep_dropoff_datetime}, + ${trip.tpep_pickup_datetime}, + ${trip.trip_distance} + )` + ) + .join(','); + const sql = `INSERT INTO ${tableName} + ( + Airport_fee, + DOLocationID, + PULocationID, + RatecodeID, + VendorID, + congestion_surcharge, + extra, + fare_amount, + improvement_surcharge, + mta_tax, + passenger_count, + payment_type, + store_and_fwd_flag, + tip_amount, + tolls_amount, + total_amount, + tpep_dropoff_datetime, + tpep_pickup_datetime, + trip_distance + ) VALUES ${values}`; + console.log(sql); + return sql; +}; diff --git a/benchmarking/src/app/query-benchmarking/query-benchmarking.tsx b/benchmarking/src/app/query-benchmarking/query-benchmarking.tsx index ce91252f..3da05a94 100644 --- a/benchmarking/src/app/query-benchmarking/query-benchmarking.tsx +++ b/benchmarking/src/app/query-benchmarking/query-benchmarking.tsx @@ -1,9 +1,19 @@ +import { DBM, DBMNative, DBMParallel } from 'meerkat-dbm/src/dbm'; import { TableWiseFiles } from 'meerkat-dbm/src/types'; import { useMemo, useState } from 'react'; import { TEST_QUERIES } from '../constants'; import { useDBM } from '../hooks/dbm-context'; import { useClassicEffect } from '../hooks/use-classic-effect'; import { generateViewQuery } from '../utils'; +import { createTableSQL, DUMMY_DATA, insertDataSQL } from './dummy-data'; + +const createData = async (dbm: DBM | DBMParallel | DBMNative) => { + const createSQL = createTableSQL(); + const insertSQL = insertDataSQL(DUMMY_DATA.taxi_trips); + await dbm.query(createSQL); + await dbm.query(insertSQL); + console.log('Data created'); +}; export const QueryBenchmarking = () => { const [output, setOutput] = useState< @@ -34,40 +44,45 @@ export const QueryBenchmarking = () => { ); useClassicEffect(() => { - setTotalTime(0); + createData(dbm).then(() => { + setTotalTime(0); + setOutput([]); - setOutput([]); - const promiseArr = []; - const start = performance.now(); - for (let i = 0; i < TEST_QUERIES.length; i++) { - const eachQueryStart = performance.now(); + const promiseArr = []; + const start = performance.now(); + for (let i = 0; i < TEST_QUERIES.length; i++) { + const eachQueryStart = performance.now(); - const promiseObj = dbm - .queryWithTables({ - query: TEST_QUERIES[i], - tables: [{ name: 'taxi' }, { name: 'taxi_json' }], - options: { - ...(fileManagerType !== 'parallel-indexdb' && - fileManagerType !== 'parallel-memory' && { - preQuery: preQuery, - }), - }, - }) - .then((results) => { - const end = performance.now(); - const time = end - eachQueryStart; - setOutput((prev) => [ - ...prev, - { queryName: `Query ${i} ---->`, time }, - ]); - }); + const promiseObj = dbm + .queryWithTables({ + query: TEST_QUERIES[i], + tables: [{ name: 'taxi' }], + options: { + ...(fileManagerType !== 'parallel-indexdb' && + fileManagerType !== 'parallel-memory' && { + preQuery: preQuery, + }), + }, + }) + .then((results) => { + const end = performance.now(); + console.log({ + responseData: results.toArray().map((row) => row.toJSON()), + }); + const time = end - eachQueryStart; + setOutput((prev) => [ + ...prev, + { queryName: `Query ${i} ---->`, time }, + ]); + }); - promiseArr.push(promiseObj); - } - Promise.all(promiseArr).then(() => { - const end = performance.now(); - const time = end - start; - setTotalTime(time); + promiseArr.push(promiseObj); + } + Promise.all(promiseArr).then(() => { + const end = performance.now(); + const time = end - start; + setTotalTime(time); + }); }); }, []);