Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified benchmarking/public/data-sets/fhvhv_tripdata_2023-01.parquet
Binary file not shown.
128 changes: 33 additions & 95 deletions benchmarking/src/app/constants.ts
Original file line number Diff line number Diff line change
@@ -1,102 +1,40 @@
export const TEST_QUERIES = [
`
WITH group_by_query AS (
SELECT
hvfhs_license_num,
COUNT(*)
FROM
taxi
GROUP BY
hvfhs_license_num
),
import { tableName } from './query-benchmarking/dummy-data';

full_query AS (
SELECT
*
FROM
taxi
)
// Baseline query: the first 50 rows of the `taxi` table.
const QUERY_1 = `SELECT * FROM taxi limit 50`;

SELECT
COUNT(*)
FROM
group_by_query
LEFT JOIN
full_query
ON
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
LIMIT 1
`,
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi',
"SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100",
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num',
'SELECT * FROM taxi ORDER BY bcf LIMIT 100',
`
WITH group_by_query AS (
SELECT
hvfhs_license_num,
COUNT(*)
FROM
taxi
GROUP BY
hvfhs_license_num
),
/**
 * NOT IN variant: rows of `taxi` whose PULocationID does not occur in the
 * in-memory table, UNION-ed with every row of the in-memory table, followed
 * by a trailing `limit 50`.
 *
 * The in-memory table is referenced through the imported `tableName` rather
 * than a hard-coded identifier, so this query keeps working if the table is
 * ever renamed.
 */
const QUERY_2 = `(SELECT *
FROM taxi t1
WHERE t1.PULocationID NOT IN (
SELECT PULocationID
FROM ${tableName}
))
UNION
(SELECT *
FROM ${tableName}) limit 50`;

full_query AS (
SELECT
*
FROM
taxi
)
/**
 * NOT EXISTS variant: rows of `taxi` for which no in-memory row shares the
 * same PULocationID, combined via UNION ALL with every row of the in-memory
 * table, followed by a trailing `limit 50`.
 *
 * The in-memory table is referenced through the imported `tableName` rather
 * than a hard-coded identifier, so this query keeps working if the table is
 * ever renamed.
 */
const QUERY_3 = `(SELECT *
FROM taxi t1
WHERE NOT EXISTS (
SELECT 1
FROM ${tableName} t2
WHERE t2.PULocationID = t1.PULocationID
))
UNION ALL
(SELECT * FROM ${tableName}) limit 50`;

SELECT
COUNT(*)
FROM
group_by_query
LEFT JOIN
full_query
ON
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
LIMIT 1
`,
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi',
"SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100",
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num',
'SELECT * FROM taxi ORDER BY bcf LIMIT 100',
`
WITH group_by_query AS (
SELECT
hvfhs_license_num,
COUNT(*)
FROM
taxi
GROUP BY
hvfhs_license_num
),
// ANTI JOIN variant: `taxi` rows with no PULocationID match in the in-memory
// table (`tableName`), combined via UNION ALL with the in-memory rows.
// NOTE(review): both select lists end with a trailing comma (`t1.*,` / `*,`)
// and the query uses `LEFT ANTI JOIN`; this presumably relies on the target
// engine's SQL dialect accepting both — confirm before reusing elsewhere.
const QUERY_4 = `SELECT t1.*,
FROM taxi t1
LEFT ANTI JOIN ${tableName} t2
ON t1.PULocationID = t2.PULocationID
UNION ALL
SELECT *,
FROM ${tableName} limit 50`;

full_query AS (
SELECT
*
FROM
taxi
)
// Aliased subquery (first 50 rows of `taxi`, exposed under the alias `taxi`).
// It is written to be spliced directly after a FROM keyword, as the
// commented-out entry in TEST_QUERIES does.
const BASE_QUERY = `(SELECT * FROM taxi LIMIT 50) AS taxi`;

SELECT
COUNT(*)
FROM
group_by_query
LEFT JOIN
full_query
ON
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
LIMIT 1
`,
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi',
"SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100",
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num',
'SELECT * FROM taxi ORDER BY bcf LIMIT 100',
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi_json',
'SELECT * FROM taxi_json WHERE price >= 1.0005812645 LIMIT 100',
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi_json GROUP BY order_count',
'SELECT * FROM taxi_json ORDER BY seconds_in_bucket LIMIT 100',
// Queries executed by the benchmarking page, in array order.
// Only the row-count query is currently active; the second entry (a NOT IN /
// UNION ALL query built from BASE_QUERY and tableName) is commented out.
export const TEST_QUERIES = [
  `SELECT count(*) FROM taxi`,
  // `select * from (
  //   SELECT * FROM ${BASE_QUERY} WHERE taxi.PULocationID NOT IN (SELECT ${tableName}.PULocationID FROM ${tableName})
  //   UNION ALL (SELECT * FROM ${tableName}))`,
];
151 changes: 151 additions & 0 deletions benchmarking/src/app/query-benchmarking/dummy-data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// One taxi trip record. Field names match the column names used by
// createTableSQL and insertDataSQL in this file.
interface TaxiTrip {
  Airport_fee: number;
  DOLocationID: number;
  PULocationID: number;
  RatecodeID: number;
  VendorID: number;
  congestion_surcharge: number;
  extra: number;
  fare_amount: number;
  improvement_surcharge: number;
  mta_tax: number;
  passenger_count: number;
  payment_type: number;
  store_and_fwd_flag: 'Y' | 'N';
  tip_amount: number;
  tolls_amount: number;
  total_amount: number;
  tpep_dropoff_datetime: string; // Timestamp text, e.g. '2023-01-01 14:30:00'; DUMMY_DATA stores it already wrapped in single quotes
  tpep_pickup_datetime: string; // Timestamp text, e.g. '2023-01-01 14:30:00'; DUMMY_DATA stores it already wrapped in single quotes
  trip_distance: number;
}

// Container for a collection of trips; `taxi_trips` holds the individual
// TaxiTrip records.
interface TaxiTripsData {
  taxi_trips: TaxiTrip[];
}

// Name of the table that createTableSQL creates and insertDataSQL populates.
export const tableName = 'in_memory_taxi_trips';

// Two hard-coded sample trips used to seed the in-memory table.
// The timestamp fields are template strings that already contain their own
// single quotes, so their runtime value is a quoted literal such as
// '2023-01-01 14:30:00'. Pickup and dropoff are identical in both records.
export const DUMMY_DATA: TaxiTripsData = {
  taxi_trips: [
    {
      Airport_fee: 0,
      DOLocationID: 79,
      PULocationID: 99,
      RatecodeID: 1,
      VendorID: 2,
      congestion_surcharge: 2.5,
      extra: 1,
      fare_amount: 17.7,
      improvement_surcharge: 1,
      mta_tax: 0.5,
      passenger_count: 1,
      payment_type: 2,
      store_and_fwd_flag: 'N',
      tip_amount: 0,
      tolls_amount: 0,
      total_amount: 22.7,
      tpep_dropoff_datetime: `'2023-01-01 14:30:00'`,
      tpep_pickup_datetime: `'2023-01-01 14:30:00'`,
      trip_distance: 1.72,
    },
    {
      Airport_fee: 0,
      DOLocationID: 236,
      PULocationID: 10,
      RatecodeID: 1,
      VendorID: 1,
      congestion_surcharge: 2.5,
      extra: 3.5,
      fare_amount: 10,
      improvement_surcharge: 1,
      mta_tax: 0.5,
      passenger_count: 1,
      payment_type: 1,
      store_and_fwd_flag: 'N',
      tip_amount: 3.75,
      tolls_amount: 0,
      total_amount: 18.75,
      tpep_dropoff_datetime: `'2023-01-01 14:30:00'`,
      tpep_pickup_datetime: `'2023-01-01 14:30:00'`,
      trip_distance: 1.8,
    },
  ],
};

/**
 * Builds the DDL statement that creates the in-memory taxi trips table.
 *
 * The statement uses `CREATE TABLE IF NOT EXISTS`, so running it again when
 * the table already exists is not an error.
 *
 * The column list deliberately has no trailing comma after the last column:
 * a dangling comma before the closing parenthesis is not standard SQL and is
 * only tolerated by lenient dialects.
 *
 * @returns The `CREATE TABLE` statement for `tableName`.
 */
export const createTableSQL = (): string => {
  return `CREATE TABLE IF NOT EXISTS ${tableName} (
VendorID INTEGER,
tpep_pickup_datetime TIMESTAMP,
tpep_dropoff_datetime TIMESTAMP,
passenger_count INTEGER,
trip_distance DECIMAL(10,2),
RatecodeID INTEGER,
store_and_fwd_flag VARCHAR(1),
PULocationID INTEGER,
DOLocationID INTEGER,
payment_type INTEGER,
fare_amount DECIMAL(10,2),
extra DECIMAL(10,2),
mta_tax DECIMAL(10,2),
tip_amount DECIMAL(10,2),
tolls_amount DECIMAL(10,2),
improvement_surcharge DECIMAL(10,2),
total_amount DECIMAL(10,2),
congestion_surcharge DECIMAL(10,2),
Airport_fee DECIMAL(10,2)
)`;
};

/**
 * Renders a timestamp string as a SQL string literal.
 *
 * Values that are already wrapped in single quotes are returned unchanged.
 * Bare values are wrapped in single quotes, with embedded single quotes
 * doubled, so they no longer end up in the statement as unquoted text.
 */
const toTimestampLiteral = (value: string): string => {
  const trimmed = value.trim();
  const alreadyQuoted =
    trimmed.length >= 2 && trimmed.startsWith("'") && trimmed.endsWith("'");
  return alreadyQuoted ? value : `'${value.replace(/'/g, "''")}'`;
};

/**
 * Builds a single multi-row `INSERT` statement for the given trips.
 *
 * Columns are named explicitly, so the order used here does not have to match
 * the column order of the table definition; it only has to match the order of
 * the values in each tuple below.
 *
 * Side effect: the generated statement is written to the console.
 *
 * @param taxiTrips Trips to insert. An empty array produces a statement with
 *   no value tuples after `VALUES`.
 * @returns The `INSERT INTO ... VALUES ...` statement.
 */
export const insertDataSQL = (taxiTrips: TaxiTrip[]) => {
  const values = taxiTrips
    .map(
      (trip) => `(
${trip.Airport_fee},
${trip.DOLocationID},
${trip.PULocationID},
${trip.RatecodeID},
${trip.VendorID},
${trip.congestion_surcharge},
${trip.extra},
${trip.fare_amount},
${trip.improvement_surcharge},
${trip.mta_tax},
${trip.passenger_count},
${trip.payment_type},
'${trip.store_and_fwd_flag}',
${trip.tip_amount},
${trip.tolls_amount},
${trip.total_amount},
${toTimestampLiteral(trip.tpep_dropoff_datetime)},
${toTimestampLiteral(trip.tpep_pickup_datetime)},
${trip.trip_distance}
)`
    )
    .join(',');
  const sql = `INSERT INTO ${tableName}
(
Airport_fee,
DOLocationID,
PULocationID,
RatecodeID,
VendorID,
congestion_surcharge,
extra,
fare_amount,
improvement_surcharge,
mta_tax,
passenger_count,
payment_type,
store_and_fwd_flag,
tip_amount,
tolls_amount,
total_amount,
tpep_dropoff_datetime,
tpep_pickup_datetime,
trip_distance
) VALUES ${values}`;
  console.log(sql);
  return sql;
};
77 changes: 46 additions & 31 deletions benchmarking/src/app/query-benchmarking/query-benchmarking.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
import { DBM, DBMNative, DBMParallel } from 'meerkat-dbm/src/dbm';
import { TableWiseFiles } from 'meerkat-dbm/src/types';
import { useMemo, useState } from 'react';
import { TEST_QUERIES } from '../constants';
import { useDBM } from '../hooks/dbm-context';
import { useClassicEffect } from '../hooks/use-classic-effect';
import { generateViewQuery } from '../utils';
import { createTableSQL, DUMMY_DATA, insertDataSQL } from './dummy-data';

/**
 * Seeds the database with the dummy taxi trips.
 *
 * Both SQL statements are built up front, then executed one after the other
 * through `dbm.query`: first the table creation, then the insert.
 *
 * @param dbm Database manager used to run the statements.
 */
const createData = async (dbm: DBM | DBMParallel | DBMNative) => {
  // Build both statements before running either of them.
  const statements = [
    createTableSQL(),
    insertDataSQL(DUMMY_DATA.taxi_trips),
  ];

  // Run sequentially: the table has to exist before rows are inserted.
  for (const statement of statements) {
    await dbm.query(statement);
  }

  console.log('Data created');
};

export const QueryBenchmarking = () => {
const [output, setOutput] = useState<
Expand Down Expand Up @@ -34,40 +44,45 @@
);

useClassicEffect(() => {
setTotalTime(0);
createData(dbm).then(() => {
setTotalTime(0);
setOutput([]);

setOutput([]);
const promiseArr = [];
const start = performance.now();
for (let i = 0; i < TEST_QUERIES.length; i++) {
const eachQueryStart = performance.now();
const promiseArr = [];
const start = performance.now();
for (let i = 0; i < TEST_QUERIES.length; i++) {
const eachQueryStart = performance.now();

const promiseObj = dbm
.queryWithTables({
query: TEST_QUERIES[i],
tables: [{ name: 'taxi' }, { name: 'taxi_json' }],
options: {
...(fileManagerType !== 'parallel-indexdb' &&
fileManagerType !== 'parallel-memory' && {
preQuery: preQuery,
}),
},
})
.then((results) => {
const end = performance.now();
const time = end - eachQueryStart;
setOutput((prev) => [
...prev,
{ queryName: `Query ${i} ---->`, time },
]);
});
const promiseObj = dbm
.queryWithTables({
query: TEST_QUERIES[i],
tables: [{ name: 'taxi' }],
options: {
...(fileManagerType !== 'parallel-indexdb' &&
fileManagerType !== 'parallel-memory' && {
preQuery: preQuery,
}),
},
})
.then((results) => {
const end = performance.now();
console.log({
responseData: results.toArray().map((row) => row.toJSON()),

Check failure on line 70 in benchmarking/src/app/query-benchmarking/query-benchmarking.tsx

View workflow job for this annotation

GitHub Actions / build

'results' is of type 'unknown'.

Check failure on line 70 in benchmarking/src/app/query-benchmarking/query-benchmarking.tsx

View workflow job for this annotation

GitHub Actions / build

Parameter 'row' implicitly has an 'any' type.
});
const time = end - eachQueryStart;
setOutput((prev) => [
...prev,
{ queryName: `Query ${i} ---->`, time },
]);
});

promiseArr.push(promiseObj);
}
Promise.all(promiseArr).then(() => {
const end = performance.now();
const time = end - start;
setTotalTime(time);
promiseArr.push(promiseObj);
}
Promise.all(promiseArr).then(() => {
const end = performance.now();
const time = end - start;
setTotalTime(time);
});
});
}, []);

Expand Down
Loading