diff --git a/sql/data_challenge/README.md b/sql/data_challenge/README.md new file mode 100644 index 0000000..f66297e --- /dev/null +++ b/sql/data_challenge/README.md @@ -0,0 +1,7 @@ +Exasol Data Challenge Sample Data. + +The tutorial.sql contains +exercises on loading data, +writing queries, +and applying Python functions +for analysis. \ No newline at end of file diff --git a/sql/data_challenge/import.sql b/sql/data_challenge/import.sql new file mode 100644 index 0000000..e0ac49d --- /dev/null +++ b/sql/data_challenge/import.sql @@ -0,0 +1 @@ +SELECT 1; diff --git a/sql/data_challenge/tutorial.sql b/sql/data_challenge/tutorial.sql new file mode 100644 index 0000000..7ba29d0 --- /dev/null +++ b/sql/data_challenge/tutorial.sql @@ -0,0 +1,68 @@ +/* +EXASOL DATA CHALLENGE +*/ + +-- let's create a schema for our data as well as some empty tables +CREATE SCHEMA DATA_CHALLENGE; +CREATE TABLE DATA_CHALLENGE.ITEM (I_ITEM_SK SMALLINT , I_ITEM_ID CHAR(16) , I_REC_START_DATE DATE, I_REC_END_DATE DATE, I_ITEM_DESC VARCHAR(200), I_CURRENT_PRICE DECIMAL(7,2), I_WHOLESALE_COST DECIMAL(7,2), I_BRAND_ID INTEGER, I_BRAND CHAR(50), I_CLASS_ID INTEGER, I_CLASS CHAR(50), I_CATEGORY_ID INTEGER, I_CATEGORY CHAR(50), I_MANUFACT_ID INTEGER, I_MANUFACT CHAR(50), I_SIZE CHAR(20), I_FORMULATION CHAR(20), I_COLOR CHAR(20), I_UNITS CHAR(10), I_CONTAINER CHAR(10), I_MANAGER_ID INTEGER, I_PRODUCT_NAME CHAR(50), DISTRIBUTE BY I_ITEM_SK); +CREATE TABLE DATA_CHALLENGE.CUSTOMER (C_CUSTOMER_SK SMALLINT , C_CUSTOMER_ID CHAR(16) , C_CURRENT_CDEMO_SK SMALLINT, C_CURRENT_HDEMO_SK SMALLINT, C_CURRENT_ADDR_SK SMALLINT, C_FIRST_SHIPTO_DATE_SK SMALLINT, C_FIRST_SALES_DATE_SK SMALLINT, C_SALUTATION CHAR(10), C_FIRST_NAME CHAR(20), C_LAST_NAME CHAR(30), C_PREFERRED_CUST_FLAG CHAR(1), C_BIRTH_DAY INTEGER, C_BIRTH_MONTH INTEGER, C_BIRTH_YEAR INTEGER, C_BIRTH_COUNTRY VARCHAR(20), C_LOGIN CHAR(13), C_EMAIL_ADDRESS CHAR(50), C_LAST_REVIEW_DATE_SK SMALLINT, DISTRIBUTE BY C_CUSTOMER_SK); +CREATE TABLE 
DATA_CHALLENGE.HOUSEHOLD_DEMOGRAPHICS (HD_DEMO_SK SMALLINT , HD_INCOME_BAND_SK SMALLINT, HD_BUY_POTENTIAL CHAR(15), HD_DEP_COUNT INTEGER, HD_VEHICLE_COUNT INTEGER, DISTRIBUTE BY HD_DEMO_SK); +CREATE TABLE DATA_CHALLENGE.WEB_SALES (WS_SOLD_DATE_SK SMALLINT, WS_SOLD_TIME_SK SMALLINT, WS_SHIP_DATE_SK SMALLINT, WS_ITEM_SK SMALLINT , WS_BILL_CUSTOMER_SK SMALLINT, WS_BILL_CDEMO_SK SMALLINT, WS_BILL_HDEMO_SK SMALLINT, WS_BILL_ADDR_SK SMALLINT, WS_SHIP_CUSTOMER_SK SMALLINT, WS_SHIP_CDEMO_SK SMALLINT, WS_SHIP_HDEMO_SK SMALLINT, WS_SHIP_ADDR_SK SMALLINT, WS_WEB_PAGE_SK SMALLINT, WS_WEB_SITE_SK SMALLINT, WS_SHIP_MODE_SK SMALLINT, WS_WAREHOUSE_SK SMALLINT, WS_PROMO_SK SMALLINT, WS_ORDER_NUMBER INTEGER , WS_QUANTITY INTEGER, WS_WHOLESALE_COST DECIMAL(7,2), WS_LIST_PRICE DECIMAL(7,2), WS_SALES_PRICE DECIMAL(7,2), WS_EXT_DISCOUNT_AMT DECIMAL(7,2), WS_EXT_SALES_PRICE DECIMAL(7,2), WS_EXT_WHOLESALE_COST DECIMAL(7,2), WS_EXT_LIST_PRICE DECIMAL(7,2), WS_EXT_TAX DECIMAL(7,2), WS_COUPON_AMT DECIMAL(7,2), WS_EXT_SHIP_COST DECIMAL(7,2), WS_NET_PAID DECIMAL(7,2), WS_NET_PAID_INC_TAX DECIMAL(7,2), WS_NET_PAID_INC_SHIP DECIMAL(7,2), WS_NET_PAID_INC_SHIP_TAX DECIMAL(7,2), WS_NET_PROFIT DECIMAL(7,2), DISTRIBUTE BY WS_ITEM_SK); + +-- also let's create a connection to S3 +CREATE OR REPLACE CONNECTION S3_DATA_CHALLENGE +TO 'http://exa-data-challenge.s3-eu-central-1.amazonaws.com'; + +/* +now it's your turn + +1. Load data into the tables. Data is stored under the previously defined S3_DATA_CHALLENGE connection. +Files are called customer.csv, item.csv, household_demographics.csv and web_sales.csv +There is a single CSV file per table; each file contains a header row and its columns are separated by semicolons. +Rows are terminated by Unix LF, and the files are UTF-8 encoded. +*/ + + +/* +2. Now that we have loaded the data, let's familiarize ourselves and write some queries. 
+You can find the ER diagram for the data here (2.3.5): https://www.tpc.org/TPC_Documents_Current_Versions/pdf/TPC-DS_v4.0.0.pdf +*/ + +/* +Q1: Get the first and last name of customers that have at least one vehicle in the household. +Hint: you can join the customer and household data on C_CURRENT_HDEMO_SK = HD_DEMO_SK +*/ + + +/* +Q2: Now let's refine Q1 and additionally filter down to those customers that had web sales of at least 250$ of list price +Hint: you can join customer and web_sales data on ws_bill_customer_sk=c_customer_sk +*/ + + +/* +Q3: Now let's extend Q2 and add the item category to the order +Hint: you can join web_sales and item data on WS_ITEM_SK=I_ITEM_SK +Make sure that the categories returned are NOT NULL +*/ + + +/* +3. Let's apply some Python to the data. Write a Python UDF to determine the Levenshtein distance between two strings. +*/ + + +/* +4. Now let's bring our Python function and Q3 together. +We want to obtain the results from Q3 and want to add a column for the Levenshtein distance between I_CATEGORY and the word "Shoe". +Hint: Make sure to trim I_CATEGORY +*/ + +/* +5. Final challenge, let's count the results of our previous query that have a Levenshtein distance of 1. +*/ + + +/* +You have mastered the challenge, congratulations! +*/ diff --git a/sql/retail_mini/README.md b/sql/retail_mini/README.md new file mode 100644 index 0000000..d2ab742 --- /dev/null +++ b/sql/retail_mini/README.md @@ -0,0 +1,9 @@ +Sample data used in EXAcademy. + +The data consists of +some sample sales data. + +The tutorial.sql contains +some queries used +in the course +"Exasol Performance Management". 
diff --git a/sql/retail_mini/import.sql b/sql/retail_mini/import.sql new file mode 100644 index 0000000..c64e7df --- /dev/null +++ b/sql/retail_mini/import.sql @@ -0,0 +1,80 @@ +create schema retail; + +CREATE TABLE CITIES ( + CITY_ID DECIMAL(9,0), + COUNTRY_CODE VARCHAR(2) UTF8, + ZIP_CODE VARCHAR(20) UTF8, + CITY_NAME VARCHAR(200) UTF8, + DISTRICT VARCHAR(50) UTF8, + AREA VARCHAR(50) UTF8, + AREA_SHORT VARCHAR(50) UTF8, + LAT DECIMAL(9,6), + LON DECIMAL(9,6) + ); + +CREATE TABLE ARTICLE ( + ARTICLE_ID DECIMAL(9,0), + DESCRIPTION VARCHAR(100) UTF8, + EAN DECIMAL(18,0), + BASE_SALES_PRICE DECIMAL(9,2), + PURCHASE_PRICE DECIMAL(9,2), + PRODUCT_GROUP DECIMAL(9,0), + PRODUCT_CLASS DECIMAL(9,0), + QUANTITY_UNIT VARCHAR(100) UTF8, + TMP_OLD_NR DECIMAL(9,0), + PRODUCT_GROUP_DESC VARCHAR(100) UTF8, + DISTRIBUTION_COST DECIMAL(9,2) + ); + +CREATE TABLE MARKETS ( + MARKET_ID DECIMAL(9,0), + LONGITUDE DECIMAL(9,6), + LATITUDE DECIMAL(9,6), + POSTAL_CODE CHAR(5) UTF8, + CITY VARCHAR(50) UTF8, + AREA VARCHAR(50) UTF8, + CITY_ID DECIMAL(9,0), + POPULATION DECIMAL(18,0) + ); + +CREATE TABLE SALES ( + SALES_ID DECIMAL(18,0), + SALES_DATE DATE, + SALES_TIMESTAMP TIMESTAMP, + PRICE DECIMAL(9,2), + MONEY_GIVEN DECIMAL(9,2), + RETURNED_CHANGE DECIMAL(9,2), + LOYALTY_ID DECIMAL(18,0), + MARKET_ID DECIMAL(9,0), + TERMINAL_ID DECIMAL(9,0), + EMPLOYEE_ID DECIMAL(9,0), + TERMINAL_DAILY_SALES_NR DECIMAL(9,0), + DISTRIBUTE BY SALES_ID + ); + +CREATE TABLE SALES_POSITIONS ( + SALES_ID DECIMAL(18,0), + POSITION_ID DECIMAL(9,0), + ARTICLE_ID DECIMAL(9,0), + AMOUNT DECIMAL(9,0), + PRICE DECIMAL(9,2), + VOUCHER_ID DECIMAL(9,0), + CANCELED BOOLEAN, + DISTRIBUTE BY SALES_ID + ); + +CREATE TABLE DIM_DATE ( + SALES_DATE DATE + ); + + + +CREATE OR REPLACE CONNECTION S3_SAMPLE_BUCKET TO 'https://exasol-test-sample-worksheet.s3.eu-central-1.amazonaws.com'; + +IMPORT INTO CITIES from CSV at S3_SAMPLE_BUCKET file 'retail_mini/CITIES.csv'; +IMPORT INTO ARTICLE from CSV at S3_SAMPLE_BUCKET file 
'retail_mini/ARTICLE.csv'; +IMPORT INTO MARKETS from CSV at S3_SAMPLE_BUCKET file 'retail_mini/MARKETS.csv'; +IMPORT INTO SALES from CSV at S3_SAMPLE_BUCKET file 'retail_mini/SALES.csv'; +IMPORT INTO SALES_POSITIONS from CSV at S3_SAMPLE_BUCKET file 'retail_mini/SALES_POSITIONS.csv'; +IMPORT INTO DIM_DATE from CSV at S3_SAMPLE_BUCKET file 'retail_mini/DIM_DATE.csv'; + diff --git a/sql/retail_mini/tutorial.sql b/sql/retail_mini/tutorial.sql new file mode 100644 index 0000000..b2eb1eb --- /dev/null +++ b/sql/retail_mini/tutorial.sql @@ -0,0 +1,28 @@ +OPEN SCHEMA RETAIL; +-- DISTRIBUTE SALES_POSITIONS and SALES by sales_id, which is our join column +ALTER TABLE SALES_POSITIONS DISTRIBUTE BY SALES_ID; +ALTER TABLE SALES DISTRIBUTE BY SALES_ID; + +-- Verify if tables are well distributed +SELECT iproc(), count(*) FROM sales_positions GROUP BY iproc(); +SELECT iproc(), count(*) FROM sales GROUP BY iproc(); + +-- Enable profiling +ALTER SESSION SET PROFILE = 'on'; + +-- Run first query +SELECT SUM(sp.amount) FROM SALES s JOIN sales_positions sp ON S.sales_id = sp.sales_id WHERE sales_date = '2015-03-05'; + +FLUSH STATISTICS; + +-- Show query details +SELECT * FROM EXA_USER_PROFILE_LAST_DAY WHERE session_id=current_session AND stmt_id=current_statement-4; + +-- Run second query +SELECT S.market_id, SUM(P.amount) AS total_amount FROM SALES_POSITIONS P JOIN SALES S ON P.sales_id = S.sales_id GROUP BY S.market_id; + +FLUSH STATISTICS; + +-- Show query details +SELECT * FROM EXA_USER_PROFILE_LAST_DAY WHERE session_id=current_session AND stmt_id=current_statement-4; + diff --git a/sql/tpc-h-10GB/README.md b/sql/tpc-h-10GB/README.md new file mode 100644 index 0000000..ce506d4 --- /dev/null +++ b/sql/tpc-h-10GB/README.md @@ -0,0 +1,3 @@ +# TPC-H Tutorial + +Description of the tutorial \ No newline at end of file diff --git a/sql/tpc-h-10GB/import.sql b/sql/tpc-h-10GB/import.sql new file mode 100644 index 0000000..61c01ac --- /dev/null +++ b/sql/tpc-h-10GB/import.sql @@ -0,0 +1,3 
@@ +/* +Import TPC-H 10GB +*/ \ No newline at end of file diff --git a/sql/tpc-h-10GB/tutorial.sql b/sql/tpc-h-10GB/tutorial.sql new file mode 100644 index 0000000..126d965 --- /dev/null +++ b/sql/tpc-h-10GB/tutorial.sql @@ -0,0 +1,907 @@ +OPEN SCHEMA tpc; +COMMIT; + +-- $ID$ +-- TPC-H/TPC-R Pricing Summary Report Query (Q1) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 1 0 + + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '68' day (3) +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; +COMMIT; + +-- $ID$ +-- TPC-H/TPC-R Minimum Cost Supplier Query (Q2) +-- Functional Query Definition +-- Approved February 1998 +-- Minor modification - result set limit ( 2.1.2.9.3 ) +-- TPC-H Query 2 0 + + +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 38 + and p_type like '%STEEL' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +LIMIT 100 +; +COMMIT; + +-- $ID$ +-- TPC-H/TPC-R Shipping Priority Query (Q3) +-- Functional Query Definition +-- Approved February 1998 +-- Minor modification - result set limit ( 
2.1.2.9.3 ) +-- TPC-H Query 3 0 + + +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'FURNITURE' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-17' + and l_shipdate > date '1995-03-17' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +LIMIT 10 +; +COMMIT; + + +-- $ID$ +-- TPC-H/TPC-R Order Priority Checking Query (Q4) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 4 0 + + +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1995-08-01' + and o_orderdate < date '1995-08-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; +COMMIT; + + +-- $ID$ +-- TPC-H/TPC-R Local Supplier Volume Query (Q5) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 5 0 + + +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AMERICA' + and o_orderdate >= date '1993-01-01' + and o_orderdate < date '1993-01-01' + interval '1' year +group by + n_name +order by + revenue desc; +COMMIT; + + +-- $ID$ +-- TPC-H/TPC-R Forecasting Revenue Change Query (Q6) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 6 0 + + +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1993-01-01' + and l_shipdate < date '1993-01-01' + interval '1' year + and l_discount between 0.07 - 0.01 and 0.07 + 0.01 + and 
l_quantity < 25; +COMMIT; + +-- $ID$ +-- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 17 0 + + +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container = 'SM BAG' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Volume Shipping Query (Q7) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 7 0 + + +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'MOZAMBIQUE' and n2.n_name = 'UNITED KINGDOM') + or (n1.n_name = 'UNITED KINGDOM' and n2.n_name = 'MOZAMBIQUE') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R National Market Share Query (Q8) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 8 0 + + +select + o_year, + sum(case + when nation = 'MOZAMBIQUE' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey 
= c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AFRICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'PROMO POLISHED TIN' + ) as all_nations +group by + o_year +order by + o_year; +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Product Type Profit Measure Query (Q9) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 9 0 + + +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%thistle%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Returned Item Reporting Query (Q10) +-- Functional Query Definition +-- Approved February 1998 +-- Minor modification - result set limit ( 2.1.2.9.3 ) +-- TPC-H Query 10 0 + + +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-11-01' + and o_orderdate < date '1993-11-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +LIMIT 20 +; +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Important Stock Identification Query (Q11) +-- Functional Query Definition +-- Approved February 1998 +-- Minor modification - Quoting keyword value ( 2.2.3.3 k) ) +-- 
TPC-H Query 11 0 + + +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as "VALUE" +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'JAPAN' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0000001000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'JAPAN' + ) +order by + "VALUE" desc; +COMMIT; + + + + +-- $ID$ +-- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 12 0 + + +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('FOB', 'REG AIR') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1993-01-01' + and l_receiptdate < date '1993-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode; +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Customer Distribution Query (Q13) +-- Functional Query Definition +-- Approved February 1998 +-- Minor modification - Naming of the columns of the sub select - different syntax for select-list AS clause ( 2.2.3.3 b) ) +-- TPC-H Query 13 0 + + +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%special%packages%' + group by + c_custkey + ) as c_orders (c_custkey, c_count) +group by + c_count +order by + custdist desc, + c_count desc; +COMMIT; + + +-- $ID$ +-- TPC-H/TPC-R Promotion Effect Query (Q14) +-- Functional Query Definition +-- Approved February 1998 
+-- TPC-H Query 14 0 + + +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1993-04-01' + and l_shipdate < date '1993-04-01' + interval '1' month; +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Top Supplier Query (Q15) +-- Functional Query Definition +-- Approved February 1998 +-- Using approved Variant A of Q15 (Appendix B) - 'with clause' instead of 'create view/drop view' +-- Variant allowed because of 2.2.4.2 and 2.2.4.3 +-- TPC-H Query 15 0 + + + +with revenue0 (supplier_no, total_revenue) as +( + select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) + from + lineitem + where + l_shipdate >= date '1995-07-01' + and l_shipdate < date '1995-07-01' + interval '3' month + group by + l_suppkey +) +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey; + +COMMIT; + + +-- $ID$ +-- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 16 0 + + +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#41' + and p_type not like 'MEDIUM BURNISHED%' + and p_size in (4, 22, 35, 31, 47, 44, 30, 11) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 17 0 + + +select + sum(l_extendedprice) / 7.0 as 
avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container = 'SM BAG' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Large Volume Customer Query (Q18) +-- Function Query Definition +-- Approved February 1998 +-- Minor modification - result set limit ( 2.1.2.9.3 ) +-- TPC-H Query 18 0 + + +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 313 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +LIMIT 100 +; +COMMIT; + + + + +-- $ID$ +-- TPC-H/TPC-R Discounted Revenue Query (Q19) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 19 0 + + +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#13' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 6 and l_quantity <= 6 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#43' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 11 and l_quantity <= 11 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#55' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 27 and l_quantity <= 27 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN 
PERSON' + ); +COMMIT; + + + +-- $ID$ +-- TPC-H/TPC-R Potential Part Promotion Query (Q20) +-- Function Query Definition +-- Approved February 1998 +-- TPC-H Query 20 0 + + +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'ivory%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'KENYA' +order by + s_name; +COMMIT; + + +-- $ID$ +-- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21) +-- Functional Query Definition +-- Approved February 1998 +-- Minor modification - result set limit ( 2.1.2.9.3 ) +-- TPC-H Query 21 0 + + +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'PERU' +group by + s_name +order by + numwait desc, + s_name +LIMIT 100 +; +COMMIT; + + +-- $ID$ +-- TPC-H/TPC-R Global Sales Opportunity Query (Q22) +-- Functional Query Definition +-- Approved February 1998 +-- TPC-H Query 22 0 + + +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('34', '43', '41', '20', '25', '38', '33') + 
and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('34', '43', '41', '20', '25', '38', '33') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; +COMMIT; + diff --git a/sql/tpc-h-1T/README.md b/sql/tpc-h-1T/README.md new file mode 100644 index 0000000..ce506d4 --- /dev/null +++ b/sql/tpc-h-1T/README.md @@ -0,0 +1,3 @@ +# TPC-H Tutorial + +Description of the tutorial \ No newline at end of file diff --git a/sql/tpc-h-1T/import.sql b/sql/tpc-h-1T/import.sql new file mode 100644 index 0000000..45e03bd --- /dev/null +++ b/sql/tpc-h-1T/import.sql @@ -0,0 +1,3 @@ +/* +Import TPC-H 1T +*/ \ No newline at end of file diff --git a/sql/tpc-h-1T/tutorial.sql b/sql/tpc-h-1T/tutorial.sql new file mode 100644 index 0000000..e86e194 --- /dev/null +++ b/sql/tpc-h-1T/tutorial.sql @@ -0,0 +1,3 @@ +/* +Tutorial queries +*/ \ No newline at end of file