Skip to content
/ tsbs Public
forked from timescale/tsbs

Commit 9ee4663

Browse files
committed
--wip-- [skip ci]
1 parent 8becc25 commit 9ee4663

File tree

22 files changed

+2150
-49
lines changed

22 files changed

+2150
-49
lines changed

cmd/tsbs_generate_queries/databases/clickhouse/common.go

+22-1
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@ package clickhouse
33
import (
44
"time"
55

6+
"github.com/timescale/tsbs/cmd/tsbs_generate_queries/uses/dea"
67
"github.com/timescale/tsbs/cmd/tsbs_generate_queries/uses/devops"
78
"github.com/timescale/tsbs/cmd/tsbs_generate_queries/utils"
89
"github.com/timescale/tsbs/pkg/query"
910
)
1011

1112
// BaseGenerator contains settings specific for ClickHouse.
1213
type BaseGenerator struct {
13-
UseTags bool
14+
UseTags bool
15+
PropertyAccessMode string
1416
}
1517

1618
// GenerateEmptyQuery returns an empty query.ClickHouse.
@@ -42,3 +44,22 @@ func (g *BaseGenerator) NewDevops(start, end time.Time, scale int) (utils.QueryG
4244

4345
return devops, nil
4446
}
47+
48+
// NewDEA creates a new devops use case query generator.
49+
func (g *BaseGenerator) NewDEA(start, end time.Time, scale int) (utils.QueryGenerator, error) {
50+
core, err := dea.NewCore(start, end, scale)
51+
52+
if err != nil {
53+
return nil, err
54+
}
55+
56+
devops := &DEA{
57+
BaseGenerator: g,
58+
Core: core,
59+
}
60+
61+
return devops, nil
62+
}
63+
64+
// ClickHouse understands and can compare time presented as strings of this format
65+
const clickhouseTimeStringFormat = "2006-01-02 15:04:05"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
package clickhouse
2+
3+
import (
4+
"fmt"
5+
"math/rand"
6+
"strings"
7+
"time"
8+
9+
"github.com/timescale/tsbs/cmd/tsbs_generate_queries/uses/dea"
10+
"github.com/timescale/tsbs/pkg/query"
11+
)
12+
13+
// Devops produces ClickHouse-specific queries for all the devops query types.
14+
type DEA struct {
15+
*dea.Core
16+
*BaseGenerator
17+
}
18+
19+
const (
20+
Json string = "json"
21+
Map string = "map"
22+
)
23+
24+
func Must[T any](v T, err error) T {
25+
if err != nil {
26+
panic(err)
27+
}
28+
return v
29+
}
30+
31+
func (d *DEA) getJSONProperty(key string) string {
32+
return fmt.Sprintf("simpleJSONExtractRaw(propertiesJson, '%s')", key)
33+
}
34+
35+
func (d *DEA) getMapProperty(key string) string {
36+
return fmt.Sprintf("propertiesMap['%s']", key)
37+
}
38+
39+
func (d *DEA) getProperty(key string) string {
40+
switch d.PropertyAccessMode {
41+
case Json:
42+
return d.getJSONProperty(key)
43+
case Map:
44+
return d.getMapProperty(key)
45+
default:
46+
panic(fmt.Sprintf("unknown access mode %s", d.PropertyAccessMode))
47+
}
48+
}
49+
50+
func (d *DEA) getPropertyAlias(property string) string {
51+
return fmt.Sprintf("a_%s", property)
52+
}
53+
54+
func (d *DEA) getAliasedProperties(keys []string) []string {
55+
aliasedProps := make([]string, len(keys))
56+
57+
for i := range keys {
58+
aliasedProps[i] = fmt.Sprintf("%s as %s", d.getProperty(keys[i]), d.getPropertyAlias(keys[i]))
59+
}
60+
61+
return aliasedProps
62+
}
63+
64+
func (d *DEA) getFunnelStepSelectStatements(nSteps int) []string {
65+
var statements []string
66+
67+
for i := range nSteps {
68+
statements = append(statements, fmt.Sprintf("countIf(steps = %d) AS step_%d", i+1, i+1))
69+
}
70+
71+
return statements
72+
}
73+
74+
/*
75+
IF(name = 'navigation' AND simpleJSONExtractString(properties, 'url') LIKE 'app.smartlook.com/sign/up%', 1, 0) AS step_0,
76+
IF(step_0 = 1, timestamp, NULL) AS latest_0,
77+
IF(name = 'signup_step_1', 1, 0) AS step_1,
78+
IF(step_1 = 1, timestamp, NULL) AS latest_1,
79+
IF(name = 'signup_step_2', 1, 0) AS step_2,
80+
IF(step_2 = 1, timestamp, NULL) AS latest_2
81+
*/
82+
func (d *DEA) getFunnelFiltersStatements(nSteps int) []string {
83+
var statements []string
84+
85+
for i := range nSteps {
86+
stepEvent := d.GetRandomEvent()
87+
eventProperty := Must(d.GetRandomProperties(1, dea.AvailableStrProperties))[0]
88+
eventValue := Must(d.GetRandomPropertyValue(eventProperty))
89+
90+
statements = append(statements, fmt.Sprintf("IF (name = '%s' AND %s LIKE '%s', 1, 0)",
91+
stepEvent,
92+
eventProperty,
93+
eventValue))
94+
statements = append(statements, fmt.Sprintf("IF (step_%d = 1, timestamp, NULL) as latest_%d",
95+
i, i))
96+
}
97+
98+
return statements
99+
}
100+
101+
/*
102+
step_0 = 1
103+
OR step_1 = 1
104+
OR step_2 = 1
105+
*/
106+
func (d *DEA) getFunnelConversionStatement(nSteps int) string {
107+
var statements []string
108+
109+
for i := range nSteps - 1 {
110+
statements = append(statements, fmt.Sprintf("step_%d = 1", i))
111+
}
112+
113+
return strings.Join(statements, " OR ")
114+
}
115+
116+
func (d *DEA) getCustomStrictFunnelSQL(nSteps int) string {
117+
interval := d.Interval.MustRandWindow(time.Hour * 24) // TODO: Update to some other interval
118+
119+
sql := fmt.Sprintf(`
120+
SELECT %s
121+
FROM (
122+
SELECT user_id,
123+
steps
124+
FROM (
125+
SELECT user_id,
126+
steps,
127+
max(steps) OVER (PARTITION BY user_id) AS max_steps,
128+
FROM (
129+
SELECT *,
130+
IF(
131+
latest_0 <= latest_1
132+
AND latest_1 <= latest_0 + INTERVAL 14 DAY
133+
AND latest_1 <= latest_2
134+
AND latest_2 <= latest_1 + INTERVAL 14 DAY,
135+
3,
136+
IF(
137+
latest_0 <= latest_1
138+
AND latest_1 <= latest_0 + INTERVAL 14 DAY,
139+
2,
140+
1
141+
)
142+
) AS steps,
143+
IF(
144+
isNotNull(latest_1)
145+
AND latest_1 <= latest_0 + INTERVAL 14 DAY,
146+
dateDiff(
147+
'second',
148+
toDateTime(latest_0),
149+
toDateTime(latest_1)
150+
),
151+
NULL
152+
) AS step_1_conversion_time,
153+
IF(
154+
isNotNull(latest_2)
155+
AND latest_2 <= latest_1 + INTERVAL 14 DAY,
156+
dateDiff(
157+
'second',
158+
toDateTime(latest_1),
159+
toDateTime(latest_2)
160+
),
161+
NULL
162+
) AS step_2_conversion_time
163+
FROM (
164+
SELECT user_id,
165+
eventDate,
166+
step_0,
167+
min(latest_0) OVER (
168+
PARTITION BY user_id
169+
ORDER BY eventDate DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING
170+
) latest_0,
171+
step_1,
172+
min(latest_1) OVER (
173+
PARTITION BY user_id
174+
ORDER BY eventDate DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING
175+
) latest_1,
176+
step_2,
177+
min(latest_2) OVER (
178+
PARTITION BY user_id
179+
ORDER BY eventDate DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING
180+
) latest_2
181+
FROM (
182+
SELECT user_id,
183+
timestamp AS eventDate,
184+
name,
185+
%s
186+
FROM (
187+
SELECT user_id, id, properties, name, timestamp
188+
FROM %s e
189+
WHERE timestamp >= '%s' AND timestamp <= '%s'
190+
) WHERE (
191+
%s
192+
)
193+
)
194+
)
195+
WHERE step_0 = 1
196+
)
197+
)
198+
GROUP BY user_id, steps
199+
HAVING steps = max_steps
200+
)
201+
`, strings.Join(d.getFunnelStepSelectStatements(nSteps), ",\n"),
202+
strings.Join(d.getFunnelFiltersStatements(nSteps), ",\n"),
203+
dea.TableName,
204+
interval.Start().Format(clickhouseTimeStringFormat),
205+
interval.End().Format(clickhouseTimeStringFormat),
206+
d.getFunnelConversionStatement(nSteps))
207+
208+
return sql
209+
}
210+
211+
func (d *DEA) getInternalStrictFunnelSQL(nSteps int) string {
212+
var steps []string
213+
observedEvents := make(map[string]bool)
214+
215+
for range nSteps {
216+
event := d.GetRandomEvent()
217+
observedEvents[event] = true
218+
219+
property := Must(d.GetRandomProperties(1, dea.AvailableStrProperties))[0]
220+
propertyValue := Must(d.GetRandomPropertyValue(property))
221+
222+
stepStatement := fmt.Sprintf("name = '%s' AND %s = '%s'",
223+
event, property, propertyValue)
224+
steps = append(steps, stepStatement)
225+
}
226+
227+
var observedEventsStatements []string
228+
for event := range observedEvents {
229+
observedEventsStatements = append(observedEventsStatements, fmt.Sprintf("'%s'", event))
230+
}
231+
232+
sql := fmt.Sprintf(`SELECT
233+
level,
234+
count() AS c
235+
FROM (
236+
SELECT
237+
user_id,
238+
windowFunnel(6048000000000000)(timestamp,
239+
%s
240+
) AS level
241+
FROM
242+
%s
243+
WHERE (
244+
name IN (%s)
245+
)
246+
GROUP BY user_id
247+
)
248+
GROUP BY level
249+
ORDER BY level ASC;`,
250+
strings.Join(steps, ",\n"),
251+
dea.TableName,
252+
strings.Join(observedEventsStatements, ", "),
253+
)
254+
255+
return sql
256+
}
257+
258+
func (d *DEA) NestedWhere(qi query.Query) {
259+
interval := d.Interval.MustRandWindow(time.Hour * 24) // TODO: Update to some other interval
260+
randomProperties, err := d.GetRandomProperties(4, dea.AvailableStrProperties)
261+
panicIfErr(err)
262+
263+
selectClauses := strings.Join(d.getAliasedProperties(randomProperties), ", ")
264+
265+
whereClauses := fmt.Sprintf("%s = '%s' AND %s != '' AND (%s LIKE '%%%s%%' OR %s LIKE '%%%s') "+
266+
"AND (createdAt >= '%s') AND (createdAt <= '%s')",
267+
d.getPropertyAlias(randomProperties[0]),
268+
Must(d.GetRandomPropertyValue(randomProperties[0])),
269+
d.getPropertyAlias(randomProperties[1]),
270+
d.getPropertyAlias(randomProperties[2]),
271+
Must(d.GetRandomPropertyValue(randomProperties[2]))[2:6],
272+
d.getPropertyAlias(randomProperties[3]),
273+
Must(d.GetRandomPropertyValue(randomProperties[3]))[2:],
274+
interval.Start().Format(clickhouseTimeStringFormat),
275+
interval.End().Format(clickhouseTimeStringFormat))
276+
277+
sql := fmt.Sprintf(`
278+
SELECT toStartOfHour(created_at) AS hour, %s FROM %s WHERE %s
279+
`, selectClauses, dea.TableName, whereClauses)
280+
281+
humanLabel := "ClickHouse nested and dynamic"
282+
humanDesc := fmt.Sprintf("%s: nested where query with dynamic data access", humanLabel)
283+
d.fillInQuery(qi, humanLabel, humanDesc, dea.TableName, sql)
284+
}
285+
286+
func (d *DEA) EventHistogram(qi query.Query) {
287+
interval := d.Interval.MustRandWindow(time.Hour * 24) // TODO: Update to some other interval
288+
289+
sql := fmt.Sprintf(`
290+
SELECT
291+
toDate(timestamp, 'Europe/Prague') as day,
292+
uniq(user_id) as visitors,
293+
count() as cnt
294+
FROM %s
295+
WHERE
296+
name LIKE '%s'
297+
AND timestamp >= now() - INTERVAL 30 DAY
298+
GROUP BY
299+
day
300+
ORDER BY
301+
day WITH FILL
302+
FROM toDate(%s)
303+
TO toDate(%s)
304+
`,
305+
dea.TableName,
306+
d.GetRandomEvent(),
307+
interval.Start().Format(clickhouseTimeStringFormat),
308+
interval.End().Format(clickhouseTimeStringFormat))
309+
310+
d.fillInQuery(qi, "ClickHouse Event histogram", "Clickhouse Event histogram", dea.TableName, sql)
311+
}
312+
313+
func (d *DEA) Funnel(qi query.Query, inOrder bool, minSteps, maxSteps int) {
314+
if maxSteps < minSteps {
315+
panic(fmt.Errorf("maximum steps (%d) is less than minimum steps (%d)", maxSteps, minSteps))
316+
}
317+
318+
nSteps := rand.Intn(maxSteps-minSteps) + minSteps + 1
319+
// selectStatement :=
320+
321+
sql := d.getInternalStrictFunnelSQL(nSteps)
322+
323+
d.fillInQuery(qi, "ClickHouse Event histogram", "Clickhouse Event histogram", dea.TableName, sql)
324+
}

0 commit comments

Comments
 (0)