@@ -21,7 +21,6 @@ function graph_box_plot(experiment_params_list::Vector{ExperimentParams};
21
21
# load the results
22
22
results_filename = params_to_results_filename (experiment_params)
23
23
results_path = " Experiments/Results/Estimation_" * results_filename
24
- # println("results path: ", results_path)
25
24
results_df = CSV. read (results_path, DataFrame; normalizenames= true )
26
25
27
26
# keep track of the data points
@@ -105,7 +104,6 @@ function graph_grouped_box_plot(experiment_params_list::Vector{ExperimentParams}
105
104
# load the results
106
105
results_filename = params_to_results_filename (experiment_params)
107
106
results_path = " Experiments/Results/Estimation_" * results_filename
108
- # println("results path: ", results_path)
109
107
results_df = CSV. read (results_path, DataFrame; normalizenames= true )
110
108
111
109
# get the x_value and grouping (same for all results in this experiment param)
@@ -186,24 +184,36 @@ function comparison_dataset()
186
184
dataset = comparison_results[i, :Dataset ]
187
185
query_path = comparison_results[i, :Query ]
188
186
if dataset == " lubm80"
189
- comparison_results[i, :QueryType ] = match (r" .*/lubm80_(.*).txt" , query_path). captures[1 ]
187
+ if ! isnothing (match (r" .*/lubm80_(.*).txt" , query_path))
188
+ comparison_results[i, :QueryType ] = match (r" .*/lubm80_(.*).txt" , query_path). captures[1 ]
189
+ else
190
+ comparison_results[i, :QueryType ] = " n/a"
191
+ end
190
192
elseif dataset in [" aids" , " human" , " yago" ]
191
- comparison_results[i, :QueryType ] = match (r" (.*)_.*/.*" , query_path). captures[1 ]
193
+ if ! isnothing (match (r" (.*)_.*/.*" , query_path))
194
+ comparison_results[i, :QueryType ] = match (r" (.*)_.*/.*" , query_path). captures[1 ]
195
+ else
196
+ comparison_results[i, :QueryType ] = " n/a"
197
+ end
192
198
else
193
- comparison_results[i, :QueryType ] = match (r" .*/query_(.*)_.*" , query_path). captures[1 ]
199
+ if ! isnothing (match (r" .*/query_(.*)_.*" , query_path))
200
+ comparison_results[i, :QueryType ] = match (r" .*/query_(.*)_.*" , query_path). captures[1 ]
201
+ else
202
+ comparison_results[i, :QueryType ] = " n/a"
203
+ end
194
204
end
195
205
end
196
206
results_dict = Dict ()
197
207
for i in 1 : nrow (comparison_results)
198
208
dataset = comparison_results[i, :Dataset ]
199
209
estimator = comparison_results[i, :Estimator ]
200
- query_path = comparison_results[i, :Query ]
210
+ query_path = (estimator == " lss " ) ? " query " * string (i) : comparison_results[i, :Query ]
201
211
results_dict[(dataset, estimator, query_path)] = (Estimate= comparison_results[i, :Value ],
202
212
Runtime= comparison_results[i, :Runtime ],
203
213
QueryType= comparison_results[i,:QueryType ])
204
214
end
205
215
estimators = unique (comparison_results[:, :Estimator ])
206
- println (estimators)
216
+ println (" Estimators: " , estimators)
207
217
return estimators, results_dict
208
218
end
209
219
@@ -265,12 +275,12 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
265
275
current_y = if y_type == estimate_error
266
276
min (10 ^ 30 , max (1 , results_df[i, :Estimate ])) / results_df[i, :TrueCard ]
267
277
else # y_type == runtime
268
- results_df[i, :EstimationTime ]
278
+ typeof (results_df[i, :EstimationTime ]) == String ? parse (Float64, results_df[i, :EstimationTime ]) : results_df[i, :EstimationTime ]
269
279
end
270
280
true_card[(data, get_query_id (string (experiment_params. dataset), results_df[i, :QueryPath ]))] = (results_df[i, :TrueCard ], current_x)
271
281
# push the errors and their groupings into the correct vector
272
282
push! (x_values, string (current_x))
273
- push! (y_values, current_y)
283
+ push! (y_values, typeof (current_y) == String ? parse (Float64, current_y) : current_y)
274
284
push! (estimators, current_group)
275
285
end
276
286
end
@@ -283,6 +293,9 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
283
293
card = query_card_and_size[1 ]
284
294
size = query_card_and_size[2 ]
285
295
for estimator in estimator_types
296
+ if (estimator == " lss" )
297
+ continue
298
+ end
286
299
comp_key = (data, estimator, query_path)
287
300
(estimate, runtime) = 1 , 60 # TODO : We shouldn't use an arbitrary number for runtime here
288
301
if haskey (comparison_results, comp_key)
@@ -302,15 +315,37 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
302
315
current_y = if y_type == estimate_error
303
316
min (10 ^ 30 , max (1 , estimate)) / card
304
317
else # y_type == runtime
305
- runtime / 1000.0
318
+ typeof (runtime) == String ? parse (Float64, runtime) / 1000 : runtime / 1000.0
306
319
end
320
+
307
321
# push the errors and their groupings into the correct vector
308
322
push! (x_values, string (current_x))
309
- push! (y_values, current_y)
323
+ push! (y_values, typeof (current_y) == String ? parse (Float64, current_y) : current_y)
310
324
push! (estimators, estimator)
311
325
end
312
326
end
313
327
328
+ # now handle leftover lss data
329
+ if (" lss" in estimator_types)
330
+ for results_key in keys (comparison_results)
331
+ # results_dict[(dataset, estimator, query_path)] = (Estimate=comparison_results[i, :Value], Runtime=comparison_results[i, :Runtime], QueryType=comparison_results[i,:QueryType])
332
+ # look for all the rows where the estimator is lss, then push the appropriate x and y values.
333
+ if (results_key[2 ] == " lss" )
334
+ current_results = comparison_results[results_key]
335
+ current_x = results_key[1 ]
336
+ current_y = if y_type == estimate_error
337
+ current_results[1 ]
338
+ else
339
+ current_results[2 ]
340
+ end
341
+ estimator = " lss"
342
+ push! (x_values, string (current_x))
343
+ push! (y_values, typeof (current_y) == String ? parse (Float64, current_y) : current_y)
344
+ push! (estimators, estimator)
345
+ end
346
+ end
347
+ end
348
+
314
349
if isnothing (x_order)
315
350
x_order = sort (unique (x_values))
316
351
end
@@ -409,13 +444,23 @@ function graph_grouped_bar_plot(experiment_params_list::Vector{ExperimentParams}
409
444
append! (x_values, [" aids" , " human" , " lubm80" , " dblp" , " eu2005" , " patents" , " yeast" , " youtube" ])
410
445
append! (y_values, [88 , 648 , 569 , 800 , 6600 , 6900 , 6300 , 3200 ])
411
446
append! (groups, [" alleyTPI" for _ in 1 : 8 ])
447
+ append! (x_values, [" aids" , " human" , " lubm80" , " dblp" , " eu2005" , " yeast" , " youtube" ])
448
+ append! (y_values, [9.023910 , 9.067842 , 9.018477 , 8.981142 , 9.010042 , 9.045878 , 8.992702 ]) # units of MB
449
+ append! (groups, [" lss" for _ in 1 : 7 ])
450
+
412
451
elseif y_type == build_time
413
452
append! (x_values, [" aids" , " human" , " lubm80" , " dblp" , " eu2005" , " patents" , " yeast" , " youtube" ])
414
453
append! (y_values, [.3 , 4.5 , 9.9 , .5 , 4.2 , 8.5 , .1 , 2.1 ])
415
454
append! (groups, [" sumrdf" for _ in eachindex (y_values)])
416
455
append! (x_values, [" aids" , " human" , " lubm80" , " dblp" , " eu2005" , " patents" , " yeast" , " youtube" ])
417
456
append! (y_values, [221 , 2518 , 17452 , 1061 , 14233 , 11738 , 35585 , 11044 ])
418
457
append! (groups, [" alleyTPI" for _ in 1 : 8 ])
458
+ append! (x_values, [" aids" , " human" , " lubm80" , " dblp" , " eu2005" , " yeast" , " youtube" ])
459
+ # append!(y_values, [1022.6, 29.5023, 3.6737, 3355.36, 492.89, 7047.44, 3130.0165]) # multithreaded results
460
+ append! (y_values, [2207.7717 , 50.2491 , 5.9976 , 8105.503 , 328.89 , 19839.2887 , 2309.733 ]) # single-threaded results
461
+ append! (groups, [" lss" for _ in 1 : 7 ])
462
+
463
+
419
464
end
420
465
for experiment_params in experiment_params_list
421
466
# load the results
0 commit comments