Skip to content

Commit cfa1303

Browse files
committed
Add Tech Report
1 parent 7ac9c14 commit cfa1303

5 files changed

+89
-30
lines changed
1.55 MB
Binary file not shown.

Experiments/Scripts/comparison_exps.jl

+35-3
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,13 @@ end
4646

4747
println("Building...")
4848

49-
build_experiments(experiment_params)
49+
#build_experiments(experiment_params)
5050

5151
println("Estimating...")
5252

53-
run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
53+
#run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
5454

55-
comparison_methods = ["alley", "wj", "impr", "jsub", "cs", "cset", "sumrdf"]
55+
comparison_methods = ["alley", "alleyTPI", "wj", "impr", "jsub", "cs", "cset", "sumrdf"]
5656
x_order = [string(data) for data in datasets]
5757
legend_order = [params.description for params in experiment_params][1:Int(length(experiment_params)/ length(datasets))]
5858
legend_order = vcat(legend_order, comparison_methods)
@@ -113,3 +113,35 @@ graph_grouped_boxplot_with_comparison_methods(experiment_params;
113113
y_label="Relative Error log\$_{10}\$",
114114
x_label = "Query Size",
115115
filename="query_size_error")
116+
117+
118+
119+
# Figure 4: grouped bar plots of memory footprint and build time per dataset.
# Only "alleyTPI" and "sumrdf" are listed as comparison methods for these plots.
comparison_methods = ["alleyTPI", "sumrdf"]
x_order = [string(ds) for ds in datasets]
# Legend: the first length(experiment_params) / length(datasets) experiment
# descriptions, followed by the comparison methods.
legend_order = vcat(
    [p.description for p in experiment_params][1:Int(length(experiment_params) / length(datasets))],
    comparison_methods)
println("Graphing figure 4")

# Memory footprint per dataset, written to "overall_memory".
graph_grouped_bar_plot(
    experiment_params;
    grouping=description,
    y_type=memory_footprint,
    x_order=x_order,
    legend_order=legend_order,
    ylims=[0, 100],
    y_ticks=[20, 40, 60, 80, 100],
    legend_pos=:topright,
    dimensions=(1000, 550),
    y_label="Memory (MBs)",
    filename="overall_memory")

# Build time per dataset, written to "overall_build_time".
graph_grouped_bar_plot(
    experiment_params;
    grouping=description,
    y_type=build_time,
    x_order=x_order,
    legend_order=legend_order,
    legend_pos=:topleft,
    ylims=[0, 800],
    y_ticks=[100, 200, 300, 400, 500, 600, 700, 800],
    dimensions=(1000, 550),
    y_label="Build Time (s)",
    filename="overall_build_time")

Experiments/Scripts/estimator-failure.jl

+36-24
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,40 @@ include("../Experiments.jl")
33
#datasets = [human, aids]
44
datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
55
#datasets = [human, aids, yeast, dblp, youtube, eu2005, patents]
6-
datasets = [yeast]
6+
#datasets = [human, aids, yeast, dblp, youtube, ]
77
queries = load_querysets(datasets)
88
num_queries = Dict(string(dataset)=>length(queries[dataset]) for dataset in datasets)
99

1010
methods, comparison_results = comparison_dataset()
1111

1212
failure_counts = Dict()
1313
failure_probabilities = Dict()
14+
missing_counts = Dict()
15+
zero_counts = Dict()
1416
for method in methods
1517
failure_counts[method] = counter(String)
18+
missing_counts[method] = counter(String)
19+
zero_counts[method] = counter(String)
1620
failure_probabilities[method] = Dict()
1721
for dataset in datasets
1822
string_dataset = string(dataset)
19-
for query in queries[dataset]
20-
qid = get_query_id(string_dataset, query.query_path)
21-
comp_key = (string_dataset, method, qid)
22-
if !haskey(comparison_results, comp_key)
23-
inc!(failure_counts[method], string_dataset)
24-
elseif comparison_results[comp_key].Estimate == 0
25-
inc!(failure_counts[method], string_dataset)
26-
elseif comparison_results[comp_key].Estimate == Inf
27-
inc!(failure_counts[method], string_dataset)
28-
elseif comparison_results[comp_key].Estimate == NaN
29-
inc!(failure_counts[method], string_dataset)
23+
# Tally, for the current `method` and `dataset`, how many queries count as
# estimator failures. "cset" is pinned to zero failures without inspecting
# its results.
if method == "cset"
    failure_counts[method][string_dataset] = 0
else
    for query in queries[dataset]
        qid = get_query_id(string_dataset, query.query_path)
        comp_key = (string_dataset, method, qid)
        if !haskey(comparison_results, comp_key)
            # No recorded result for this query: a failure, also tracked as missing.
            inc!(failure_counts[method], string_dataset)
            inc!(missing_counts[method], string_dataset)
            continue
        end
        # NOTE(review): assumes `Estimate` is numeric — confirm against the results CSV.
        estimate = comparison_results[comp_key].Estimate
        if estimate == 0
            # A zero estimate is a failure, also tracked separately.
            inc!(failure_counts[method], string_dataset)
            inc!(zero_counts[method], string_dataset)
        elseif estimate == Inf || isnan(estimate)
            # `x == NaN` is always false under IEEE 754 comparison, so NaN
            # estimates must be detected with `isnan` to be counted at all.
            inc!(failure_counts[method], string_dataset)
        end
    end
end
3242
failure_probabilities[method][string_dataset] = failure_counts[method][string_dataset] / num_queries[string_dataset]
@@ -35,10 +45,10 @@ end
3545

3646
failure_counts["BSK"] = counter(String)
3747
failure_counts["BSK++"] = counter(String)
38-
failure_counts["AvgMix64"] = counter(String)
48+
failure_counts["AvgMix32"] = counter(String)
3949
failure_probabilities["BSK"] = Dict()
4050
failure_probabilities["BSK++"] = Dict()
41-
failure_probabilities["AvgMix64"] = Dict()
51+
failure_probabilities["AvgMix32"] = Dict()
4252
for dataset in datasets
4353
string_dataset = string(dataset)
4454
bsk_params = ExperimentParams(deg_stats_type=MaxDegStats,
@@ -83,30 +93,31 @@ for dataset in datasets
8393
failure_probabilities["BSK++"][string_dataset] = failure_counts["BSK++"][string_dataset] / num_queries[string_dataset]
8494

8595

86-
mix_scheme = [(QuasiStable, 32), (NeighborNodeLabels, 16), (NodeLabels, 16)]
96+
mix_scheme = [(Degree, 8), (QuasiStable, 8), (NeighborNodeLabels, 8), (NodeLabels, 8)]
8797
avg_params = ExperimentParams(dataset=dataset,
88-
n_replications=2,
89-
partitioning_scheme=mix_scheme)
98+
partitioning_scheme=mix_scheme,
99+
inference_max_paths=500)
90100
# build_experiments([avg_params])
91101
# run_estimation_experiments([avg_params]; timeout=TIMEOUT_SEC)
92102
avg_filename = params_to_results_filename(avg_params)
93103
avg_path = "Experiments/Results/Estimation_" * avg_filename
94104
avg_df = CSV.read(avg_path, DataFrame; normalizenames=true)
95105
for i in 1:nrow(avg_df)
96106
if avg_df[i, :Failure]
97-
inc!(failure_counts["AvgMix64"], string_dataset)
107+
inc!(failure_counts["AvgMix32"], string_dataset)
98108
end
99109
end
100-
failure_probabilities["AvgMix64"][string_dataset] = failure_counts["AvgMix64"][string_dataset] / num_queries[string_dataset]
110+
failure_probabilities["AvgMix32"][string_dataset] = failure_counts["AvgMix32"][string_dataset] / num_queries[string_dataset]
101111
end
102112

103-
estimators = ["cs", "wj", "jsub", "impr", "cset", "alley", "BSK", "BSK++", "sumrdf", "AvgMix64"]
113+
estimators = ["cs", "wj", "jsub", "impr", "cset", "alley", "alleyTPI", "BSK++", "sumrdf", "AvgMix32"]
104114

105115
global latex_table = """
106116
\\begin{table*}[]
117+
\\caption{Estimator Failure Rates}
107118
\\begin{tabular}{|l|l|l|l|l|l|l|l|l|l|l|}
108119
\\hline
109-
\\textbf{Dataset\\textbackslash{}Method} """
120+
\\textbf{Dataset\\textbackslash{}Method} \n"""
110121
for estimator in estimators
111122
global latex_table *= """& \\textbf{""" * string(estimator) * """} """
112123
end
@@ -115,13 +126,14 @@ global latex_table *= """\\\\
115126
for dataset in datasets
116127
global latex_table *= """\\textbf{""" * string(dataset) * """} """
117128
for estimator in estimators
118-
global latex_table *= " & " * @sprintf("%.2f", failure_probabilities[estimator][string(dataset)])
129+
failure_prob = failure_probabilities[estimator][string(dataset)]
130+
red_percent = Int(floor(100 * failure_prob^.5))
131+
global latex_table *= " & " * "\\cellcolor{red!" * string(red_percent) * "!green!50}" * @sprintf("%.2f", failure_prob)
119132
end
120-
global latex_table *= """\\\\ \\hline """
133+
global latex_table *= """\\\\ \\hline \n"""
121134
end
122135
global latex_table *= """
123136
\\end{tabular}
124-
\\caption{Estimator Failure Rates}
125137
\\label{tbl:estimator-failure}
126138
\\end{table*}
127139
"""
25 KB
Binary file not shown.

Experiments/graph_results.jl

+18-3
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ function comparison_dataset()
197197
QueryType=comparison_results[i,:QueryType])
198198
end
199199
estimators = unique(comparison_results[:, :Estimator])
200+
println(estimators)
200201
return estimators, results_dict
201202
end
202203

@@ -275,14 +276,13 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
275276
size = query_card_and_size[2]
276277
for estimator in estimator_types
277278
comp_key = (data, estimator, query_path)
278-
(estimate, runtime) = 1, 10 # TODO: We shouldn't use an arbitrary number for runtime here
279+
(estimate, runtime) = 1, 60 # TODO: We shouldn't use an arbitrary number for runtime here
279280
if haskey(comparison_results, comp_key)
280281
result = comparison_results[comp_key]
281282
estimate = result.Estimate
282283
runtime = result.Runtime
283284
else
284285
push!(estimator_dataset_missing, (estimator, data))
285-
continue
286286
end
287287

288288
current_x = if x_type == dataset
@@ -368,6 +368,21 @@ function graph_grouped_bar_plot(experiment_params_list::Vector{ExperimentParams}
368368
x_values = []
369369
y_values = Float64[]
370370
groups = []
371+
# Hard-coded bars for the comparison methods "sumrdf" and "alleyTPI".
# Each entry is (group label, x values, y values); entries are appended to the
# plot vectors in the order listed. Other y_type values add nothing here.
comparison_series = if y_type == memory_footprint
    [
        ("sumrdf",
         ["aids", "human", "lubm80", "dblp", "eu2005", "patents", "yeast", "youtube"],
         [1.6, 0.1, 19.5, 2, 5.8, 28, .2, 7.8]),
        ("alleyTPI", ["aids", "human", "lubm80"], [88, 648, 483]),
    ]
elseif y_type == build_time
    [
        ("sumrdf",
         ["aids", "human", "lubm80", "dblp", "eu2005", "patents", "yeast", "youtube"],
         [.3, 4.5, 9.9, .5, 4.2, 8.5, .1, 2.1]),
        ("alleyTPI", ["aids", "human", "lubm80"], [49, 614, 313]),
    ]
else
    []
end
for (label, xs, ys) in comparison_series
    append!(x_values, xs)
    append!(y_values, ys)
    # One group label per appended bar.
    append!(groups, fill(label, length(xs)))
end
371386
for experiment_params in experiment_params_list
372387
# load the results
373388
results_filename = params_to_results_filename(experiment_params)
@@ -516,4 +531,4 @@ function convert_dataset_to_string(data::DATASET)
516531
else
517532
return "unknown"
518533
end
519-
end
534+
end

0 commit comments

Comments
 (0)