Commit 499c684: Fix cycle length fix
1 parent 324b02c, commit 499c684
8 files changed: +239 -53 lines
New file (+106 lines)

@@ -0,0 +1,106 @@
+
+using Profile
+include("../Experiments.jl")
+
+datasets = [human, eu2005, dblp, youtube]
+datasets = [human, hprd]
+
+experiment_params = Vector{ExperimentParams}()
+for dataset in datasets
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(QuasiStable, 16), (NeighborNodeLabels, 8), (NodeLabels, 8)],
+                                              description = "AvgMix32"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(QuasiStable, 32), (NeighborNodeLabels, 16), (NodeLabels, 16)],
+                                              description = "AvgMix64"))
+#=
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(QuasiStable, 64), (NeighborNodeLabels, 32), (NodeLabels, 32)],
+                                              description = "AvgMix128"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(NodeLabels, 64)],
+                                              description = "AvgN64"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(NeighborNodeLabels, 64)],
+                                              description = "AvgNNL64"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(Degree, 64)],
+                                              description = "AvgD64"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(QuasiStable, 64)],
+                                              description = "AvgQ64"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                              dataset=dataset,
+                                              partitioning_scheme=[(Hash, 64)],
+                                              description = "AvgH64")) =#
+end
+
+build_experiments(experiment_params)
+
+run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
+
+x_order = [string(data) for data in datasets]
+legend_order = [params.description for params in experiment_params][1:Int(length(experiment_params)/length(datasets))]
+
+graph_grouped_box_plot(experiment_params;
+                       ylims=[10^-5, 10^4],
+                       y_ticks=[10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 10^0, 10^1, 10^2, 10^3, 10^4],
+                       y_type = runtime,
+                       x_type = dataset,
+                       x_order = x_order,
+                       legend_order=legend_order,
+                       grouping=description,
+                       dimensions = (700, 450),
+                       legend_pos=:top,
+                       y_label="Inference Latency 10^ (s)",
+                       filename="colorings_runtime")
+
+graph_grouped_box_plot(experiment_params;
+                       ylims=[10^-21, 10^21],
+                       y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
+                       y_type = estimate_error,
+                       x_type = dataset,
+                       x_order = x_order,
+                       legend_order=legend_order,
+                       grouping=description,
+                       dimensions = (700, 450),
+                       legend_pos=:topleft,
+                       y_label="Relative Error 10^",
+                       filename="colorings_error")
+
+
+graph_grouped_bar_plot(experiment_params;
+                       grouping=description,
+                       y_type=memory_footprint,
+                       x_order = x_order,
+                       legend_order=legend_order,
+                       ylims=[0, 50],
+                       y_ticks = [10, 20, 30, 40, 50],
+                       legend_pos=:topright,
+                       dimensions = (1000, 550),
+                       y_label="Memory (MBs)",
+                       filename="colorings_memory")
+
+graph_grouped_bar_plot(experiment_params;
+                       grouping=description,
+                       y_type=build_time,
+                       x_order = x_order,
+                       legend_order=legend_order,
+                       ylims=[0, 1600],
+                       y_ticks = [200, 400, 600, 800, 1000, 1200, 1400, 1600],
+                       dimensions = (1000, 550),
+                       y_label="Build Time (s)",
+                       filename="colorings_build_time")

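A quick illustration of the legend_order slice used above (not part of the commit): it assumes every dataset pushes the same configurations in the same order, so taking the first length(experiment_params)/length(datasets) descriptions yields exactly one legend entry per configuration. A minimal standalone Julia sketch under that assumption, with hypothetical values:

    # Hypothetical values mirroring the slice in the script above.
    descriptions = ["AvgMix32", "AvgMix64", "AvgMix32", "AvgMix64"]  # 2 datasets x 2 configs
    n_datasets = 2
    per_dataset = Int(length(descriptions) / n_datasets)   # errors if the count is uneven
    legend_order = descriptions[1:per_dataset]              # ["AvgMix32", "AvgMix64"]
    println(legend_order)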
Experiments/Scripts/comparison_exps.jl

(+38 -30)

@@ -3,92 +3,100 @@ using Profile
 include("../Experiments.jl")
 
 datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
+#datasets = [human, aids, yeast, dblp, youtube, eu2005, patents]
+datasets = [human, youtube]
+
+mix_scheme = [(QuasiStable, 32), (NeighborNodeLabels, 16), (NodeLabels, 16)]
 
 experiment_params = Vector{ExperimentParams}()
 for dataset in datasets
     push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
                                               dataset=dataset,
-                                              partitioning_scheme=[(QuasiStable, 64)],
-                                              description = "AvgQ64"))
-    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
-                                              dataset=dataset,
-                                              partitioning_scheme=[(QuasiStable, 32), (NeighborNodeLabels, 32),(QuasiStable, 32), (NeighborNodeLabels, 32)],
-                                              description = "AvgQ64N64"))
+                                              partitioning_scheme=mix_scheme,
+                                              description = "AvgMix64"))
 #=
     push!(experiment_params, ExperimentParams(deg_stats_type=MinDegStats,
                                               dataset=dataset,
-                                              partitioning_scheme=[(QuasiStable, 64)],
+                                              partitioning_scheme=mix_scheme,
                                               max_cycle_size = -1,
-                                              description = "MinQ64"))
+                                              description = "MinMix64"))
+
     push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
                                               dataset=dataset,
-                                              partitioning_scheme=[(QuasiStable, 64)],
+                                              partitioning_scheme=mix_scheme,
                                               max_cycle_size = -1,
-                                              description = "MaxQ64"))
+                                              description = "MaxMix64"))
 
     push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
                                               dataset=dataset,
                                               partitioning_scheme=[(Hash, 64)],
                                               max_cycle_size = -1,
                                               inference_max_paths = 10^30,
-                                              use_partial_sums = false,
-                                              description = "BSK"))
+                                              summary_max_paths=1000,
+                                              use_partial_sums =false,
+                                              description = "BSK++")) =#
 
     push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
                                               dataset=dataset,
                                               partitioning_scheme=[(QuasiStable, 1)],
                                               max_cycle_size = -1,
-                                              description = "IndEst")) =#
+                                              description = "TradEst"))
 end
 
 build_experiments(experiment_params)
 
-run_estimation_experiments(experiment_params; timeout=1.0)
-
-order = [string(data) for data in datasets]
+run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
+comparison_methods = ["alley", "wj", "impr", "jsub", "cs", "cset", "sumrdf"]
+x_order = [string(data) for data in datasets]
+legend_order = [params.description for params in experiment_params][1:Int(length(experiment_params)/ length(datasets))]
+legend_order = vcat(legend_order, comparison_methods)
 
 graph_grouped_boxplot_with_comparison_methods(experiment_params;
                                               ylims=[10^-5, 10^4],
                                               y_ticks=[10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 10^0, 10^1, 10^2, 10^3, 10^4],
                                               y_type = runtime,
                                               x_type = dataset,
-                                              x_order = order,
+                                              x_order = x_order,
+                                              legend_order = legend_order,
                                               grouping=description,
-                                              dimensions = (1450, 550),
-                                              legend_pos=:top,
+                                              dimensions = (1550, 650),
+                                              legend_pos=:topleft,
                                               y_label="Inference Latency 10^ (s)",
-                                              filename="overall_runtime")
+                                              filename="overall_runtime1")
 
 graph_grouped_boxplot_with_comparison_methods(experiment_params;
                                               ylims=[10^-21, 10^21],
                                               y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
                                               y_type = estimate_error,
                                               x_type = dataset,
-                                              x_order = order,
+                                              x_order = x_order,
+                                              legend_order = legend_order,
                                               grouping=description,
-                                              dimensions = (1450, 550),
+                                              dimensions = (1550, 650),
                                               legend_pos=:bottomleft,
                                               y_label="Relative Error 10^",
-                                              filename="overall_error")
-
+                                              filename="overall_error1")
 
 graph_grouped_bar_plot(experiment_params;
                        grouping=description,
                        y_type=memory_footprint,
-                       x_order = order,
+                       x_order = x_order,
+                       legend_order = legend_order,
                        ylims=[0, 50],
                        y_ticks = [10, 20, 30, 40, 50],
                        legend_pos=:topright,
                        dimensions = (1000, 550),
                        y_label="Memory (MBs)",
-                       filename="overall_memory")
+                       filename="overall_memory1")
 
 graph_grouped_bar_plot(experiment_params;
                        grouping=description,
                        y_type=build_time,
-                       x_order = order,
-                       ylims=[0, 1600],
-                       y_ticks = [200, 400, 600, 800, 1000, 1200, 1400, 1600],
+                       x_order = x_order,
+                       legend_order = legend_order,
+                       legend_pos=:topright,
+                       ylims=[0, 3500],
+                       y_ticks = [500, 1000, 1500, 2000, 2500, 3000],
                        dimensions = (1000, 550),
                        y_label="Build Time (s)",
-                       filename="overall_build_time")

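Side note on the new legend handling in comparison_exps.jl (an illustration, not part of the commit): the per-dataset estimator descriptions are concatenated with the fixed list of comparison baselines, so the plotting helpers receive a single ordered legend. A small Julia sketch with hypothetical stand-in values:

    # Hypothetical stand-ins for the values the script computes.
    legend_order = ["AvgMix64", "TradEst"]                   # per-dataset descriptions
    comparison_methods = ["alley", "wj", "impr", "jsub", "cs", "cset", "sumrdf"]
    legend_order = vcat(legend_order, comparison_methods)    # estimators first, baselines after
    @assert length(legend_order) == 9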
Experiments/Scripts/estimator-failure.jl

(+43 -10)

@@ -2,6 +2,7 @@ include("../Experiments.jl")
 
 #datasets = [human, aids]
 datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
+#datasets = [human, aids, yeast, dblp, youtube, eu2005, patents]
 queries = load_querysets(datasets)
 num_queries = Dict(string(dataset)=>length(queries[dataset]) for dataset in datasets)
 
@@ -27,27 +28,28 @@ for method in methods
                 inc!(failure_counts[method], string_dataset)
             end
         end
-        failure_probabilities[method][dataset] = failure_counts[method][string_dataset] / num_queries[string_dataset]
+        failure_probabilities[method][string_dataset] = failure_counts[method][string_dataset] / num_queries[string_dataset]
     end
 end
 
 failure_counts["BSK"] = counter(String)
 failure_counts["BSK++"] = counter(String)
-failure_counts["AvgQ64"] = counter(String)
+failure_counts["AvgMix64"] = counter(String)
 failure_probabilities["BSK"] = Dict()
 failure_probabilities["BSK++"] = Dict()
-failure_probabilities["AvgQ64"] = Dict()
+failure_probabilities["AvgMix64"] = Dict()
 for dataset in datasets
     string_dataset = string(dataset)
     bsk_params = ExperimentParams(deg_stats_type=MaxDegStats,
                                   dataset=dataset,
                                   partitioning_scheme=[(Hash, 64)],
                                   max_cycle_size = -1,
                                   inference_max_paths = 10^30,
+                                  summary_max_paths=1000,
                                   use_partial_sums = false,
                                   description = "BSK",
                                   n_replications = 1)
-    run_estimation_experiments([bsk_params]; timeout=TIMEOUT_SEC)
+    # run_estimation_experiments([bsk_params]; timeout=TIMEOUT_SEC)
     bsk_filename = params_to_results_filename(bsk_params)
     bsk_path = "Experiments/Results/Estimation_" * bsk_filename
     bsk_df = CSV.read(bsk_path, DataFrame; normalizenames=true)
@@ -64,10 +66,11 @@ for dataset in datasets
                                       partitioning_scheme=[(Hash, 64)],
                                       max_cycle_size = -1,
                                       inference_max_paths = 10^30,
+                                      summary_max_paths=1000,
                                       use_partial_sums = true,
                                       description = "BSK++",
                                       n_replications=1)
-    run_estimation_experiments([bsk_agg_params]; timeout=TIMEOUT_SEC)
+    # run_estimation_experiments([bsk_agg_params]; timeout=TIMEOUT_SEC)
    bsk_agg_filename = params_to_results_filename(bsk_agg_params)
     bsk_agg_path = "Experiments/Results/Estimation_" * bsk_agg_filename
     bsk_agg_df = CSV.read(bsk_agg_path, DataFrame; normalizenames=true)
@@ -79,16 +82,46 @@ for dataset in datasets
     failure_probabilities["BSK++"][string_dataset] = failure_counts["BSK++"][string_dataset] / num_queries[string_dataset]
 
 
-
-    avg_params = ExperimentParams(dataset=dataset, n_replications=1)
-    run_estimation_experiments([avg_params]; timeout=TIMEOUT_SEC)
+    mix_scheme = [(QuasiStable, 32), (NeighborNodeLabels, 16), (NodeLabels, 16)]
+    avg_params = ExperimentParams(dataset=dataset,
+                                  n_replications=2,
+                                  partitioning_scheme=mix_scheme)
+    # build_experiments([avg_params])
+    # run_estimation_experiments([avg_params]; timeout=TIMEOUT_SEC)
     avg_filename = params_to_results_filename(avg_params)
     avg_path = "Experiments/Results/Estimation_" * avg_filename
     avg_df = CSV.read(avg_path, DataFrame; normalizenames=true)
     for i in 1:nrow(avg_df)
         if avg_df[i, :Failure]
-            inc!(failure_counts["AvgQ64"], string_dataset)
+            inc!(failure_counts["AvgMix64"], string_dataset)
         end
     end
-    failure_probabilities["AvgQ64"][string_dataset] = failure_counts["AvgQ64"][string_dataset] / num_queries[string_dataset]
+    failure_probabilities["AvgMix64"][string_dataset] = failure_counts["AvgMix64"][string_dataset] / num_queries[string_dataset]
+end
+
+estimators = ["cs", "wj", "jsub", "impr", "cset", "alley", "BSK", "BSK++", "sumrdf", "AvgMix64"]
+
+global latex_table = """
+\\begin{table*}[]
+\\begin{tabular}{|l|l|l|l|l|l|l|l|l|l|l|}
+\\hline
+\\textbf{Dataset\\textbackslash{}Method} """
+for estimator in estimators
+    global latex_table *= """& \\textbf{""" * string(estimator) * """} """
+end
+global latex_table *= """\\\\
+\\hline"""
+for dataset in datasets
+    global latex_table *= """\\textbf{""" * string(dataset) * """} """
+    for estimator in estimators
+        global latex_table *= " & " * @sprintf("%.2f", failure_probabilities[estimator][string(dataset)])
+    end
+    global latex_table *= """\\\\ \\hline """
 end
+global latex_table *= """
+\\end{tabular}
+\\caption{Estimator Failure Rates}
+\\label{tbl:estimator-failure}
+\\end{table*}
+"""
+println(latex_table)

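The one-line change from failure_probabilities[method][dataset] to failure_probabilities[method][string_dataset] matters because the LaTeX table at the bottom reads the probabilities back with string(dataset); writing under one key and reading under another would make that lookup fail. A hedged, self-contained Julia illustration of the mismatch (the names here are stand-ins, not repo code):

    probs = Dict{Any, Float64}()
    dataset = :human                   # stand-in for the dataset value
    probs[dataset] = 0.25              # old behaviour: keyed by the raw value
    haskey(probs, string(dataset))     # false, so a later string(dataset) read throws KeyError
    probs[string(dataset)] = 0.25      # fixed behaviour: write and read both use the string key
    @assert haskey(probs, "human")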
Experiments/Scripts/partial-sums-exps.jl

(+2 -2)

@@ -2,7 +2,7 @@ using Plots.PlotMeasures
 include("../Experiments.jl")
 
 current_dataset = yeast
-max_paths = 9999999
+max_paths = -1
 
 # The goal of this file is to demonstrate the significance of the partial sum optimization.
 # We use the same datasets and summaries but we try estimating without partial sums, with partial
@@ -79,4 +79,4 @@ gbplot = groupedboxplot(x_values,
 xlabel!(gbplot, "Query Path Width")
 ylabel!(gbplot, "Inference Latency 10^ (s)")
 plotname = "partial-agg-exp.png"
-savefig(gbplot, "Experiments/Results/Figures/" * plotname)
+savefig(gbplot, "Experiments/Results/Figures/" * plotname)
