# sync_bandits.py
"""
This file contains code for running all experiments in the paper
A. Kolobov, S. Bubeck, J. Zimmert. "Online Learning for Active Cache Synchronization." ICML-2020.
To reproduce the experiments in Figures 1 and 2 in the main text and Figures 3 and 4 in the Appendix,
run exp1(), exp2(), exp1a(), and exp2a() from this file, respectively. See the comments for these
methods for more details.
Method param_search() allows inspecting the performance of the algorithms from the experiments under
different hyperparameter value combinations. Hyperparameter ranges for the paper's experiments are
provided in that method's comments.
"""
from abc import ABC, abstractmethod
import scipy.stats
from scipy.stats import poisson
import numpy as np
import scipy.integrate as integrate
import scipy.special as special
from scipy.optimize import minimize
import pprint
import time
import math
import matplotlib.pyplot as plt
import sys
np.set_printoptions(threshold=sys.maxsize)
PROBE_PLAY_IDX = 0
SYNC_PLAY_IDX = 1
# Abstract class for a cost generation process
class CostGen(ABC):
@abstractmethod
def __init__(self, num_arms, distrib_params):
super().__init__()
# The instantaneous cost
@abstractmethod
def c(self, intervals):
pass
"""Computes the average infinite-horizon cost of a play-rate-parameterized policy for the specified subset of arms.
Parameters
----------
r : float array
Play rates to use for all arms.
Returns
-------
float
Policy cost under the play rates r, normalized by the total number of arms.
"""
@abstractmethod
def J(self, r):
pass
"""Simulates arm plays for selected arms until a fixed horizon (epoch_start_time + epoch_len) and records the generated cost samples.
Parameters
----------
r : float array
Play rates to use for all arms.
arms_latest_play_times : int array
The timestamps of the latest simulated play times for all arms.
NOTE: arms_latest_play_times contents are modified by this method!
epoch_start_time : int
The start time of the epoch during which the arm plays are to be simulated.
epoch_len : int
The length of the epoch during which the arm plays are to be simulated.
epsilon : float
The probability of doing a probe play between any two consecutive sync plays.
Returns
-------
array of lists
An array with one list per arm; each list holds the [probe play cost, sync play cost] pairs generated for that arm during the epoch (the probe cost is -1 when no probe play was scheduled).
"""
def sample_costs(self, r, arms_latest_play_times, epoch_start_time, epoch_len, epsilon):
histories = np.empty(len(r), dtype = object)
for k in range(len(r)):
arm_k_play_hist = list()
"""
For arm k, each iteration of this loop schedules a sync play time (1 / r[k]) after the previous
sync play, until the time of the next scheduled sync play is past the end of the current scheduling
epoch (epoch_start_time + epoch_len). With probability epsilon it also schedules a probe play between
the previous sync play and the one scheduled in this iteration, at a time chosen uniformly between
the two.
"""
while arms_latest_play_times[k] + 1/r[k] <= epoch_start_time + epoch_len:
# With prob. epsilon, schedule a probe play.
if np.random.binomial(1, epsilon, 1) > 0:
probe_play_time = np.random.uniform(0, 1/r[k])
# probe_and_sync_play_times's time stamps are relative to the previous scheduled sync play
probe_and_sync_play_times = np.array([probe_play_time, 1/r[k]])
# Sample a cost for the scheduled probe play and sync play.
values = self.c(k, probe_and_sync_play_times)
arm_k_play_hist.append(values)
else:
# If we happened to schedule no probe play before the next sync play, insert the "0" indicator
# instead of a probe play timestamp, and the indicator "-1" cost for it.
probe_and_sync_play_times = np.array([0, 1/r[k]])
values = self.c(k, probe_and_sync_play_times)
values[PROBE_PLAY_IDX] = -1
arm_k_play_hist.append(values)
arms_latest_play_times[k] += 1/r[k]
histories[k] = arm_k_play_hist
return histories
"""Estimates the gradient of the cost functions for the selected arms.
Parameters
----------
r : float array
Play rates to use for all arms.
arms_latest_play_times : int array
The timestamps of the latest simulated play times for all arms.
NOTE: arms_latest_play_times contents are modified by this method!
epoch_start_time : int
The start time of the epoch during which the arm plays are to be simulated.
epoch_len : int
The length of the epoch during which the arm plays are to be simulated.
epsilon : float
The probability of doing a probe play between any two consecutive sync plays.
Returns
-------
est_grad_J : array of floats
An array representing the gradient estimate; entries for arms that were not re-estimated in this call are 0.
arms_with_new_grad : list of ints
Indices of the arms whose partial derivatives were estimated in this function call.
All other arms' partial derivative estimates are 0 and should be ignored.
"""
def estimate_grad_J(self, r, arms_latest_play_times, epoch_start_time, epoch_len, epsilon):
est_grad_J = np.zeros_like(r)
histories = self.sample_costs(r, arms_latest_play_times, epoch_start_time, epoch_len, epsilon)
arms_with_new_grad = []
for k in range(len(r)):
sum_est_grad_k = 0
# For each sync play, compute a J_k gradient estimate and add them all up.
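# Each term below weights the difference between the probe play cost (sampled at
# a uniformly random time within the sync interval) and the sync play cost by
# 1 / (epsilon * r[k]); plays without a probe play contribute 0. Averaging these
# terms over the epoch's plays gives the estimate of dJ_k/dr_k used below.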
for h in range(len(histories[k])):
if histories[k][h][PROBE_PLAY_IDX] != -1:
sum_est_grad_k += 1 / (epsilon * r[k]) * (histories[k][h][PROBE_PLAY_IDX] - histories[k][h][SYNC_PLAY_IDX])
else:
sum_est_grad_k += 0
# Average the gradient estimates and record the arms for which gradient estimates have been computed
if len(histories[k]) > 0:
est_grad_J[k] = (sum_est_grad_k / len(histories[k]))
arms_with_new_grad.append(k)
else:
est_grad_J[k] = 0
return (est_grad_J / len(arms_with_new_grad) if arms_with_new_grad else est_grad_J), arms_with_new_grad
def sigmoid(x):
return 1/(1 + np.exp(-x))
class SublinPoly(CostGen):
A_K = 0
P_K = 1
NOISE = 2
"""
For each arm, initialization uses a prior to choose a distribution over time-dependent cost functions.
Namely, arm k will have a distribution over sublinear polynomials
c_k(t) = a_k * (t^p_k)
where
- a_k will be sampled from Uniform(mu_c_k - mu_c_k * noise, mu_c_k + mu_c_k * noise) at cost function query time (in method c(.));
"noise" in [0,1] is a parameter shared by all arms, and mu_c_k is a parameter chosen at initialization time from the prior Uniform(0, 1).
- p_k is a parameter chosen at initialization time from the prior sigmoid("scaling" * Uniform[0,1]), where "scaling" is a parameter shared by all arms.
I.e., for the positive "scaling" values used here, p_k lies in [0.5, 1), so exponents above 0.5 are favored.
"""
def __init__(self, num_arms, distrib_params):
assert(distrib_params["noise"] <= 1)
self.params = np.zeros((num_arms, 3))
# mu_c_k, uniform in [0,1)
self.params[:, SublinPoly.A_K] = np.random.rand(num_arms,)
# p_k, biased towards exponents between 0.5 and 1
self.params[:, SublinPoly.P_K] = sigmoid(distrib_params["scaling"] * np.random.rand(num_arms,))
self.params[:, SublinPoly.NOISE] = distrib_params["noise"]
super().__init__(num_arms, distrib_params)
def c(self, arm, intervals):
# c(t) = a_k*t^p_k, where p_k is in (0, 1)
noisy_scaling = np.random.uniform(self.params[arm, SublinPoly.A_K] - self.params[arm, SublinPoly.NOISE] * self.params[arm, SublinPoly.A_K], self.params[arm, SublinPoly.A_K] + self.params[arm, SublinPoly.NOISE] * self.params[arm, SublinPoly.A_K])
return noisy_scaling * intervals ** self.params[arm, SublinPoly.P_K]
def J(self, r):
# J(r) = (1 / num_arms) * sum_k [ r_k * a_k * (1 / (p_k + 1)) * (1 / r_k)^(p_k + 1) ]
pol_cost = 1 / self.params.shape[0] * np.dot(r, self.params[:, SublinPoly.A_K] * (1 / (self.params[:, SublinPoly.P_K] + 1) * ((1 / r) ** (self.params[:, SublinPoly.P_K] + 1))))
return pol_cost
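# Example (an illustrative sketch only): instantiate a small SublinPoly family,
# draw one [probe cost, sync cost] sample for arm 0, and evaluate the analytical
# policy cost when every arm is played at rate 0.2.
#
#   cg = SublinPoly(5, {"scaling": 5, "noise": 0.1})
#   probe_and_sync_costs = cg.c(0, np.array([0.5, 1.0]))
#   print(cg.J(np.full(5, 0.2)))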
class BinaryPoisson(CostGen):
IMPORTANCE = 0
CHANGE_RATE = 1
def __init__(self, num_arms, distrib_params):
self.params = np.zeros((num_arms, 2))
# Web page importance scores; set them all to 1.0 for simplicity.
self.params[:, BinaryPoisson.IMPORTANCE] = np.full((num_arms,), 1.0)
# Web page change rates. Sample them uniformly from the interval [chrate_lo, chrate_hi].
self.params[:, BinaryPoisson.CHANGE_RATE] = np.random.uniform(low=distrib_params['chrate_lo'], high=distrib_params['chrate_hi'], size=(num_arms,))
super().__init__(num_arms, distrib_params)
def c(self, arm, intervals):
assert(len(intervals) == 2)
"""
Assume that the values in the "intervals" array are sorted in the
increasing order. This will ensure that we will essentially sample
a monotonically increasing function of inteval samples.
"""
results = np.zeros((len(intervals),))
samples = (0 if intervals[PROBE_PLAY_IDX] == 0 else poisson.rvs(self.params[arm, BinaryPoisson.CHANGE_RATE] * intervals[PROBE_PLAY_IDX], size=1))
results[PROBE_PLAY_IDX] = self.params[arm, BinaryPoisson.IMPORTANCE] * (1.0 if samples > 0 else 0.0)
if intervals[SYNC_PLAY_IDX] < intervals[PROBE_PLAY_IDX]:
raise ValueError("Intervals aren't in the increasing order of length")
elif intervals[SYNC_PLAY_IDX] == intervals[PROBE_PLAY_IDX]:
results[SYNC_PLAY_IDX] = self.params[arm, BinaryPoisson.IMPORTANCE] * (1.0 if samples > 0 else 0.0)
else:
samples = samples + poisson.rvs(self.params[arm, BinaryPoisson.CHANGE_RATE] * (intervals[SYNC_PLAY_IDX] - intervals[PROBE_PLAY_IDX]), size=1)
results[SYNC_PLAY_IDX] = self.params[arm, BinaryPoisson.IMPORTANCE] * (1.0 if samples > 0 else 0.0)
return results
def _C_exp(self, r):
# Expected cost accumulated over one sync interval: the integral over (0, 1/r_k] of E[c_k(t)] = mu_k * (1 - e^(-delta_k * t)), which equals mu_k * (1/r_k + e^(-delta_k/r_k)/delta_k - 1/delta_k) for each arm (page) k, where mu_k is the page importance and delta_k is the page change rate.
return self.params[:, BinaryPoisson.IMPORTANCE] * (1.0 / r + np.exp(- self.params[:, BinaryPoisson.CHANGE_RATE] / r) / self.params[:, BinaryPoisson.CHANGE_RATE] - 1.0 / self.params[:, BinaryPoisson.CHANGE_RATE])
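# A sketch of where the closed form above comes from: a page changes at least once
# in an interval of length t with probability 1 - e^(-delta_k * t), so the expected
# binary cost is E[c_k(t)] = mu_k * (1 - e^(-delta_k * t)). Integrating over one sync
# interval gives
#   integral_0^{1/r_k} mu_k * (1 - e^(-delta_k * t)) dt
#     = mu_k * (1/r_k - (1 - e^(-delta_k / r_k)) / delta_k)
#     = mu_k * (1/r_k + e^(-delta_k / r_k) / delta_k - 1/delta_k),
# which matches the expression returned by _C_exp().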
def J(self, r):
# J(r) = (1 / num_arms) * sum_k [ r_k * _C_exp(r)_k ]
pol_cost = 1.0 / self.params.shape[0] * np.dot(r, self._C_exp(r))
return pol_cost
"""Computes the optimal policy cost. Since the policy cost function and constraint region is convex, this can be done via simple convex optimization.
Parameters
----------
arms : int array
Indices corresponding to arms that should be taken into account in this computation.
c : CostGen
A description of a family of cost-generating processes.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
B : float
Maximum total sync play rate.
Returns
-------
array of lists
An array of lists of [probe play cost, sync play cost] pairs.
"""
def compute_opt_policy_cost(c, rmin, rmax, B):
num_arms = c.params.shape[0]
bnds=[[rmin, rmax] for k in range(num_arms)]
obj_func = (lambda r : c.J(r))
start_time = time.time()
argm = minimize(obj_func, x0 = np.full((num_arms, 1), B / num_arms), args=(), method="SLSQP", bounds=bnds, constraints={"fun": (lambda r : B - sum(r)), "type": "ineq"}, tol=None, callback=None, options=None)
end_time = time.time()
return c.J(argm.x)
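# Example (an illustrative sketch only; poly_params is defined further below in
# this file): the optimal-policy cost for a small random SublinPoly instance with
# rate bounds matching the paper's polynomial-cost experiments.
#
#   c = SublinPoly(10, poly_params)
#   print(compute_opt_policy_cost(c, rmin=0.025, rmax=3, B=0.4 * 10))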
"""The MirrorSync algorithm. See the ICML-2020 paper for a detailed description.
Parameters
----------
c : CostGen
A description of a family of cost-generating processes.
num_arms : int
Number of arms.
learning_rate : float
Learning rate.
num_rounds : int
Number of learning rounds (\mathcal{T}_max in the ICML-2020 paper).
epsilon : float
The fraction of total bandwidth allocated to probe plays.
B_frac : float
Maximum total sync play rate as a fraction of the number of arms.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
silent: boolean
If True, suppresses most of the output.
Returns
-------
array of floats
Policy costs before the first learning round and at the end of each learning round, (num_rounds + 1) values in total.
"""
def mirror_sync(c, num_arms = 100, learning_rate = 0.05, num_rounds = 50, epsilon = 0.1, B_frac = 0.2, rmin = 0.001, rmax = 100, silent = True):
print("Running MirrorSync")
"""
Since we will be doing exploratory probe plays in addition to regular sync plays, we need to adjust the maximum play rate, which will apply to sync plays only.
"""
rmax = 1 / (1 + epsilon) * rmax
B_total = B_frac * num_arms
"""
The total play rate constraint also applies only to sync plays, so we need to adjust it to account for probe plays as well, just as we did with rmax.
"""
B = 1 / (1 + epsilon) * B_total
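# (For example, with the default arguments epsilon = 0.1, B_frac = 0.2 and 100 arms,
# B_total = 20 and the sync-play budget becomes B = 20 / 1.1 ~= 18.2; the remaining
# ~1.8 plays per unit time are left for the epsilon-fraction of probe plays.)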
results = np.zeros((num_rounds + 1,))
est_grad_J = np.zeros((num_arms,))
r = np.zeros((num_arms,))
r = mirror_descent_step(r, est_grad_J / num_arms, learning_rate, rmin, rmax, B)
J = c.J(r)
if not silent:
print('Initial J value is ', J)
results[0] = J
for i in range(num_rounds):
est_grad_J, arms_with_new_grads = c.estimate_grad_J(r, np.zeros_like(r), 0, 1 / rmin, epsilon)
assert num_arms == len(arms_with_new_grads), "In MirrorSync, every arm must get a new gradient estimate in every round! Num arms: %r, num arms with new gradient estimates: %r" % (num_arms, len(arms_with_new_grads))
r = mirror_descent_breg_step(r, est_grad_J, range(num_arms), learning_rate, rmin, rmax, B)
J = c.J(r)
results[i + 1] = J
if (not silent) or (i == num_rounds - 1):
print('Update round %r: J_round is %r'%(i + 1, J))
if not silent:
print('Per-update-round Js are ')
pprint.pprint(results)
return results
"""The AsynMirrorSync/AsyncPSGDSync algorithm. See the ICML-2020 paper for a detailed description.
Parameters
----------
c : CostGen
A description of a family of cost-generating processes.
alg : string
'ms' for AsyncMirrorSync, 'psgd' for AsyncPSGDSync.
num_arms : int
Number of arms.
learning_rate : float
Learning rate.
num_rounds : int
Number of equivalent MirrorSync learning rounds (\mathcal{T}_max in the ICML-2020 paper).
epsilon : float
The fraction of total bandwidth allocated to probe plays.
update_cycle_length : int
Length of time between rate update attempts.
NOTE: For ease of comparison to MirrorSync's performance, we assume that (1 / rmin) is a multiple of update_cycle_length.
B_frac : float
Maximum total sync play rate as a fraction of the number of arms.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
silent: boolean
If True, suppresses most of the output.
Returns
-------
array of floats
Policy costs before the first learning round and at the end of each learning round, (num_rounds + 1) values in total.
"""
def async_mirror_sync(c, alg, num_arms = 100, learning_rate = 0.05, num_rounds = 50, epsilon = 0.1, update_cycle_length = 2, B_frac = 0.2, rmin = 0.001, rmax = 100, silent = True):
if alg == 'ms':
alg_name = 'AsyncMirrorSync'
elif alg == 'psgd':
alg_name = 'AsyncPSGDSync'
else:
raise ValueError('Invalid flavor of AsyncMirrorSync %r.'%(alg))
print('Running', alg_name)
rmax = 1 / (1 + epsilon) * rmax
B_total = B_frac * num_arms
B = 1 / (1 + epsilon) * B_total
if not silent:
print('Update cycle length: %r'%(update_cycle_length))
"""
We will record policy cost after every update_cycle_length, which can be much smaller than MirrorSync's round length = 1 / rmin.
For ease of comparison to MirrorSync's performance, we assume that (1 / rmin) is a multiple of update_cycle_length.
"""
results = np.zeros((num_rounds * math.ceil((1 / rmin) / update_cycle_length) + 1,))
r = np.zeros((num_arms,))
r = mirror_descent_step(r, np.zeros((num_arms,)) / num_arms, learning_rate, rmin, rmax, B)
J = c.J(r)
if not silent:
print('Current J value: %r'%(J))
results[0] = J
cycle_num = 1
t_now = 0
time_of_prev_update = t_now
arms_latest_play_times = np.full_like(r, t_now)
# Repeat this for the amount of time equivalent to num_rounds MirrorSync rounds.
while t_now < num_rounds * (1 / rmin):
t_now += update_cycle_length
# Estimate the new gradient for those arms that have generated enough cost samples since the previous time their play rate was updated.
est_grad_J_local, arms_with_new_grad = c.estimate_grad_J(r, arms_latest_play_times, time_of_prev_update, t_now - time_of_prev_update, epsilon)
"""
NOTE: the total bandwidth available to the arms with new gradient estimates for re-distribution in this cycle will be their current total
play rate, _not_ the total bandwidth constraint.
"""
B_current_update = sum(r[arms_with_new_grad])
if (len(arms_with_new_grad) > 0):
r_new = np.zeros_like(r)
# AsyncMirrorSync and AsyncPSGDSync differ only in the algorithm they use to update the play rates (mirror descent for the former, projected SGD for the latter.)
if (alg == 'ms'):
r_new[arms_with_new_grad] = mirror_descent_breg_step(r, est_grad_J_local, arms_with_new_grad, learning_rate, rmin, rmax, B_current_update)
elif (alg == 'psgd'):
r_new[arms_with_new_grad] = projected_sgd_step(r, est_grad_J_local, arms_with_new_grad, learning_rate, rmin, rmax, B_current_update)
else:
raise ValueError('Invalid or no optimizer specified.')
for k in arms_with_new_grad:
# We do this to make sure that the last scheduled play is in the next cycle. Note that this play doesn't contribute to gradient estimation.
if np.random.binomial(1, epsilon, 1) > 0:
arms_latest_play_times[k] = t_now
elif (1 / r_new[k]) < (1 / r[k]) and arms_latest_play_times[k] + (1 / r_new[k]) >= t_now:
arms_latest_play_times[k] = arms_latest_play_times[k] + 1 / r_new[k]
else:
arms_latest_play_times[k] = arms_latest_play_times[k] + 1 / r[k]
r[k] = r_new[k]
time_of_prev_update = t_now
J = c.J(r)
results[cycle_num] = J
if (not silent) or (t_now >= num_rounds * (1 / rmin)):
print('Update cycle %r: J_cycle is %r'%(cycle_num, J))
cycle_num += 1
if not silent:
print('Per-update-cycle Js are ')
pprint.pprint(results)
return results
def log_barrier(r):
return np.sum(-np.log(r))
def bregman_div(r, r_prev):
return np.sum(-np.log(r/r_prev) + r/r_prev - 1)
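# Note: bregman_div() is the Bregman divergence induced by the log barrier F(r) = -sum_k log(r_k):
#   D_F(r, r') = F(r) - F(r') - <grad F(r'), r - r'> = sum_k [ -log(r_k / r'_k) + r_k / r'_k - 1 ],
# which is the term that mirror_descent_breg_step() below minimizes together with the linearized cost.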
"""One step of lazy mirror descent with log barrier function.
Parameters
----------
curr_r : float array
Current sync play rates.
est_grad_J : float array
Gradient estimate at curr_r.
learning_rate : float
Learning rate.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
B : float
Maximum total sync play rate.
Returns
-------
float array
New sync play rates.
"""
def mirror_descent_step(curr_r, est_grad_J, learning_rate, rmin, rmax, B):
assert(len(curr_r) == len(est_grad_J))
bnds=[[rmin, rmax] for i in range(len(curr_r))]
obj_func = (lambda r : learning_rate * np.dot(est_grad_J, r) + log_barrier(r))
start_time = time.time()
argm = minimize(obj_func, x0 = curr_r, args=(), method="SLSQP", bounds=bnds, constraints={"fun": (lambda r : B - sum(r)), "type": "ineq"}, tol=None, callback=None, options=None)
end_time = time.time()
return argm.x
"""One step of mirror descent with log barrier function.
Parameters
----------
curr_r : float array
Current sync play rates.
est_grad_J : float array
Gradient estimate at curr_r.
arms : int array
The arms to which this function call should apply
(mirror_descent_breg_step can be invoked on only a subset of all arms).
learning_rate : float
Learning rate.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
B : float
Maximum total sync play rate.
Returns
-------
float array
New sync play rates for arms whose indices are specified by the "arms" parameter.
"""
def mirror_descent_breg_step(curr_r, est_grad_J, arms, learning_rate, rmin, rmax, B):
est_grad_J_filtered = est_grad_J[arms]
r_prev_filtered = curr_r[arms]
bnds=[[rmin, rmax] for i in arms]
obj_func = (lambda r : learning_rate * np.dot(est_grad_J_filtered, r) + bregman_div(r, r_prev_filtered))
start_time = time.time()
argm = minimize(obj_func, x0 = curr_r[arms], args=(), method="SLSQP", bounds=bnds, constraints={"fun": (lambda r : B - sum(r)), "type": "ineq"}, tol=None, callback=None, options=None)
end_time = time.time()
return argm.x
"""One step of projected SGD. It performs 1 SGD update and projects the result into the constraint region.
Parameters
----------
curr_r : float array
Current sync play rates.
est_grad_J : float array
Gradient estimate at curr_r.
arms : int array
The arms to which this function call should apply
(projected_sgd_step can be invoked on only a subset of all arms).
learning_rate : float
Learning rate.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
B : float
Maximum total sync play rate.
Returns
-------
float array
New sync play rates for arms whose indices are specified by the "arms" parameter.
"""
def projected_sgd_step(curr_r, est_grad_J, arms, learning_rate, rmin, rmax, B):
est_grad_J_filtered = est_grad_J[arms]
r_prev_filtered = curr_r[arms]
r_new = r_prev_filtered - learning_rate * est_grad_J_filtered
bnds=[[rmin, rmax] for i in arms]
obj_func = (lambda r : np.sum(np.square(r_new - r)))
start_time = time.time()
argm = minimize(obj_func, x0 = r_new, args=(), method="SLSQP", bounds=bnds, constraints={"fun": (lambda r : B - sum(r)), "type": "ineq"}, tol=None, callback=None, options=None)
end_time = time.time()
return argm.x
#def param_search(alg='MirrorSync', learning_rates=[5, 7, 9, 11, 13, 15, 17, 19], update_cycle_lengths=[40], num_arms=100, horizon=30, epsilon=0.2, B_frac=0.4, rmin=0.025, rmax=3):
#def param_search(alg='AsyncMirrorSync', learning_rates=[0.5, 1, 3, 6, 9, 11], update_cycle_lengths=[8, 10, 20, 40], num_arms=100, horizon=30, epsilon=0.2, B_frac=0.4, rmin=0.025, rmax=3):
#def param_search(alg='AsyncPSGDSync', learning_rates=[0.04, 0.06, 0.08, 0.1, 0.2, 0.3, 0.4], update_cycle_lengths=[8, 10, 20, 40], num_arms=100, horizon=30, epsilon=0.2, B_frac=0.4, rmin=0.025, rmax=3):
poly_params = { "scaling" : 5, "noise" : 0.1 }
binary_Poisson_params = { "chrate_lo" : 0.005, "chrate_hi" : 5 }
exp1_num_arms = 100
"""
Run this method with the default parameter values to produce the results in Figure 1 of the ICML-2020 paper
(MirrorSync vs. AsyncMirrorSync evaluated on polynomial cost-generating processes).
See the description of the exp1helper for parameter explanations.
The method prints strings with experiment results that can be used to produce plots such as in the paper's figures using pgfplots.
"""
def exp1(cost_fun=SublinPoly, distrib_params=poly_params, num_arms=exp1_num_arms, num_runs=150, update_cycle_length=20, horizon=240, l_r_ms= 2.7, l_r_ams=1.6, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=3):
exp1helper(cost_fun, distrib_params, num_arms, num_runs, update_cycle_length, horizon, l_r_ms, l_r_ams, epsilon, B_frac, rmin, rmax)
"""
Run this method with the default parameter values to produce the results in Figure 3 of the ICML-2020 paper
(MirrorSync vs. AsyncMirrorSync evaluated on Poisson-process-based cost-generating processes).
See the description of the exp1helper for parameter explanations.
The method prints strings with experiment results that can be used to produce plots such as in the paper's figures using pgfplots.
"""
def exp1a(cost_fun=BinaryPoisson, distrib_params=binary_Poisson_params, num_arms=exp1_num_arms, num_runs=150, update_cycle_length=8, horizon=240, l_r_ms=5, l_r_ams=1.3, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=6):
exp1helper(cost_fun, distrib_params, num_arms, num_runs, update_cycle_length, horizon, l_r_ms, l_r_ams, epsilon, B_frac, rmin, rmax)
"""A helper method for running the experiments in Figures 1 and 3 of the ICML-2020 paper that compare MirrorSync's and AsyncMirrorSync's performance.
Parameters
----------
cost_fun : a descendant class of CostGen
A class describing a family of cost-generating processes.
distrib_params : dictionary
Parameters for the family of cost-generating processes given by cost_fun.
num_arms : int
Number of arms.
num_runs : int
Number of runs of each algorithm to perform. The two algorithms' runs are paired, i.e., in each run both algorithms are executed on the same
cost_fun instance initialized for that run.
update_cycle_length : int
Length of time between rate update attempts of AsyncMirrorSync.
NOTE: For ease of comparison to MirrorSync's performance, we assume that (1 / rmin) is a multiple of update_cycle_length.
horizon : int
Number of equivalent MirrorSync learning rounds (\mathcal{T}_max for MirrorSync in the ICML-2020 paper).
l_r_ms : float
Learning rate for MirrorSync.
l_r_ams : float
Learning rate for AsyncMirrorSync.
epsilon : float
The fraction of total bandwidth allocated to probe plays.
B_frac : float
Maximum total sync play rate as a fraction of the number of arms.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
Returns
-------
None
The method prints strings with experiment results that can be used to produce plots such as in the paper's figures using pgfplots.
"""
def exp1helper(cost_fun, distrib_params, num_arms, num_runs, update_cycle_length, horizon, l_r_ms, l_r_ams, epsilon, B_frac, rmin, rmax):
STEP_MULT = (1 / rmin) / update_cycle_length
# A list of arrays that will hold the policy cost after every policy update round of every run of MirrorSync.
ms_runs = []
# A list of arrays that will hold the policy cost after every policy update round of every run of AsyncMirrorSync.
ams_runs = []
# A list that will hold the optimal epsilon-exploration policy cost for every run. It can vary from run to run due to stochastic cost_fun initialization.
opt_epsilon_cost = []
for run in range(num_runs):
print('\n\n=========== RUN #%r ============'%(run + 1))
"""
Initialize a family of cost-generating processes.
NOTE: the initialization may be stochastic, and therefore is done for every run separately.
"""
c = cost_fun(num_arms, distrib_params)
opt_epsilon_cost_curr = compute_opt_policy_cost(c, rmin, rmax / (1 + epsilon), B_frac * num_arms / (1 + epsilon))
opt_epsilon_cost.append(opt_epsilon_cost_curr)
print('Optimal epsilon-exploration periodic policy cost: %r'%(opt_epsilon_cost_curr))
results_ms = mirror_sync(c, num_arms, l_r_ms, horizon, epsilon, B_frac, rmin, rmax, silent=True)
results_ams = async_mirror_sync(c, 'ms', num_arms, l_r_ams, horizon, epsilon, update_cycle_length, B_frac, rmin, rmax, silent=True)
ms_runs.append(results_ms)
ams_runs.append(results_ams)
if (run + 1) % 10 == 0 or (run + 1) == num_runs:
print('\n\n********** RESULTS AFTER %r RUNS *************'%(run + 1))
ms_ms, ms_hs = process_results(ms_runs)
ms_plot_string = ''
for i in range(ms_ms.shape[0]):
ms_plot_string += '(' + str(i) + ',' + str(ms_ms[i]) + ')+=(0,' + str(ms_hs[i]) + ')-=(0,' + str(ms_hs[i]) + ')\n'
print('MirrorSync:\n', ms_plot_string, '\n\n')
ams_ms, ams_hs = process_results(ams_runs)
ams_plot_string = ''
for i in range(ams_ms.shape[0]):
ams_plot_string += '(' + str(i / STEP_MULT) + ',' + str(ams_ms[i]) + ')+=(0,' + str(ams_hs[i]) + ')-=(0,' + str(ams_hs[i]) +')\n'
print('AsyncMirrorSync:\n', ams_plot_string, '\n\n')
opt_epsilon_cost_string = ''
opt_epsilon_cost_mean = np.mean(opt_epsilon_cost)
for i in range(horizon + 1):
opt_epsilon_cost_string += '(' + str(i) + ',' + str(opt_epsilon_cost_mean) + ')\n'
print('Optimal epsilon-exploration periodic policy cost:\n', opt_epsilon_cost_string, '\n\n')
print('*************************************************')
exp2_num_arms = 100
"""
Run this method with the default parameter values to produce the results in Figure 2 of the ICML-2020 paper
(AsyncMirrorSync vs. AsyncPSGDSync evaluated on polynomial cost-generating processes).
See the description of the exp2helper for parameter explanations.
The method prints strings with experiment results that can be used to produce plots such as in the paper's figures using pgfplots.
"""
def exp2(cost_fun=SublinPoly, distrib_params=poly_params, num_arms=exp2_num_arms, num_runs=150, update_cycle_length_psgd=20, update_cycle_length_ams=20, horizon=240, l_r_psgd= 0.08, l_r_ams=1.6, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=3):
exp2helper(cost_fun, distrib_params, num_arms, num_runs, update_cycle_length_psgd, update_cycle_length_ams, horizon, l_r_psgd, l_r_ams, epsilon, B_frac, rmin, rmax)
"""
Run this method with the default parameter values to produce the results in Figure 4 of the ICML-2020 paper
(AsyncMirrorSync vs. AsyncPSGDSync evaluated on Poisson-process-based cost-generating processes).
See the description of the exp2helper for parameter explanations.
The method prints strings with experiment results that can be used to produce plots such as in the paper's figures using pgfplots.
"""
def exp2a(cost_fun=BinaryPoisson, distrib_params=binary_Poisson_params, num_arms=exp2_num_arms, num_runs=150, update_cycle_length_psgd=40, update_cycle_length_ams=8, horizon=240, l_r_psgd= 0.5, l_r_ams=1.3, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=6):
exp2helper(cost_fun, distrib_params, num_arms, num_runs, update_cycle_length_psgd, update_cycle_length_ams, horizon, l_r_psgd, l_r_ams, epsilon, B_frac, rmin, rmax)
"""A helper method for running the experiments in Figures 2 and 4 of the ICML-2020 paper that compare AsyncMirrorSync's and AsyncPSGDSync's performance.
Parameters
----------
cost_fun : a descendant class of CostGen
A class describing a family of cost-generating processes.
distrib_params : dictionary
Parameters for the family of cost-generating processes given by cost_fun.
num_arms : int
Number of arms.
num_runs : int
Number of runs of each algorithm to perform. The two algorithms' runs are paired, i.e., in each run both algorithms are executed on the same
cost_fun instance initialized for that run.
update_cycle_length_psgd : int
Length of time between rate update attempts of AsyncPSGDSync.
NOTE: For ease of comparison, we assume that (1 / rmin) is a multiple of update_cycle_length_psgd.
update_cycle_length_ams : int
Length of time between rate update attempts of AsyncMirrorSync.
NOTE: For ease of comparison, we assume that (1 / rmin) is a multiple of update_cycle_length_ams.
horizon : int
Number of equivalent MirrorSync learning rounds (\mathcal{T}_max for MirrorSync in the ICML-2020 paper).
l_r_psgd : float
Learning rate for AsyncPSGDSync.
l_r_ams : float
Learning rate for AsyncMirrorSync.
epsilon : float
The fraction of total bandwidth allocated to probe plays.
B_frac : float
Maximum total sync play rate as a fraction of the number of arms.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
Returns
-------
None
The method prints strings with experiment results that can be used to produce plots such as in the paper's figures using pgfplots.
"""
def exp2helper(cost_fun, distrib_params, num_arms, num_runs, update_cycle_length_psgd, update_cycle_length_ams, horizon, l_r_psgd, l_r_ams, epsilon, B_frac, rmin, rmax):
STEP_MULT_MD = (1 / rmin) / update_cycle_length_ams
STEP_MULT_SGD = (1 / rmin) / update_cycle_length_psgd
# A list of arrays that will hold the policy cost after every policy update round of every run of AsyncPSGDSync.
psgd_runs = []
# A list of arrays that will hold the policy cost after every policy update round of every run of AsyncMirrorSync.
ams_runs = []
# A list that will hold the optimal epsilon-exploration policy cost for every run. It can vary from run to run due to stochastic cost_fun initialization.
opt_epsilon_cost = []
for run in range(num_runs):
print('\n\n=========== RUN #%r ============'%(run + 1))
"""
Initialize a family of cost-generating processes.
NOTE: the initialization may be stochastic, and therefore is done for every run separately.
"""
c = cost_fun(num_arms, distrib_params)
opt_epsilon_cost_curr = compute_opt_policy_cost(c, rmin, rmax / (1 + epsilon), B_frac * num_arms/(1 + epsilon))
opt_epsilon_cost.append(opt_epsilon_cost_curr)
print('Optimal epsilon-exploration periodic policy cost: %r'%(opt_epsilon_cost_curr))
results_psgd = async_mirror_sync(c, 'psgd', num_arms, l_r_psgd, horizon, epsilon, update_cycle_length_psgd, B_frac, rmin, rmax, silent=True)
results_ams = async_mirror_sync(c, 'ms', num_arms, l_r_ams, horizon, epsilon, update_cycle_length_ams, B_frac, rmin, rmax, silent=True)
psgd_runs.append(results_psgd)
ams_runs.append(results_ams)
if (run + 1) % 10 == 0 or (run + 1) == num_runs:
print('\n\n********** RESULTS AFTER %r RUNS *************'%(run + 1))
psgd_ms, psgd_hs = process_results(psgd_runs)
psgd_plot_string = ''
for i in range(psgd_ms.shape[0]):
psgd_plot_string += '(' + str(i / STEP_MULT_SGD) + ',' + str(psgd_ms[i]) + ')+=(0,' + str(psgd_hs[i]) + ')-=(0,' + str(psgd_hs[i]) + ')\n'
print('AsyncPSGDSync:\n', psgd_plot_string, '\n\n')
ams_ms, ams_hs = process_results(ams_runs)
ams_plot_string = ''
for i in range(ams_ms.shape[0]):
ams_plot_string += '(' + str(i / STEP_MULT_MD) + ',' + str(ams_ms[i]) + ')+=(0,' + str(ams_hs[i]) + ')-=(0,' + str(ams_hs[i]) + ')\n'
print('AsyncMirrorSync:\n', ams_plot_string, '\n\n')
opt_epsilon_cost_string = ''
opt_epsilon_cost_mean = np.mean(opt_epsilon_cost)
for i in range(horizon + 1):
opt_epsilon_cost_string += '(' + str(i) + ',' + str(opt_epsilon_cost_mean) + ')\n'
print('Optimal epsilon-exploration periodic policy cost: ', opt_epsilon_cost_string, '\n\n')
print('*************************************************')
# Compute means and confidence intervals. Adapted from
# https://github.com/microsoft/Optimal-Freshness-Crawl-Scheduling/blob/master/LambdaCrawlExps.py
def mean_confidence_interval(data, confidence = 0.95):
a = 1.0 * data
n = len(a)
m, se = np.mean(a), scipy.stats.sem(a)
h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
return m, h
def process_results(runs):
runs = np.array(runs)
ms = np.zeros_like(runs[0,:])
hs = np.zeros_like(ms)
for j in range(runs.shape[1]):
m, h = mean_confidence_interval(runs[:,j])
ms[j] = m
hs[j] = h
return ms, hs
"""Hyperparameter search for the experiments in the ICML-2020 paper. This method performs one run of the specified algorithm for a fixed seed
for every combination of values in learning_rates and update_cycle_lengths lists, keeping the other specified parameters such as horizon fixed.
Parameters
----------
cost_fun : a descendant class of CostGen
A class describing a family of cost-generating processes.
distrib_params : dictionary
Parameters for the family of cost-generating processes given by cost_fun.
alg : string
Algorithm name: 'MirrorSync', 'AsyncMirrorSync' or 'AsyncPSGDSync'
learning_rates : float array
Learning rates to try.
update_cycle_lengths : int array
Update cycle lengths to try. For MirrorSync, this list should contain just one entry equal to (1 / rmin).
num_arms : int
Number of arms.
horizon : int
Number of equivalent MirrorSync learning rounds (\mathcal{T}_max for MirrorSync in the ICML-2020 paper).
epsilon : float
The fraction of total bandwidth allocated to probe plays.
B_frac : float
Maximum total sync play rate as a fraction of the number of arms.
rmin : float
Minimum allowed arm sync play rate.
rmax : float
Maximum allowed arm sync play rate.
Returns
-------
None
The method prints, for every hyperparameter combination, policy costs after every play rate update and, at the end of hyperparameter search,
policy cost vs. number of play rate update plots for all hyperparameter combinations.
"""
# ***** Parameter ranges for Figures 1 and 2 in the ICML-2020 paper.******
#param_search(cost_fun=SublinPoly, distrib_params=poly_params, alg='MirrorSync', learning_rates=[0.5, 1, 2, 2.3, 2.5, 2.7, 3, 3.3, 4, 5, 6, 7, 9, 11, 13, 15, 17], update_cycle_lengths=[40], num_arms=100, horizon=240, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=3)
#param_search(cost_fun=SublinPoly, distrib_params=poly_params, alg='AsyncMirrorSync', learning_rates=[1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7], update_cycle_lengths=[8, 10, 20, 40], num_arms=100, horizon=240, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=3)
#param_search(cost_fun=SublinPoly, distrib_params=poly_params, alg='AsyncPSGDSync', learning_rates=[0.04, 0.06, 0.08, 0.1, 0.2, 0.3, 0.4], update_cycle_lengths=[8, 10, 20, 40], num_arms=100, horizon=240, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=3)
# ***** Parameter ranges for Figures 3 and 4 in the ICML-2020 paper.******
#param_search(cost_fun=BinaryPoisson, distrib_params=binary_Poisson_params, alg='MirrorSync', learning_rates=[3, 3.3, 4, 5, 6, 7, 9, 11, 13], update_cycle_lengths=[40], num_arms=100, horizon=240, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=6)
#param_search(cost_fun=BinaryPoisson, distrib_params=binary_Poisson_params, alg='AsyncMirrorSync', learning_rates=[1.1, 1.0, 1.3, 1.2, 0.9, 0.8], update_cycle_lengths=[8, 10, 20, 40], num_arms=100, horizon=240, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=6)
#param_search(cost_fun=BinaryPoisson, distrib_params=binary_Poisson_params, alg='AsyncPSGDSync', learning_rates=[0.08, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], update_cycle_lengths=[8, 10, 20, 40], num_arms=100, horizon=240, epsilon=0.05, B_frac=0.4, rmin=0.025, rmax=6)
def param_search(cost_fun, distrib_params, alg, learning_rates, update_cycle_lengths, num_arms, horizon, epsilon, B_frac, rmin, rmax):
assert(alg == 'MirrorSync' or alg == 'AsyncMirrorSync' or alg == 'AsyncPSGDSync')
figctr = 0
print("Evaluating ", alg)
for learning_rate in learning_rates:
tried_this_learning_rate = False
for update_cycle_length in update_cycle_lengths:
# The seed is fixed for the purposes of hyperparameter tuning.
np.random.seed(0)
c = cost_fun(num_arms, distrib_params)
success = True
opt_epsilon_cost_curr = compute_opt_policy_cost(c, rmin, rmax/(1 + epsilon), B_frac * num_arms/(1 + epsilon))
print('Optimal epsilon-exploration periodic policy cost: %r'%(opt_epsilon_cost_curr))
results = []
#try:
if alg == 'MirrorSync':
if not tried_this_learning_rate:
results = mirror_sync(c, num_arms, learning_rate, horizon, epsilon, B_frac, rmin, rmax, True)
tried_this_learning_rate = True
else:
continue
elif alg == 'AsyncMirrorSync':
results = async_mirror_sync(c, 'ms', num_arms, learning_rate, horizon, epsilon, update_cycle_length, B_frac, rmin, rmax, True)
elif alg == 'AsyncPSGDSync':
results = async_mirror_sync(c, 'psgd', num_arms, learning_rate, horizon, epsilon, update_cycle_length, B_frac, rmin, rmax, True)
#except:
# success = False
if success:
plt.figure(figctr)
if alg == 'MirrorSync':
print("Results for learning rate = ", learning_rate, " are: ")
plt.title("eta = " + str(learning_rate))
else:
print("Results for learning rate = ", learning_rate, ", update cycle length = ", update_cycle_length, " are: ")
plt.title("eta = " + str(learning_rate) + ", update_cycle_length = " + str(update_cycle_length))
print(results)
x = np.arange(len(results)) / len(results) * horizon
plt.plot(x, results)
plt.draw()
else:
if alg == 'MirrorSync':
print("Results for learning rate = ", learning_rate, " are: the convex optimizer is unstable for this parameter combination and RNG seed, trying another parameter combination")
else:
print("Results for learning rate = ", learning_rate, ", update cycle length = ", update_cycle_length, " are: the algorithm is unstable for this parameter combination, trying another one")
figctr += 1
print('\n\n')
plt.show()