Skip to content

Commit

Permalink
pscore global shuffle&default accessor config (#37626)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaocaibei123 authored Nov 30, 2021
1 parent 2f4c089 commit 1514eec
Show file tree
Hide file tree
Showing 8 changed files with 323 additions and 95 deletions.
81 changes: 56 additions & 25 deletions paddle/fluid/framework/distributed_strategy.proto
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ enum TableType {
// Per-table settings: table id, table class name, shard count, table type and
// the accessor configuration.
// NOTE(review): this text is a rendered diff whose +/- markers were stripped,
// so `shard_num` is listed twice. Presumably the first line is the removed
// declaration and the second (default = 1000) is its replacement — confirm
// against the repository; as written the message would not compile.
message TableParameter {
optional uint64 table_id = 1;
optional string table_class = 2;
optional uint64 shard_num = 3;
optional uint64 shard_num = 3 [ default = 1000 ];
optional TableType type = 4;
optional TableAccessorParameter accessor = 5;
}
Expand All @@ -190,42 +190,73 @@ message TableAccessorParameter {
optional string accessor_class = 1;
optional SGDParameter embed_sgd_param = 2;
optional SGDParameter embedx_sgd_param = 3;
optional uint32 fea_dim = 4; // for sparse table, this means field size of one
// value; for dense table, this means total value
// num
optional uint32 embedx_dim = 5; // embedx feature size
optional uint32 embedx_threshold = 6; // embedx feature create threshold
optional uint32 fea_dim = 4 [ default = 11 ]; // field size of one value
optional uint32 embedx_dim = 5 [ default = 8 ]; // embedx feature size
optional uint32 embedx_threshold = 6
[ default = 10 ]; // embedx feature create threshold
optional CtrAccessorParameter ctr_accessor_param = 7;
repeated TableAccessorSaveParameter table_accessor_save_param = 8;
}

// TODO(guanqun): add NaiveSGD/Adam...
// SGD configuration holder: a `name` plus one optional sub-message per rule
// (naive, adagrad, adam). `name` presumably selects which rule is used —
// TODO confirm against the consumer.
// NOTE(review): rendered diff without +/- markers. Presumably
// `SGDRuleParameter adagrad = 2` is the removed line and the three lines
// after it are its replacement. If so, tag 2 is retyped from the old adagrad
// message to `naive` and `adagrad` moves to tag 3, which is not
// wire-compatible with data serialized under the old layout — confirm that no
// persisted configs depend on the old numbering.
message SGDParameter {
optional string name = 1;
optional SGDRuleParameter adagrad = 2;
optional SparseNaiveSGDRuleParameter naive = 2;
optional SparseAdagradSGDRuleParameter adagrad = 3;
optional SparseAdamSGDParameter adam = 4;
}

// NOTE(review): rendered diff without +/- markers. The SGDRuleParameter header
// and its three fields are presumably the removed declaration; the
// SparseNaiveSGDRuleParameter header and fields that follow are presumably its
// replacement. Only one closing brace is shown for both.
message SGDRuleParameter {
optional double learning_rate = 1;
optional double initial_g2sum = 2;
optional double initial_range = 3 [ default = 0 ];
// Parameters for the rule the inline comment names SparseNaiveSGDRule.
message SparseNaiveSGDRuleParameter { // SparseNaiveSGDRule
// Learning rate; 0.05 when unset.
optional double learning_rate = 1 [ default = 0.05 ];
// Initial range; 0.0001 when unset. Presumably the range used to initialize
// values — TODO confirm.
optional double initial_range = 2 [ default = 0.0001 ];
// Presumably the lower/upper bounds applied to weights — TODO confirm the
// expected element count and order.
repeated float weight_bounds = 3;
}

// Parameters for the rules the inline comment names SparseAdaGradSGDRule and
// StdAdaGradSGDRule.
message
SparseAdagradSGDRuleParameter { // SparseAdaGradSGDRule|StdAdaGradSGDRule
// Learning rate; 0.05 when unset.
optional double learning_rate = 1 [ default = 0.05 ];
// Initial g2sum; 3.0 when unset. Presumably the starting value of the
// accumulated squared gradient — TODO confirm.
optional double initial_g2sum = 2 [ default = 3.0 ];
// Initial range; 0.0001 when unset. Presumably the range used to initialize
// values — TODO confirm.
optional double initial_range = 3 [ default = 0.0001 ];
// Presumably the lower/upper bounds applied to weights — TODO confirm the
// expected element count and order.
repeated float weight_bounds = 4;
}

// Parameters for the rule the inline comment names SparseAdamSGDRule.
message SparseAdamSGDParameter { // SparseAdamSGDRule
// Learning rate; 0.001 when unset.
optional double learning_rate = 1 [ default = 0.001 ];
// Initial range; 0.0001 when unset. Presumably the range used to initialize
// values — TODO confirm.
optional double initial_range = 2 [ default = 0.0001 ];
// Beta1 decay rate; 0.9 when unset.
optional double beta1_decay_rate = 3 [ default = 0.9 ];
// Beta2 decay rate; 0.999 when unset.
optional double beta2_decay_rate = 4 [ default = 0.999 ];
// Epsilon term; 1e-08 when unset.
optional double ada_epsilon = 5 [ default = 1e-08 ];
// Presumably the lower/upper bounds applied to weights — TODO confirm the
// expected element count and order.
repeated float weight_bounds = 6;
}

// Thresholds and coefficients for the CTR accessor: show/click scoring, the
// conditions under which a feature is saved, and shrink/delete limits (see
// the per-field comments).
// NOTE(review): rendered diff without +/- markers. Every field is listed
// twice: presumably the first run (tags 1-9 without defaults) is the removed
// text and the second run (tags 1-9 with `default = ...`) is its replacement —
// confirm against the repository; as written the message would not compile.
message CtrAccessorParameter {
optional float nonclk_coeff = 1; // to calculate show_click_score
optional float click_coeff = 2; // to calculate show_click_score
optional float base_threshold =
3; // show_click_score > base_threshold, this feature can be saved
optional float delta_threshold =
4; // delta_score > delta_threshold, this feature can be saved
optional float delta_keep_days =
5; // unseen_day < delta_keep_days, this feature can be saved
optional float show_click_decay_rate = 6; // show/click will update to
// show/click *
// show_click_decay_rate after a day
optional float delete_threshold = 7; // threshold to shrink a feasign
optional float delete_after_unseen_days = 8;
optional int32 ssd_unseenday_threshold = 9;
optional float nonclk_coeff = 1
[ default = 0.1 ]; // to calculate show_click_score
optional float click_coeff = 2
[ default = 1 ]; // to calculate show_click_score
optional float base_threshold = 3 [
default = 1.5
]; // show_click_score > base_threshold, this feature can be saved
optional float delta_threshold = 4
[ default =
0.25 ]; // delta_score > delta_threshold, this feature can be saved
optional float delta_keep_days = 5
[ default =
16 ]; // unseen_day < delta_keep_days, this feature can be saved
optional float show_click_decay_rate = 6
[ default = 0.98 ]; // show/click will update to
// show/click *
// show_click_decay_rate after a day
optional float delete_threshold = 7
[ default = 0.8 ]; // threshold to shrink a feasign
// 30 when unset. Presumably the number of unseen days after which a feature
// is deleted — TODO confirm.
optional float delete_after_unseen_days = 8 [ default = 30 ];
// 1 when unset. Presumably an unseen-day threshold for SSD storage — TODO
// confirm.
optional int32 ssd_unseenday_threshold = 9 [ default = 1 ];
}

// One save-configuration entry for a table accessor; TableAccessorParameter
// holds these as the repeated field `table_accessor_save_param`.
message TableAccessorSaveParameter {
// Presumably identifies the save mode this entry applies to — TODO confirm.
optional uint32 param = 1;
// Presumably the converter applied when saving — TODO confirm the format.
optional string converter = 2;
// Presumably the inverse converter applied when loading — TODO confirm.
optional string deconverter = 3;
}

message FsClientParameter {
Expand Down
24 changes: 20 additions & 4 deletions python/paddle/distributed/fleet/base/distributed_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,22 +470,38 @@ def sparse_table_configs(self, configs):
from google.protobuf.descriptor import FieldDescriptor
table_param = self.strategy.downpour_table_param

# Recursively copies values from the flat `configs` dict into the protobuf
# message `msg`. Keys are dotted paths built as config_name + "." + field.name.
#   msg:         protobuf message whose DESCRIPTOR.fields are walked.
#   config_name: dotted prefix for keys belonging to `msg`.
#   configs:     dict mapping dotted keys to values; for a repeated message
#                field the entry count is read from the key `<name>.num`.
#   index:       position used to pick an element when a scalar's config
#                value is a list.
# NOTE(review): this text is a rendered diff with +/- markers and indentation
# stripped. Presumably the first `def` line, the first bare recursive call and
# the first bare `setattr` are the removed lines and the code following each
# is its replacement — confirm against the repository.
def set_table_config(msg, config_name, configs):
def set_table_config(msg, config_name, configs, index=0):
for field in msg.DESCRIPTOR.fields:
# Dotted lookup key for this field.
name = config_name + "." + field.name
if field.type == FieldDescriptor.TYPE_MESSAGE:
# NOTE(review): the print calls in this function look like leftover debug
# output — confirm whether they are intended.
print("message:", name)
set_table_config(getattr(msg, field.name), name, configs)
if field.label == FieldDescriptor.LABEL_REPEATED:
# Repeated sub-message: skipped unless an explicit count is configured.
if name + ".num" not in configs:
continue
num = configs[name + ".num"]
print("message num:", name, num)
# Append `num` entries and fill each one, passing its position as `index`.
for i in range(num):
data = getattr(msg, field.name).add()
set_table_config(data, name, configs, i)
else:
# Singular sub-message: recurse (index falls back to its default of 0).
set_table_config(
getattr(msg, field.name), name, configs)
else:
print("not message:", name)
# Scalar fields without a configured value are left untouched.
if name not in configs:
continue
if field.label == FieldDescriptor.LABEL_REPEATED:
getattr(msg, field.name).extend(configs[name])
else:
setattr(msg, field.name, configs[name])
# A list value supplies one element per repeated parent entry; pick the
# element at `index`. NOTE(review): isinstance(configs[name], list) would be
# the idiomatic check, and a list shorter than `<name>.num` would raise
# IndexError here.
if type(configs[name]) == list:
setattr(msg, field.name, configs[name][index])
else:
setattr(msg, field.name, configs[name])

set_table_config(table_param, "table_parameters", configs)
if not configs:
print("table configs is empty")
else:
set_table_config(table_param, "table_parameters", configs)

@property
def amp(self):
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/distributed/fleet/base/fleet_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ def save_persistables(self, executor, dirname, main_program=None, mode=0):
self._runtime_handle._save_persistables(executor, dirname, main_program,
mode)

# NOTE(review): rendered diff with +/- markers and indentation stripped.
# Presumably the first `def` is the removed signature and the second, which
# gives `threshold` a default of None, is its replacement — confirm against
# the repository.
def shrink(self, threshold):
def shrink(self, threshold=None):
# Forwards `threshold` unchanged to the runtime handle's _shrink.
self._runtime_handle._shrink(threshold)

def distributed_optimizer(self, optimizer, strategy=None):
Expand Down
Loading

0 comments on commit 1514eec

Please sign in to comment.