Merge pull request #1642 from ShiningZhang/doc
update Serving_Configure
ShiningZhang authored Feb 21, 2022
2 parents fe0a0e4 + e3f87a5 commit 47a560a
Showing 2 changed files with 102 additions and 0 deletions.
51 changes: 51 additions & 0 deletions doc/Serving_Configure_CN.md
@@ -364,11 +364,41 @@ dag:
tracer:
interval_s: 10

#Client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; prediction runs in-process
#client_type: local_predictor

#Maximum channel length, default 0
#channel_size: 0

#For distributed large-model scenarios with tensor parallelism: keep the first result received and discard the rest to improve speed
#channel_recv_frist_arrive: False

op:
det:
#Concurrency; threads when is_thread_op=True, otherwise processes
concurrency: 6

#Serving IPs
#server_endpoints: ["127.0.0.1:9393"]

#Fetch result list, using the alias_name of fetch_var in client_config
#fetch_list: ["concat_1.tmp_0"]

#Client-side configuration of the det model
#client_config: serving_client_conf.prototxt

#Timeout for interaction with Serving, in ms
#timeout: 3000

#Retry count for interaction with Serving; no retry by default
#retry: 1

# Number of requests batched per Serving query, default 1. If batch_size > 1, auto_batching_timeout must be set; otherwise requests block until batch_size is reached
#batch_size: 2

# Batching timeout, used together with batch_size
#auto_batching_timeout: 2000

#When the op config has no server_endpoints, the local service configuration is read from local_service_conf
local_service_conf:
#Client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; prediction runs in-process
@@ -399,6 +429,27 @@ op:
#GPU supports: "fp32" (default), "fp16", "int8";
#CPU supports: "fp32" (default), "fp16", "bf16" (mkldnn); "int8" is not supported
precision: "fp32"

#mem_optim, memory / graphics memory optimization
#mem_optim: True

#use_calib, Use TRT int8 calibration
#use_calib: False

#use_mkldnn, Use mkldnn for cpu
#use_mkldnn: False

#The cache capacity of different input shapes for mkldnn
#mkldnn_cache_capacity: 0

#mkldnn_op_list, op list accelerated using MKLDNN, None by default
#mkldnn_op_list: []

#mkldnn_bf16_op_list, op list accelerated using MKLDNN bf16, None by default
#mkldnn_bf16_op_list: []

#min_subgraph_size, the minimum subgraph size required to enable TensorRT optimization, default 3
#min_subgraph_size: 3
rec:
#Concurrency; threads when is_thread_op=True, otherwise processes
concurrency: 3
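Taken together, the op-level options documented above might look like the following minimal sketch. It simply uncomments the documented keys and reuses the example values from the diff, so it is illustrative rather than a recommended configuration.

```yaml
op:
  det:
    # threads when is_thread_op=True, otherwise processes
    concurrency: 6
    # remote Serving endpoints; when omitted, local_service_conf is used instead
    server_endpoints: ["127.0.0.1:9393"]
    # fetch vars, identified by the alias_name values in client_config
    fetch_list: ["concat_1.tmp_0"]
    client_config: serving_client_conf.prototxt
    # timeout (ms) and retry count for calls to Serving
    timeout: 3000
    retry: 1
    # batch_size > 1 requires auto_batching_timeout,
    # otherwise requests block until batch_size is reached
    batch_size: 2
    auto_batching_timeout: 2000
```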
51 changes: 51 additions & 0 deletions doc/Serving_Configure_EN.md
@@ -369,11 +369,41 @@ dag:
tracer:
interval_s: 10

#Client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; prediction runs in-process
#client_type: local_predictor

# max channel size, default 0
#channel_size: 0

#For distributed large-model scenarios with tensor parallelism: keep the first result received and discard the rest to improve speed
#channel_recv_frist_arrive: False

op:
det:
#Concurrency; threads when is_thread_op=True, otherwise processes
concurrency: 6

#Serving IPs
#server_endpoints: ["127.0.0.1:9393"]

#Fetch result list, using the alias_name of fetch_var in client_config
#fetch_list: ["concat_1.tmp_0"]

#det client config
#client_config: serving_client_conf.prototxt

#Serving timeout, ms
#timeout: 3000

#Serving retry count; no retry by default
#retry: 1

#Number of requests batched per Serving query, default 1. If batch_size > 1, auto_batching_timeout must be set; otherwise requests block until batch_size is reached
#batch_size: 2

#Batching timeout, used together with batch_size
#auto_batching_timeout: 2000

#When the op config has no server_endpoints, the local service configuration is read from local_service_conf
local_service_conf:
#Client type: brpc, grpc, or local_predictor. local_predictor does not start a Serving service; prediction runs in-process
@@ -404,6 +434,27 @@ op:
#GPU supports: "fp32" (default), "fp16", "int8";
#CPU supports: "fp32" (default), "fp16", "bf16" (mkldnn); "int8" is not supported
precision: "fp32"

#mem_optim, memory / graphics memory optimization
#mem_optim: True

#use_calib, Use TRT int8 calibration
#use_calib: False

#use_mkldnn, Use mkldnn for cpu
#use_mkldnn: False

#The cache capacity of different input shapes for mkldnn
#mkldnn_cache_capacity: 0

#mkldnn_op_list, op list accelerated using MKLDNN, None by default
#mkldnn_op_list: []

#mkldnn_bf16_op_list, op list accelerated using MKLDNN bf16, None by default
#mkldnn_bf16_op_list: []

#min_subgraph_size, the minimum subgraph size required to enable TensorRT optimization, default 3
#min_subgraph_size: 3
rec:
#Concurrency; threads when is_thread_op=True, otherwise processes
concurrency: 3
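Similarly, the local_service_conf options added in the second hunk can be read as one CPU-oriented block. The sketch below assumes a local_predictor client and turns on use_mkldnn (the commented example value above is False); the remaining values are taken from the commented examples, so this is illustrative rather than a tuned configuration.

```yaml
op:
  det:
    concurrency: 6
    local_service_conf:
      # in-process prediction, no separate Serving service
      client_type: local_predictor
      precision: "fp32"
      # memory / graphics-memory optimization
      mem_optim: True
      # enable MKLDNN on CPU; cache capacity for different input shapes
      # (0 is the example value from the documentation above)
      use_mkldnn: True
      mkldnn_cache_capacity: 0
      # op lists accelerated with MKLDNN / MKLDNN bf16
      # (left empty here, as in the commented examples above)
      mkldnn_op_list: []
      mkldnn_bf16_op_list: []
```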
