Merged
41 commits
5717b13  Update distilbert_base (ChendaLi-Intel, Nov 28, 2022)
5f5139a  Update distilbert_base (ChendaLi-Intel, Nov 28, 2022)
1952588  Update bert_base_mrpc (ChendaLi-Intel, Nov 30, 2022)
c7779e2  Add words to ignore when doing the spell check (ChendaLi-Intel, Dec 1, 2022)
0f7bbc8  Merge branch 'master' into chenda/inc_newapi (chensuyue, Dec 1, 2022)
f7d101a  Merge branch 'master' of https://github.com/intel/neural-compressor i… (ChendaLi-Intel, Dec 2, 2022)
336e5fe  merge and solve conflict (zehao-intel, Dec 6, 2022)
83d7d4c  add code for transformer_lt and transformer_lt_mlperf (zehao-intel, Dec 6, 2022)
40ec6ec  refine 4 nlp models (zehao-intel, Dec 8, 2022)
383687b  Merge branch 'master' into chenda/inc_newapi (zehao-intel, Dec 8, 2022)
5ed6059  fix spell (zehao-intel, Dec 8, 2022)
3ca3912  Merge branch 'master' into chenda/inc_newapi (zehao-intel, Dec 8, 2022)
63a41ed  remove unnecessary common.Model (zehao-intel, Dec 9, 2022)
816b59e  Merge branch 'chenda/inc_newapi' of https://github.com/intel/neural-c… (zehao-intel, Dec 9, 2022)
d8d5315  fix iteration=None in tf_utils.util.iterator_sess_run (zehao-intel, Dec 9, 2022)
cc909e4  update example config (zehao-intel, Dec 9, 2022)
20b33de  Merge branch 'master' into chenda/inc_newapi (zehao-intel, Dec 9, 2022)
300eb57  remove old api tf nlp examples (zehao-intel, Dec 9, 2022)
adf18a2  remove old api image recognition examples (zehao-intel, Dec 9, 2022)
7c96dec  support removed model test (chensuyue, Dec 11, 2022)
c27adfc  add default benchmark config (chensuyue, Dec 11, 2022)
374b418  merge master (zehao-intel, Dec 13, 2022)
6b8f185  remove experimental import (zehao-intel, Dec 13, 2022)
74dbeca  Merge branch 'chenda/inc_newapi' of https://github.com/intel/neural-c… (zehao-intel, Dec 13, 2022)
e0773a9  fix bugs (zehao-intel, Dec 13, 2022)
f272307  fix (zehao-intel, Dec 13, 2022)
58d102d  fix issues (zehao-intel, Dec 14, 2022)
4dde5c9  fix transformer_lt example config (zehao-intel, Dec 14, 2022)
88de8ac  fix transformer_lt benchmark config args (zehao-intel, Dec 14, 2022)
74d8578  Update run_benchmark.sh (chensuyue, Dec 14, 2022)
fbc839c  update topology to scripts (zehao-intel, Dec 15, 2022)
57b92c2  Merge branch 'chenda/inc_newapi' of https://github.com/intel/neural-c… (zehao-intel, Dec 15, 2022)
7fe5a1c  remove topology and args check (zehao-intel, Dec 15, 2022)
aca1a38  fix bert_base import (zehao-intel, Dec 20, 2022)
13929ec  fix distillbert (zehao-intel, Dec 20, 2022)
b0ad837  fix config (zehao-intel, Dec 20, 2022)
2f3af90  fix bert latency issue (zehao-intel, Dec 20, 2022)
93a5017  solve conflicts (zehao-intel, Dec 20, 2022)
bfc6d27  Merge branch 'master' of https://github.com/intel/neural-compressor i… (lvliang-intel, Dec 21, 2022)
88b519d  Merge branch 'chenda/inc_newapi' of https://github.com/intel/neural-c… (lvliang-intel, Dec 21, 2022)
801ae60  remove yolo_v3 utils (lvliang-intel, Dec 21, 2022)
4 changes: 4 additions & 0 deletions .azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt
@@ -2379,3 +2379,7 @@ grappler
amsgrad
qoperator
apis
AccuracyCriterion
AccuracyLoss
PostTrainingQuantConfig
TuningCriterion
@@ -1071,6 +1071,7 @@ def result(self):

eval_examples = processor.get_dev_examples(FLAGS.data_dir)
eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
dataset = Dataset(eval_file, FLAGS.eval_batch_size)

convert_examples_to_features(
examples=eval_examples,
@@ -1085,19 +1086,54 @@ def result(self):
is_training=False,
drop_remainder=False)

from neural_compressor.experimental import Quantization, common
quantizer = Quantization(FLAGS.config)
dataset = Dataset(eval_file, FLAGS.eval_batch_size)
quantizer.model = common.Model(estimator, input_fn=estimator_input_fn)
quantizer.calib_dataloader = common.DataLoader(dataset, collate_fn=collate_fn)
quantizer.eval_dataloader = common.DataLoader(dataset, collate_fn=collate_fn)
quantizer.metric = Accuracy()
q_model = quantizer.fit()
from neural_compressor.experimental import common
from neural_compressor.quantization import fit
from neural_compressor.config import PostTrainingQuantConfig, \
TuningCriterion, AccuracyCriterion, AccuracyLoss, set_random_seed

set_random_seed(9527)

tuning_criterion = TuningCriterion(
strategy="basic",
timeout=0,
max_trials=100,
objective="performance")

tolerable_loss = AccuracyLoss(loss=0.01)

accuracy_criterion = AccuracyCriterion(
higher_is_better=True,
criterion='relative',
tolerable_loss=tolerable_loss)

config = PostTrainingQuantConfig(
device="cpu",
backend="tensorflow",
inputs=["input_file", "batch_size"],
outputs=["loss/Softmax:0", "IteratorGetNext:3"],
approach="static",
calibration_sampling_size=[500],
op_type_list=None,
op_name_list=None,
reduce_range=None,
extra_precisions=[],
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion)

q_model = fit(
model=common.Model(estimator, input_fn=estimator_input_fn),
conf=config,
calib_dataloader=common.DataLoader(dataset, collate_fn=collate_fn),
calib_func=None,
eval_dataloader=common.DataLoader(dataset, collate_fn=collate_fn),
eval_func=None,
eval_metric=Accuracy())

if FLAGS.strip_iterator:
q_model.graph_def = strip_iterator(q_model.graph_def)
q_model.save(FLAGS.output_model)

if FLAGS.benchmark:
if FLAGS.benchmark and FLAGS.mode=="accuracy":
eval_examples = processor.get_dev_examples(FLAGS.data_dir)
eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")

@@ -1120,8 +1156,82 @@ def result(self):
is_training=False,
drop_remainder=False)
evaluator.model = common.Model(estimator, input_fn=estimator_input_fn)
evaluator(FLAGS.mode)
evaluator("accuracy")

""" Refactor code of benchmark in accuracy mode with INC User NewAPI.
from neural_compressor.experimental import common
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.model.model import get_model_type

model_type = get_model_type(FLAGS.input_model)
if model_type == 'frozen_pb':
model = FLAGS.input_model
else:
estimator_input_fn = input_fn_builder(
input_file=eval_file,
seq_length=FLAGS.max_seq_length,
is_training=False,
drop_remainder=False)
model = common.Model(estimator, input_fn=estimator_input_fn)

dataset = Dataset(eval_file, FLAGS.eval_batch_size)
b_dataloader = common.DataLoader(
dataset,
batch_size=FLAGS.eval_batch_size,
collate_fn=collate_fn)

config = BenchmarkConfig(
warmup=5,
iteration=20,
cores_per_instance=28,
num_of_instance=1,
inter_num_of_threads=None,
intra_num_of_threads=None)

fit(model=model, config=config, b_dataloader=b_dataloader, b_func=b_func)
"""
# Cannot pass 'metric' and 'mode' in fit.
# TODO: implement a b_func.
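# One hypothetical b_func sketch (an assumption, not part of this PR): reuse the
# `evaluator` object built in the accuracy path above and return its score.
#   def b_func(model):
#       evaluator.model = model
#       return evaluator("accuracy")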

elif FLAGS.benchmark and FLAGS.mode=="performance":
from neural_compressor.experimental import common
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.model.model import get_model_type

eval_examples = processor.get_dev_examples(FLAGS.data_dir)
eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")

model_type = get_model_type(FLAGS.input_model)
if model_type == 'frozen_pb':
model = FLAGS.input_model
else:
estimator_input_fn = input_fn_builder(
input_file=eval_file,
seq_length=FLAGS.max_seq_length,
is_training=False,
drop_remainder=False)
model = common.Model(estimator, input_fn=estimator_input_fn)

dataset = Dataset(eval_file, FLAGS.eval_batch_size)
b_dataloader = common.DataLoader(
dataset,
batch_size=FLAGS.eval_batch_size,
collate_fn=collate_fn)

config = BenchmarkConfig(
inputs=["input_file", "batch_size"],
outputs=["loss/Softmax:0", "IteratorGetNext:3"],
warmup=5,
iteration=20,
cores_per_instance=28,
num_of_instance=1,
inter_num_of_threads=None,
intra_num_of_threads=None)

fit(model=model, config=config, b_dataloader=b_dataloader, b_func=None)


if __name__ == "__main__":
flags.mark_flag_as_required("data_dir")
79 changes: 41 additions & 38 deletions examples/tensorflow/nlp/distilbert_base/quantization/ptq/README.md
@@ -55,7 +55,6 @@ bash run_tuning.sh \
--input_model=$INPUT_MODEL \
--dataset_location=$DATASET_DIR \
--output_model=$OUTPUT_MODEL \
--config=$CONFIG_FILE \
--batch_size=$BATCH_SIZE \
--max_seq_length=$MAX_SEQ \
--warmup_steps=$WARMUPS \
@@ -94,7 +93,6 @@ Where (Default values are shown in the square brackets):
* $INPUT_MODEL ["./distilbert_base_fp32.pb"]-- The path to input FP32 frozen model .pb file to load
* $DATASET_DIR ["./sst2_validation_dataset"]-- The path to input dataset directory
* $OUTPUT_MODEL ["./output_distilbert_base_int8.pb"]-- The user-specified export path to the output INT8 quantized model
* $CONFIG_FILE ["./distilbert_base.yaml"]-- The path to quantization configuration .yaml file to load for tuning
* $BATCH_SIZE [128]-- The batch size for model inference
* $MAX_SEQ [128]-- The maximum total sequence length after tokenization
* $ITERS [872]-- The number of iterations to run in benchmark mode, maximum value is 872
@@ -108,7 +106,7 @@ Details of enabling Intel® Neural Compressor on DistilBERT base for TensorFlow

This is a tutorial of how to enable DistilBERT base model with Intel® Neural Compressor.
## User Code Analysis
1. User specifies fp32 *model*, calibration dataloader *q_dataloader*, evaluation dataloader *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file.
1. User specifies fp32 *model*, calibration dataloader *q_dataloader*, evaluation dataloader *eval_dataloader* and metric in tuning.metric field of model-specific config.

2. User specifies fp32 *model*, calibration dataloader *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataloader and metric by itself.
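Both modes map onto the same `fit()` entry point. A minimal sketch of the two call patterns (`graph`, `config`, `dataloader`, `eval_func`, and `Accuracy` are stand-ins for the objects built in this example):

```python
from neural_compressor.quantization import fit

# Mode 1: hand fit() a dataloader plus a metric and let INC run evaluation.
q_model = fit(model=graph, conf=config,
              calib_dataloader=dataloader,
              eval_dataloader=dataloader,
              eval_metric=Accuracy())

# Mode 2: hand fit() a custom eval_func that performs evaluation itself.
q_model = fit(model=graph, conf=config,
              calib_dataloader=dataloader,
              eval_func=eval_func)
```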

@@ -138,45 +136,50 @@ class Dataloader(object):
yield feed_dict, labels
```

### Write Yaml Config File
In the examples directory, there is a distilbert_base.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, 'framework' should be set to 'tensorflow_itex' and the device in the yaml file should be set to 'gpu'. The distilbert_base_itex.yaml is prepared for the GPU case. Most items can be removed, keeping only the mandatory ones for tuning. We also implement a calibration dataloader and use the evaluation field so that neural_compressor creates the evaluation function internally.

```yaml
model:
name: distilbert_base
framework: tensorflow

device: cpu # optional. default value is cpu, other value is gpu.

quantization:
calibration:
sampling_size: 500
model_wise:
weight:
granularity: per_channel

tuning:
accuracy_criterion:
relative: 0.02
exit_policy:
timeout: 0
max_trials: 100
performance_only: False
random_seed: 9527
```

In this case we calibrate and quantize the model, and use our user-defined calibration dataloader.

### Code Update
After prepare step is done, we add the code for quantization tuning to generate quantized model.

```python
from neural_compressor.experimental import Quantization, common
quantizer = Quantization(ARGS.config)
quantizer.calib_dataloader = self.dataloader
quantizer.model = common.Model(graph)
quantizer.eval_func = self.eval_func
q_model = quantizer.fit()
from neural_compressor.quantization import fit
from neural_compressor.config import PostTrainingQuantConfig, \
TuningCriterion, AccuracyCriterion, AccuracyLoss, set_random_seed

set_random_seed(9527)
tuning_criterion = TuningCriterion(
strategy="basic",
timeout=0,
max_trials=100,
objective="performance")

tolerable_loss = AccuracyLoss(loss=0.02)

accuracy_criterion = AccuracyCriterion(
higher_is_better=True,
criterion='relative',
tolerable_loss=tolerable_loss)

config = PostTrainingQuantConfig(
device="cpu",
backend="tensorflow",
inputs=[],
outputs=[],
approach="static",
calibration_sampling_size=[500],
op_type_list=None,
op_name_list=None,
reduce_range=None,
extra_precisions=[],
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion)

q_model = fit(
model=graph,
conf=config,
calib_dataloader=self.dataloader,
calib_func=None,
eval_dataloader=None,
eval_func=self.eval_func,
eval_metric=None)
```

The Intel® Neural Compressor fit() function will return the best quantized model found within the time constraint.
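As in the example script, the model returned by `fit()` can then be serialized for deployment. A minimal sketch, assuming `ARGS.output_graph` is this example's output path:

```python
if q_model:
    q_model.save(ARGS.output_graph)
```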
@@ -1,3 +1,4 @@

#
# -*- coding: utf-8 -*-
#
@@ -25,7 +26,9 @@
from transformers import AutoTokenizer
from datasets import load_from_disk
from tensorflow.core.protobuf import saved_model_pb2
from neural_compressor.experimental import Quantization, common
from neural_compressor.quantization import fit
from neural_compressor.config import PostTrainingQuantConfig, \
TuningCriterion, AccuracyCriterion, AccuracyLoss, set_random_seed
from neural_compressor.utils.utility import dump_elapsed_time
from neural_compressor.utils import logger

@@ -259,11 +262,43 @@ def eval_func(self, graph):
def run(self):
graph = self.load_graph()
if ARGS.mode == "tune":
quantizer = Quantization(ARGS.config)
quantizer.calib_dataloader = self.dataloader
quantizer.model = common.Model(graph)
quantizer.eval_func = self.eval_func
q_model = quantizer.fit()
set_random_seed(9527)
tuning_criterion = TuningCriterion(
strategy="basic",
timeout=0,
max_trials=100,
objective="performance")

tolerable_loss = AccuracyLoss(loss=0.02)

accuracy_criterion = AccuracyCriterion(
higher_is_better=True,
criterion='relative',
tolerable_loss=tolerable_loss)

config = PostTrainingQuantConfig(
device="cpu",
backend="tensorflow",
inputs=[],
outputs=[],
approach="static",
calibration_sampling_size=[500],
op_type_list=None,
op_name_list=None,
reduce_range=None,
extra_precisions=[],
tuning_criterion=tuning_criterion,
accuracy_criterion=accuracy_criterion)

q_model = fit(
model=graph,
conf=config,
calib_dataloader=self.dataloader,
calib_func=None,
eval_dataloader=None,
eval_func=self.eval_func,
eval_metric=None)

try:
q_model.save(ARGS.output_graph)
except Exception as e:
1 change: 1 addition & 0 deletions neural_compressor/adaptor/tensorflow.py
@@ -247,6 +247,7 @@ def evaluate(self, model, dataloader, postprocess=None,
import tensorflow as tf
from .tf_utils.util import iterator_sess_run
outputs = model.output_tensor_names
iteration=-1 if iteration is None else iteration
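# Normalize the iteration argument: iterator_sess_run takes an int, and -1
# presumably means "run over the entire dataloader" (assumed from the fix's intent).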

if getattr(dataloader, 'distributed', False):
import horovod.tensorflow as hvd