
Commit

remove old dataloader & generator from quantization (PaddlePaddle#55754)

* remove old dataloader & generator from quantization

* fix ut test_post_training_quantization_mnist
zoooo0820 authored and wz1qqx committed Jul 31, 2023
1 parent d3b1cfd commit 14dc6c6
Showing 3 changed files with 74 additions and 61 deletions.
55 changes: 11 additions & 44 deletions python/paddle/static/quantization/post_training_quantization.py
@@ -23,12 +23,10 @@
 except:
     from .utils import tqdm

-from inspect import isgeneratorfunction

 from paddle.fluid.framework import IrGraph, _get_var

 from ... import io, static
-from ...fluid import reader
 from ...framework import core
 from ...utils import unique_name
 from ..log_helper import get_logger
@@ -171,16 +169,16 @@ def __init__(
             When all parameters were saved in a single binary file, set it
             as the real filename. If parameters were saved in separate files,
             set it as 'None'. Default is 'None'.
-        batch_generator(Python Generator): The batch generator provides
+        batch_generator(Python Generator, deprecated): The batch generator provides
             calibrate data for DataLoader, and it returns a batch every
             time. Note that, sample_generator and batch_generator, only one
             should be set. Besides, batch_generator supports lod tensor.
-        sample_generator(Python Generator): The sample generator provides
+        sample_generator(Python Generator, deprecated): The sample generator provides
             calibrate data for DataLoader, and it only returns a sample every
             time. Note that, sample_generator and batch_generator, only one
             should be set. Besides, sample_generator does not support lod tensor.
-        data_loader(Python Generator, Paddle.io.DataLoader, optional): The
-            Generator or Dataloader provides calibrate data, and it could
+        data_loader(Paddle.io.DataLoader): The
+            Dataloader provides calibrate data, and it could
             return a batch every time.
         batch_size(int, optional): The batch size of DataLoader. Default is 10.
         batch_nums(int, optional): If batch_nums is not None, the number of
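With this change, a caller must construct a paddle.io.DataLoader and pass it via data_loader; the generator arguments remain in the signature but are marked deprecated and no longer handled. A minimal sketch of the new calling convention (RandCalibDataset, the model paths, and the "img" feed name are illustrative, not from this commit):

import numpy as np
import paddle
from paddle.static.quantization import PostTrainingQuantization


class RandCalibDataset(paddle.io.Dataset):
    # Hypothetical stand-in for real calibration data: each sample is a
    # dict keyed by the model's input name, as the quantizer expects.
    def __getitem__(self, idx):
        return {"img": np.random.rand(1, 28, 28).astype('float32')}

    def __len__(self):
        return 100


paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

loader = paddle.io.DataLoader(
    RandCalibDataset(),
    batch_size=10,
    places=paddle.static.cpu_places(),
)

ptq = PostTrainingQuantization(
    executor=exe,
    model_dir='./fp32_model',  # hypothetical saved inference model
    data_loader=loader,        # the only supported calibration source now
    batch_size=10,
    batch_nums=10,
    algo='KL',
)
ptq.quantize()
ptq.save_quantized_model('./int8_model')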
@@ -309,22 +307,12 @@ def __init__(

         # Check inputs
         assert executor is not None, "The executor cannot be None."
-        assert any(
-            [gen is not None]
-            for gen in [sample_generator, batch_generator, data_loader]
-        ), (
-            "The sample_generator, batch_generator "
-            "and data_loader cannot be None in the same time."
-        )
-        if data_loader is not None:
-            assert isinstance(
-                data_loader,
-                (
-                    io.DataLoader,
-                    type(isgeneratorfunction),
-                    reader.GeneratorLoader,
-                ),
-            ), "data_loader only accepts `paddle.io.DataLoader` or Generator instance."
+        assert data_loader is not None, "data_loader cannot be None."
+
+        assert isinstance(
+            data_loader, io.DataLoader
+        ), "data_loader only accepts `paddle.io.DataLoader`."

         assert batch_size > 0, "The batch_size should be greater than 0."
         assert (
             algo in self._support_algo_type
@@ -615,29 +603,8 @@ def _load_model_data(self):
             for var_name in self._feed_list
         ]

-        if self._data_loader is not None:
-            self._batch_nums = (
-                self._batch_nums if self._batch_nums else len(self._data_loader)
-            )
-            return
-        self._data_loader = reader.DataLoader.from_generator(
-            feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True
-        )
-        if self._sample_generator is not None:
-            self._data_loader.set_sample_generator(
-                self._sample_generator,
-                batch_size=self._batch_size,
-                drop_last=True,
-                places=self._place,
-            )
-        elif self._batch_generator is not None:
-            self._data_loader.set_batch_generator(
-                self._batch_generator, places=self._place
-            )
         self._batch_nums = (
-            self._batch_nums
-            if self._batch_nums
-            else len(list(self._data_loader))
+            self._batch_nums if self._batch_nums else len(self._data_loader)
         )

     def _optimize_fp32_model(self):
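With the generator branches gone, the batch-count fallback no longer has to exhaust the loader via len(list(...)): a paddle.io.DataLoader built over a sized dataset reports its batch count through len(). A small standalone sketch of that fallback (values are illustrative):

import paddle

# 100 random samples in batches of 10 -> len(loader) is 10.
dataset = paddle.io.TensorDataset([paddle.rand([100, 1, 28, 28])])
loader = paddle.io.DataLoader(dataset, batch_size=10)

# The removed code materialized generator output just to count batches;
# a DataLoader knows its length up front.
batch_nums = None
batch_nums = batch_nums if batch_nums else len(loader)
print(batch_nums)  # 10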
34 changes: 32 additions & 2 deletions test/quantization/test_post_training_quantization_mnist.py
@@ -30,6 +30,23 @@
 np.random.seed(0)


+class TransedMnistDataSet(paddle.io.Dataset):
+    def __init__(self, mnist_data):
+        self.mnist_data = mnist_data
+
+    def __getitem__(self, idx):
+        img = (
+            np.array(self.mnist_data[idx][0])
+            .astype('float32')
+            .reshape(1, 28, 28)
+        )
+        batch = img / 127.5 - 1.0
+        return {"img": batch}
+
+    def __len__(self):
+        return len(self.mnist_data)
+
+
 class TestPostTrainingQuantization(unittest.TestCase):
     def setUp(self):
         self.root_path = tempfile.TemporaryDirectory()
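The wrapper's only job is to turn each raw MNIST sample into a {name: array} dict, since a static-graph DataLoader feeds inputs by variable name; the key must match the model's feed name ("img" here, "x" in the while-op test below). A quick sanity check, assuming the class above:

import numpy as np
import paddle

mnist = paddle.vision.datasets.MNIST(mode='train', transform=None)
dataset = TransedMnistDataSet(mnist)

sample = dataset[0]
print(sample["img"].shape, sample["img"].dtype)  # (1, 28, 28) float32
print(sample["img"].min(), sample["img"].max())  # within [-1.0, 1.0]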
@@ -217,14 +234,27 @@ def generate_quantized_model(
     ):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)
-        val_reader = paddle.dataset.mnist.train()
+
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', transform=None
+        )
+        train_dataset = TransedMnistDataSet(train_dataset)
+        BatchSampler = paddle.io.BatchSampler(
+            train_dataset, batch_size=batch_size
+        )
+        val_data_generator = paddle.io.DataLoader(
+            train_dataset,
+            batch_sampler=BatchSampler,
+            places=paddle.static.cpu_places(),
+        )

         ptq = PostTrainingQuantization(
             executor=exe,
             model_dir=model_path,
             model_filename=model_filename,
             params_filename=params_filename,
-            sample_generator=val_reader,
+            sample_generator=None,
+            data_loader=val_data_generator,
             batch_size=batch_size,
             batch_nums=batch_nums,
             algo=algo,
46 changes: 31 additions & 15 deletions test/quantization/test_post_training_quantization_while.py
@@ -29,6 +29,23 @@
 np.random.seed(0)


+class TransedMnistDataSet(paddle.io.Dataset):
+    def __init__(self, mnist_data):
+        self.mnist_data = mnist_data
+
+    def __getitem__(self, idx):
+        img = (
+            np.array(self.mnist_data[idx][0])
+            .astype('float32')
+            .reshape(1, 28, 28)
+        )
+        batch = img / 127.5 - 1.0
+        return {"x": batch}
+
+    def __len__(self):
+        return len(self.mnist_data)
+
+
 class TestPostTrainingQuantization(unittest.TestCase):
     def setUp(self):
         self.download_path = 'int8/download'
@@ -132,28 +149,30 @@ def generate_quantized_model(
         is_optimize_model=False,
         batch_size=10,
         batch_nums=10,
-        is_data_loader=False,
     ):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)
-        val_reader = paddle.dataset.mnist.train()

-        def val_data_generator():
-            batches = []
-            for data in val_reader():
-                batches.append(data[0].reshape(1, 28, 28))
-                if len(batches) == batch_size:
-                    batches = np.asarray(batches)
-                    yield {"x": batches}
-                    batches = []
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', transform=None
+        )
+        train_dataset = TransedMnistDataSet(train_dataset)
+        BatchSampler = paddle.io.BatchSampler(
+            train_dataset, batch_size=batch_size
+        )
+        val_data_generator = paddle.io.DataLoader(
+            train_dataset,
+            batch_sampler=BatchSampler,
+            places=paddle.static.cpu_places(),
+        )

         ptq = PostTrainingQuantization(
             executor=exe,
             model_dir=model_path,
             model_filename='model.pdmodel',
             params_filename='model.pdiparams',
-            sample_generator=val_reader if not is_data_loader else None,
-            data_loader=val_data_generator if is_data_loader else None,
+            sample_generator=None,
+            data_loader=val_data_generator,
             batch_size=batch_size,
             batch_nums=batch_nums,
             algo=algo,
@@ -183,7 +202,6 @@ def run_test(
         batch_size=10,
         infer_iterations=10,
         quant_iterations=5,
-        is_data_loader=False,
     ):
         origin_model_path = self.download_model(data_url, data_md5, model_name)

@@ -210,7 +228,6 @@ def run_test(
             is_optimize_model,
             batch_size,
             quant_iterations,
-            is_data_loader=is_data_loader,
         )

         print(
@@ -442,7 +459,6 @@ def test_post_training_abs_max(self):
             batch_size,
             infer_iterations,
             quant_iterations,
-            is_data_loader=True,
         )
