Skip to content
This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Commit

Permalink
Wait for gc clean-up in main thread before fork in DatasetLoader (#1525)
Browse files: browse the repository at this point in the history
cf apache/mxnet@fee8bf4

Signed-off-by: Sheng Zha <[email protected]>
  • Loading branch information
szha authored Feb 22, 2021
1 parent 46c9221 commit 223f1f6
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/gluonnlp/data/loading.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,14 +24,15 @@

import io
import os
import gc
import glob
import pickle
import warnings
import multiprocessing
from functools import partial

import numpy as np
from mxnet import context
from mxnet import context, npx
from mxnet.gluon.data import ArrayDataset, SimpleDataset
from mxnet.gluon.data.dataloader import ForkingPickler, _as_in_context, \
default_mp_batchify_fn, default_batchify_fn
Expand Down Expand Up @@ -480,12 +481,20 @@ def __init__(self, file_patterns, file_sampler,
self._manager = None
self._dataset_worker_pool = None
if self._num_dataset_workers > 0:
npx.waitall()
import gc
gc.collect()
npx.waitall()
self._manager = multiprocessing.Manager()
self._dataset_worker_pool = multiprocessing.Pool(self._num_dataset_workers,
initializer=_initialize_dataset_worker,
initargs=[self._manager])
self._batch_worker_pool = None
if self._num_batch_workers > 0:
npx.waitall()
import gc
gc.collect()
npx.waitall()
self._batch_worker_pool = multiprocessing.Pool(self._num_batch_workers)
if batchify_fn is None:
if self._num_batch_workers > 0:
Expand Down

0 comments on commit 223f1f6

Please sign in to comment.