Fix file-handle resource leaks #9309

Merged: 1 commit, Feb 4, 2018
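The pattern is the same throughout the diff: a bare open(...).read() (or an h5py/tarfile handle that is only closed on the happy path) becomes a with block, so the handle is released deterministically even when reading raises. A minimal before/after sketch of that pattern, using the first instance from docs/autogen.py:

# Before: the file object is closed only when it is garbage-collected,
# which CPython happens to do promptly but other interpreters do not,
# and an exception inside read() gives no guarantee of a prompt close.
readme = open('../README.md').read()

# After: the context manager closes the handle on exit, error or not.
with open('../README.md') as f:
    readme = f.read()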
20 changes: 13 additions & 7 deletions docs/autogen.py
@@ -489,13 +489,18 @@ def process_docstring(docstring):
new_fpath = fpath.replace('templates', 'sources')
shutil.copy(fpath, new_fpath)


# Take care of index page.
readme = open('../README.md').read()
index = open('templates/index.md').read()
def read_file(path):
with open(path) as f:
return f.read()


readme = read_file('../README.md')
index = read_file('templates/index.md')
index = index.replace('{{autogenerated}}', readme[readme.find('##'):])
f = open('sources/index.md', 'w')
f.write(index)
f.close()
with open('sources/index.md', 'w') as f:
f.write(index)

print('Starting autogeneration.')
for page_data in PAGES:
@@ -564,7 +569,7 @@ def process_docstring(docstring):
page_name = page_data['page']
path = os.path.join('sources', page_name)
if os.path.exists(path):
template = open(path).read()
template = read_file(path)
assert '{{autogenerated}}' in template, ('Template found for ' + path +
' but missing {{autogenerated}} tag.')
mkdown = template.replace('{{autogenerated}}', mkdown)
@@ -574,6 +579,7 @@ def process_docstring(docstring):
subdir = os.path.dirname(path)
if not os.path.exists(subdir):
os.makedirs(subdir)
open(path, 'w').write(mkdown)
with open(path, 'w') as f:
f.write(mkdown)

shutil.copyfile('../CONTRIBUTING.md', 'sources/contributing.md')
7 changes: 4 additions & 3 deletions examples/babi_memnn.py
@@ -100,7 +100,7 @@ def vectorize_stories(data):
'$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz\n'
'$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz')
raise
tar = tarfile.open(path)


challenges = {
# QA1 with 10,000 samples
@@ -112,8 +112,9 @@ def vectorize_stories(data):
challenge = challenges[challenge_type]

print('Extracting stories for the challenge:', challenge_type)
train_stories = get_stories(tar.extractfile(challenge.format('train')))
test_stories = get_stories(tar.extractfile(challenge.format('test')))
with tarfile.open(path) as tar:
train_stories = get_stories(tar.extractfile(challenge.format('train')))
test_stories = get_stories(tar.extractfile(challenge.format('test')))

vocab = set()
for story, q, answer in train_stories + test_stories:
7 changes: 4 additions & 3 deletions examples/babi_rnn.py
@@ -160,7 +160,7 @@ def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
'$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz\n'
'$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz')
raise
tar = tarfile.open(path)

# Default QA1 with 1000 samples
# challenge = 'tasks_1-20_v1-2/en/qa1_single-supporting-fact_{}.txt'
# QA1 with 10,000 samples
@@ -169,8 +169,9 @@ def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt'
# QA2 with 10,000 samples
# challenge = 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt'
train = get_stories(tar.extractfile(challenge.format('train')))
test = get_stories(tar.extractfile(challenge.format('test')))
with tarfile.open(path) as tar:
train = get_stories(tar.extractfile(challenge.format('train')))
test = get_stories(tar.extractfile(challenge.format('test')))

vocab = set()
for story, q, answer in train + test:
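A side note on the tarfile changes in both bAbI examples: TarFile has supported the context-manager protocol since Python 2.7 / 3.2, and the get_stories calls have to stay inside the with block because the file objects returned by extractfile are only readable while the archive is open. A small sketch, with the archive path as a placeholder and the member name taken from the commented challenge paths above:

import tarfile

with tarfile.open('babi-tasks-v1-2.tar.gz') as tar:
    member = 'tasks_1-20_v1-2/en/qa1_single-supporting-fact_train.txt'
    # extractfile() returns a file object tied to the open archive,
    # so it must be consumed before the with block exits.
    stories = tar.extractfile(member).read().decode('utf-8').splitlines()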
3 changes: 2 additions & 1 deletion examples/lstm_seq2seq.py
@@ -66,7 +66,8 @@
target_texts = []
input_characters = set()
target_characters = set()
lines = open(data_path, 'r', encoding='utf-8').read().split('\n')
with open(data_path, 'r', encoding='utf-8') as f:
lines = f.read().split('\n')
for line in lines[: min(num_samples, len(lines) - 1)]:
input_text, target_text = line.split('\t')
# We use "tab" as the "start sequence" character
3 changes: 2 additions & 1 deletion examples/lstm_seq2seq_restore.py
@@ -29,7 +29,8 @@
target_texts = []
input_characters = set()
target_characters = set()
lines = open(data_path, 'r', encoding='utf-8').read().split('\n')
with open(data_path, 'r', encoding='utf-8') as f:
lines = f.read().split('\n')
for line in lines[: min(num_samples, len(lines) - 1)]:
input_text, target_text = line.split('\t')
# We use "tab" as the "start sequence" character
3 changes: 2 additions & 1 deletion examples/lstm_text_generation.py
@@ -23,7 +23,8 @@
import io

path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = io.open(path, encoding='utf-8').read().lower()
with io.open(path, encoding='utf-8') as f:
text = f.read().lower()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
4 changes: 2 additions & 2 deletions examples/mnist_acgan.py
@@ -341,5 +341,5 @@ def build_discriminator():
Image.fromarray(img).save(
'plot_epoch_{0:03d}_generated.png'.format(epoch))

pickle.dump({'train': train_history, 'test': test_history},
open('acgan-history.pkl', 'wb'))
with open('acgan-history.pkl', 'wb') as f:
pickle.dump({'train': train_history, 'test': test_history}, f)
30 changes: 13 additions & 17 deletions examples/pretrained_word_embeddings.py
@@ -38,13 +38,12 @@
print('Indexing word vectors.')

embeddings_index = {}
f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'))
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs
f.close()
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f:
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs

print('Found %s word vectors.' % len(embeddings_index))

@@ -62,16 +61,13 @@
for fname in sorted(os.listdir(path)):
if fname.isdigit():
fpath = os.path.join(path, fname)
if sys.version_info < (3,):
f = open(fpath)
else:
f = open(fpath, encoding='latin-1')
t = f.read()
i = t.find('\n\n') # skip header
if 0 < i:
t = t[i:]
texts.append(t)
f.close()
args = {} if sys.version_info < (3,) else {'encoding': 'latin-1'}
with open(fpath, **args) as f:
t = f.read()
i = t.find('\n\n') # skip header
if 0 < i:
t = t[i:]
texts.append(t)
labels.append(label_id)

print('Found %s texts.' % len(texts))
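The conditional keyword-argument dict above exists because the built-in open() in Python 2 does not accept an encoding parameter; only Python 3 gets the 'latin-1' keyword. A sketch of the same idea as a helper (read_latin1 is illustrative, not part of the diff; io.open(path, encoding='latin-1') would be another portable option):

import sys

def read_latin1(path):
    # Python 2's open() has no encoding parameter, so the keyword is only
    # passed on Python 3.
    kwargs = {} if sys.version_info < (3,) else {'encoding': 'latin-1'}
    with open(path, **kwargs) as f:
        return f.read()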
3 changes: 2 additions & 1 deletion keras/applications/imagenet_utils.py
@@ -205,7 +205,8 @@ def decode_predictions(preds, top=5):
CLASS_INDEX_PATH,
cache_subdir='models',
file_hash='c2c37ea517e94d9795004a39431a14cb')
CLASS_INDEX = json.load(open(fpath))
with open(fpath) as f:
CLASS_INDEX = json.load(f)
results = []
for pred in preds:
top_indices = pred.argsort()[-top:][::-1]
3 changes: 2 additions & 1 deletion keras/backend/__init__.py
@@ -24,7 +24,8 @@
_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json'))
if os.path.exists(_config_path):
try:
_config = json.load(open(_config_path))
with open(_config_path) as f:
_config = json.load(f)
except ValueError:
_config = {}
_floatx = _config.get('floatx', floatx())
21 changes: 10 additions & 11 deletions keras/datasets/cifar.py
@@ -20,17 +20,16 @@ def load_batch(fpath, label_key='labels'):
# Returns
A tuple `(data, labels)`.
"""
f = open(fpath, 'rb')
if sys.version_info < (3,):
d = cPickle.load(f)
else:
d = cPickle.load(f, encoding='bytes')
# decode utf8
d_decoded = {}
for k, v in d.items():
d_decoded[k.decode('utf8')] = v
d = d_decoded
f.close()
with open(fpath, 'rb') as f:
if sys.version_info < (3,):
d = cPickle.load(f)
else:
d = cPickle.load(f, encoding='bytes')
# decode utf8
d_decoded = {}
for k, v in d.items():
d_decoded[k.decode('utf8')] = v
d = d_decoded
data = d['data']
labels = d[label_key]

6 changes: 2 additions & 4 deletions keras/datasets/imdb.py
@@ -114,7 +114,5 @@ def get_word_index(path='imdb_word_index.json'):
path = get_file(path,
origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.json',
file_hash='bfafd718b763782e994055a2d397834f')
f = open(path)
data = json.load(f)
f.close()
return data
with open(path) as f:
return json.load(f)
30 changes: 13 additions & 17 deletions keras/engine/topology.py
@@ -2603,10 +2603,9 @@ def save_weights(self, filepath, overwrite=True):
proceed = ask_to_proceed_with_overwrite(filepath)
if not proceed:
return
f = h5py.File(filepath, 'w')
save_weights_to_hdf5_group(f, self.layers)
f.flush()
f.close()
with h5py.File(filepath, 'w') as f:
save_weights_to_hdf5_group(f, self.layers)
f.flush()

def load_weights(self, filepath, by_name=False,
skip_mismatch=False, reshape=False):
@@ -2641,19 +2640,16 @@ def load_weights(self, filepath, by_name=False,
"""
if h5py is None:
raise ImportError('`load_weights` requires h5py.')
f = h5py.File(filepath, mode='r')
if 'layer_names' not in f.attrs and 'model_weights' in f:
f = f['model_weights']
if by_name:
load_weights_from_hdf5_group_by_name(
f, self.layers, skip_mismatch=skip_mismatch,
reshape=reshape)
else:
load_weights_from_hdf5_group(
f, self.layers, reshape=reshape)

if hasattr(f, 'close'):
f.close()
with h5py.File(filepath, mode='r') as f:
if 'layer_names' not in f.attrs and 'model_weights' in f:
f = f['model_weights']
if by_name:
load_weights_from_hdf5_group_by_name(
f, self.layers, skip_mismatch=skip_mismatch,
reshape=reshape)
else:
load_weights_from_hdf5_group(
f, self.layers, reshape=reshape)

def _updated_config(self):
"""Util hared between different serialization methods.
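One subtlety in the load_weights change above: f is rebound to the 'model_weights' group inside the with block, but the with statement keeps its own reference to the File object, so the underlying HDF5 file is still closed on exit, including when loading raises. A minimal illustration, with the weights path as a placeholder:

import h5py

with h5py.File('weights.h5', mode='r') as f:
    if 'layer_names' not in f.attrs and 'model_weights' in f:
        f = f['model_weights']   # rebinding the name is fine; the original
                                 # File object is what gets closed on exit
    print(list(f.attrs))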
13 changes: 6 additions & 7 deletions keras/engine/training.py
@@ -2027,13 +2027,12 @@ def fit_generator(self,
```python
def generate_arrays_from_file(path):
while 1:
f = open(path)
for line in f:
# create numpy arrays of input data
# and labels, from each line in the file
x1, x2, y = process_line(line)
yield ({'input_1': x1, 'input_2': x2}, {'output': y})
f.close()
with open(path) as f:
for line in f:
# create numpy arrays of input data
# and labels, from each line in the file
x1, x2, y = process_line(line)
yield ({'input_1': x1, 'input_2': x2}, {'output': y})

model.fit_generator(generate_arrays_from_file('/my_file.txt'),
steps_per_epoch=10000, epochs=10)
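About the docstring example above: the file stays open across yields, and if the consumer stops early, closing the generator (or letting it be collected) raises GeneratorExit at the paused yield, which the with block turns into a prompt close. A small demonstration with the hypothetical file name from the docstring:

def read_lines(path):
    with open(path) as f:
        for line in f:
            yield line

gen = read_lines('/my_file.txt')
first = next(gen)   # file is open, generator paused at the yield
gen.close()         # GeneratorExit unwinds the with block, closing the file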
50 changes: 23 additions & 27 deletions keras/models.py
@@ -721,23 +721,21 @@ def set_weights(self, weights):
def load_weights(self, filepath, by_name=False, skip_mismatch=False, reshape=False):
if h5py is None:
raise ImportError('`load_weights` requires h5py.')
f = h5py.File(filepath, mode='r')
if 'layer_names' not in f.attrs and 'model_weights' in f:
f = f['model_weights']
with h5py.File(filepath, mode='r') as f:
if 'layer_names' not in f.attrs and 'model_weights' in f:
f = f['model_weights']

# Legacy support
if legacy_models.needs_legacy_support(self):
layers = legacy_models.legacy_sequential_layers(self)
else:
layers = self.layers
if by_name:
topology.load_weights_from_hdf5_group_by_name(f, layers,
skip_mismatch=skip_mismatch,
reshape=reshape)
else:
topology.load_weights_from_hdf5_group(f, layers, reshape=reshape)
if hasattr(f, 'close'):
f.close()
# Legacy support
if legacy_models.needs_legacy_support(self):
layers = legacy_models.legacy_sequential_layers(self)
else:
layers = self.layers
if by_name:
topology.load_weights_from_hdf5_group_by_name(f, layers,
skip_mismatch=skip_mismatch,
reshape=reshape)
else:
topology.load_weights_from_hdf5_group(f, layers, reshape=reshape)

def save_weights(self, filepath, overwrite=True):
if h5py is None:
@@ -753,10 +751,9 @@ def save_weights(self, filepath, overwrite=True):
else:
layers = self.layers

f = h5py.File(filepath, 'w')
topology.save_weights_to_hdf5_group(f, layers)
f.flush()
f.close()
with h5py.File(filepath, 'w') as f:
topology.save_weights_to_hdf5_group(f, layers)
f.flush()

def compile(self, optimizer, loss,
metrics=None,
@@ -1227,13 +1224,12 @@ def fit_generator(self, generator,
```python
def generate_arrays_from_file(path):
while 1:
f = open(path)
for line in f:
# create Numpy arrays of input data
# and labels, from each line in the file
x, y = process_line(line)
yield (x, y)
f.close()
with open(path) as f:
for line in f:
# create Numpy arrays of input data
# and labels, from each line in the file
x, y = process_line(line)
yield (x, y)

model.fit_generator(generate_arrays_from_file('/my_file.txt'),
steps_per_epoch=1000, epochs=10)
7 changes: 3 additions & 4 deletions keras/utils/data_utils.py
@@ -66,10 +66,9 @@ def chunk_read(response, chunk_size=8192, reporthook=None):
else:
break

response = urlopen(url, data)
with open(filename, 'wb') as fd:
for chunk in chunk_read(response, reporthook=reporthook):
fd.write(chunk)
with closing(urlopen(url, data)) as response, open(filename, 'wb') as fd:
for chunk in chunk_read(response, reporthook=reporthook):
fd.write(chunk)
else:
from six.moves.urllib.request import urlretrieve

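The closing() wrapper in data_utils.py is there because the response object returned by urlopen under Python 2 does not support the with statement on its own; contextlib.closing gives any object with a close() method a context manager. The import of closing is not visible in this hunk and is assumed to be added elsewhere in the file. A sketch of the idea, with a placeholder URL and filename:

from contextlib import closing
from six.moves.urllib.request import urlopen

with closing(urlopen('https://example.com/data.bin')) as response:
    with open('data.bin', 'wb') as fd:
        # Both the network response and the local file are closed on exit.
        for chunk in iter(lambda: response.read(8192), b''):
            fd.write(chunk)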
15 changes: 7 additions & 8 deletions tests/keras/utils/io_utils_test.py
@@ -31,14 +31,13 @@ def in_tmpdir(tmpdir):
def create_dataset(h5_path='test.h5'):
X = np.random.randn(200, 10).astype('float32')
y = np.random.randint(0, 2, size=(200, 1))
f = h5py.File(h5_path, 'w')
# Creating dataset to store features
X_dset = f.create_dataset('my_data', (200, 10), dtype='f')
X_dset[:] = X
# Creating dataset to store labels
y_dset = f.create_dataset('my_labels', (200, 1), dtype='i')
y_dset[:] = y
f.close()
with h5py.File(h5_path, 'w') as f:
# Creating dataset to store features
X_dset = f.create_dataset('my_data', (200, 10), dtype='f')
X_dset[:] = X
# Creating dataset to store labels
y_dset = f.create_dataset('my_labels', (200, 1), dtype='i')
y_dset[:] = y


def test_io_utils(in_tmpdir):
Expand Down