
Commit e99f0b0

Add efficiency benchmark (#18)
* Add efficiency benchmark script
* Set better defaults for number of repetitions and sentences
* Use human readable tagger names in statistics table
* Add wrapper script to start cpu/gpu benchmarks
* Change benchmark wrapper to fail on non-zero exit codes
* Increase number of sentences in benchmark
* Explicitly save number of docs and number of tokens
* Add parameters to configure batch size of BiLSTM-CRF
* Correctly apply parameters
* Fix typo
* Clear cuda cache in between repetitions
1 parent 31019c8 · commit e99f0b0

File tree

2 files changed: +132 −0 lines changed


scripts/benchmark.py

+111
@@ -0,0 +1,111 @@

import argparse
from timeit import default_timer as timer
from typing import List

import numpy as np
import pandas as pd
import torch
from flair.datasets import CONLL_03_DUTCH
from loguru import logger
from tqdm import tqdm

from deidentify.base import Document
from deidentify.taggers import CRFTagger, DeduceTagger, FlairTagger, TextTagger
from deidentify.tokenizer import TokenizerFactory

N_REPETITIONS = 5
N_SENTS = 5000


def load_data():
    corpus = CONLL_03_DUTCH()
    sentences = corpus.train[:N_SENTS]
    tokens = sum(len(sent) for sent in sentences)
    docs = [Document(name='', text=sent.to_plain_string(), annotations=[]) for sent in sentences]
    return docs, tokens


def benchmark_tagger(tagger: TextTagger, docs: List[Document], num_tokens: int):
    durations = []

    for _ in tqdm(range(0, N_REPETITIONS), desc='Repetitions'):
        start = timer()
        tagger.annotate(docs)
        end = timer()
        durations.append(end - start)

        if isinstance(tagger, FlairTagger) and torch.cuda.is_available():
            torch.cuda.empty_cache()

    return {
        'mean': np.mean(durations),
        'std': np.std(durations),
        'tokens/s': num_tokens / np.mean(durations),
        'docs/s': len(docs) / np.mean(durations),
        'num_docs': len(docs),
        'num_tokens': num_tokens
    }


def main(args):
    logger.info('Load data...')
    documents, num_tokens = load_data()

    logger.info('Initialize taggers...')
    tokenizer_crf = TokenizerFactory().tokenizer(corpus='ons', disable=())
    tokenizer_bilstm = TokenizerFactory().tokenizer(corpus='ons', disable=("tagger", "ner"))

    taggers = [
        ('DEDUCE', DeduceTagger(verbose=True)),
        ('CRF', CRFTagger(
            model='model_crf_ons_tuned-v0.1.0',
            tokenizer=tokenizer_crf,
            verbose=True
        )),
        ('BiLSTM-CRF (large)', FlairTagger(
            model='model_bilstmcrf_ons_large-v0.1.0',
            tokenizer=tokenizer_bilstm,
            mini_batch_size=args.bilstmcrf_large_batch_size,
            verbose=True
        )),
        ('BiLSTM-CRF (fast)', FlairTagger(
            model='model_bilstmcrf_ons_fast-v0.1.0',
            tokenizer=tokenizer_bilstm,
            mini_batch_size=args.bilstmcrf_fast_batch_size,
            verbose=True
        ))
    ]

    benchmark_results = []
    tagger_names = []
    for tagger_name, tagger in taggers:
        logger.info(f'Benchmark inference for tagger: {tagger_name}')
        scores = benchmark_tagger(tagger, documents, num_tokens)
        benchmark_results.append(scores)
        tagger_names.append(tagger_name)

    df = pd.DataFrame(data=benchmark_results, index=tagger_names)
    df.to_csv(f'{args.benchmark_name}.csv')
    logger.info('\n{}', df)


def arg_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("benchmark_name", type=str, help="Name of the benchmark.")
    parser.add_argument(
        "--bilstmcrf_large_batch_size",
        type=int,
        help="Batch size to use with the large model.",
        default=256
    )
    parser.add_argument(
        "--bilstmcrf_fast_batch_size",
        type=int,
        help="Batch size to use with the fast model.",
        default=256
    )
    return parser.parse_args()


if __name__ == '__main__':
    main(arg_parser())
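
The per-tagger statistics are written to <benchmark_name>.csv, indexed by the human-readable tagger names. A minimal sketch for inspecting a finished run, assuming the benchmark_gpu name used by the wrapper script below:

import pandas as pd

# Columns follow the dict returned by benchmark_tagger():
# mean, std, tokens/s, docs/s, num_docs, num_tokens.
df = pd.read_csv('benchmark_gpu.csv', index_col=0)

# Rank taggers by token throughput.
print(df.sort_values('tokens/s', ascending=False))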

scripts/benchmark.sh

+21
@@ -0,0 +1,21 @@

#!/bin/bash

set -e

export CUDA_VISIBLE_DEVICES=0
# Smaller batch size so that sequences with Flair embeddings fit in GPU memory.
python -m scripts.benchmark benchmark_gpu \
    --bilstmcrf_large_batch_size 64 \
    --bilstmcrf_fast_batch_size 64

export CUDA_VISIBLE_DEVICES=""
export MKL_NUM_THREADS=32
python -m scripts.benchmark benchmark_cpu_32_threads

export CUDA_VISIBLE_DEVICES=""
export MKL_NUM_THREADS=16
python -m scripts.benchmark benchmark_cpu_16_threads

export CUDA_VISIBLE_DEVICES=""
export MKL_NUM_THREADS=8
python -m scripts.benchmark benchmark_cpu_8_threads
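
To check that each run actually sees the intended device and thread configuration, a small probe like the following can be executed under the same environment variables. This is a sketch, not part of the commit; note that torch.get_num_threads() reports torch's intra-op thread count, which is not necessarily driven by MKL_NUM_THREADS on every build:

import os
import torch

# With CUDA_VISIBLE_DEVICES="" CUDA should be unavailable, forcing the
# taggers onto the CPU; MKL_NUM_THREADS caps MKL parallelism there.
print('CUDA_VISIBLE_DEVICES:', os.environ.get('CUDA_VISIBLE_DEVICES'))
print('MKL_NUM_THREADS:', os.environ.get('MKL_NUM_THREADS'))
print('CUDA available:', torch.cuda.is_available())
print('torch intra-op threads:', torch.get_num_threads())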
