Add efficiency benchmark #18

Merged 12 commits on Jul 16, 2020
111 changes: 111 additions & 0 deletions scripts/benchmark.py
@@ -0,0 +1,111 @@
import argparse
from timeit import default_timer as timer
from typing import List

import numpy as np
import pandas as pd
import torch
from flair.datasets import CONLL_03_DUTCH
from loguru import logger
from tqdm import tqdm

from deidentify.base import Document
from deidentify.taggers import CRFTagger, DeduceTagger, FlairTagger, TextTagger
from deidentify.tokenizer import TokenizerFactory

# Number of timed repetitions per tagger.
N_REPETITIONS = 5
# Number of CoNLL-03 Dutch sentences used as benchmark documents.
N_SENTS = 5000


def load_data():
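    # Benchmark corpus: the first N_SENTS sentences of the CoNLL-03 Dutch training split,
    # wrapped as annotation-free deidentify Documents.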
corpus = CONLL_03_DUTCH()
sentences = corpus.train[:N_SENTS]
tokens = sum(len(sent) for sent in sentences)
docs = [Document(name='', text=sent.to_plain_string(), annotations=[]) for sent in sentences]
return docs, tokens


def benchmark_tagger(tagger: TextTagger, docs: List[Document], num_tokens: int):
durations = []

    # Time N_REPETITIONS complete annotation passes over all documents.
    for _ in tqdm(range(N_REPETITIONS), desc='Repetitions'):
start = timer()
tagger.annotate(docs)
end = timer()
durations.append(end - start)

    # After benchmarking a Flair model on GPU, release cached memory so the next model has room.
    if isinstance(tagger, FlairTagger) and torch.cuda.is_available():
        torch.cuda.empty_cache()

return {
'mean': np.mean(durations),
'std': np.std(durations),
'tokens/s': num_tokens / np.mean(durations),
'docs/s': len(docs) / np.mean(durations),
'num_docs': len(docs),
'num_tokens': num_tokens
}


def main(args):
logger.info('Load data...')
documents, num_tokens = load_data()

logger.info('Initialize taggers...')
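    # The CRF tokenizer keeps the full spaCy pipeline; the BiLSTM-CRF tokenizer disables the tagger and NER components.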
tokenizer_crf = TokenizerFactory().tokenizer(corpus='ons', disable=())
tokenizer_bilstm = TokenizerFactory().tokenizer(corpus='ons', disable=("tagger", "ner"))

taggers = [
('DEDUCE', DeduceTagger(verbose=True)),
('CRF', CRFTagger(
model='model_crf_ons_tuned-v0.1.0',
tokenizer=tokenizer_crf,
verbose=True
)),
('BiLSTM-CRF (large)', FlairTagger(
model='model_bilstmcrf_ons_large-v0.1.0',
tokenizer=tokenizer_bilstm,
mini_batch_size=args.bilstmcrf_large_batch_size,
verbose=True
)),
('BiLSTM-CRF (fast)', FlairTagger(
model='model_bilstmcrf_ons_fast-v0.1.0',
tokenizer=tokenizer_bilstm,
mini_batch_size=args.bilstmcrf_fast_batch_size,
verbose=True
))
]

benchmark_results = []
tagger_names = []
for tagger_name, tagger in taggers:
logger.info(f'Benchmark inference for tagger: {tagger_name}')
scores = benchmark_tagger(tagger, documents, num_tokens)
benchmark_results.append(scores)
tagger_names.append(tagger_name)

    # One row per tagger: mean/std duration plus tokens/s and docs/s throughput.
    df = pd.DataFrame(data=benchmark_results, index=tagger_names)
df.to_csv(f'{args.benchmark_name}.csv')
logger.info('\n{}', df)


def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument("benchmark_name", type=str, help="Name of the benchmark.")
parser.add_argument(
"--bilstmcrf_large_batch_size",
type=int,
help="Batch size to use with the large model.",
default=256
)
parser.add_argument(
"--bilstmcrf_fast_batch_size",
type=int,
help="Batch size to use with the fast model.",
default=256
)
return parser.parse_args()


if __name__ == '__main__':
main(arg_parser())
21 changes: 21 additions & 0 deletions scripts/benchmark.sh
@@ -0,0 +1,21 @@
#!/bin/bash

set -e

export CUDA_VISIBLE_DEVICES=0
# Smaller batch size so that sequences with Flair embeddings fit in GPU memory.
python -m scripts.benchmark benchmark_gpu \
--bilstmcrf_large_batch_size 64 \
--bilstmcrf_fast_batch_size 64

export CUDA_VISIBLE_DEVICES=""
export MKL_NUM_THREADS=32
python -m scripts.benchmark benchmark_cpu_32_threads

export CUDA_VISIBLE_DEVICES=""
export MKL_NUM_THREADS=16
python -m scripts.benchmark benchmark_cpu_16_threads

export CUDA_VISIBLE_DEVICES=""
export MKL_NUM_THREADS=8
python -m scripts.benchmark benchmark_cpu_8_threads
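
A small post-processing sketch (not part of this PR): each run above writes a <benchmark_name>.csv with per-tagger mean/std durations and tokens/s and docs/s throughput, so the GPU and CPU results can be combined into a single comparison table with pandas. The file names below mirror the invocations in benchmark.sh; adjust them if the runs are renamed.

import pandas as pd

runs = ['benchmark_gpu', 'benchmark_cpu_32_threads',
        'benchmark_cpu_16_threads', 'benchmark_cpu_8_threads']
# Each CSV is indexed by tagger name (DEDUCE, CRF, BiLSTM-CRF large/fast).
frames = [pd.read_csv(f'{run}.csv', index_col=0) for run in runs]
# Stack the runs into one table with a (run, tagger) index and show the key columns.
combined = pd.concat(frames, keys=runs, names=['run', 'tagger'])
print(combined[['mean', 'std', 'tokens/s', 'docs/s']])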