Skip to content

Commit

Permalink
Release: QnA Module V1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
qqaazz0222 committed Nov 3, 2024
1 parent 61d146d commit cb99e5d
Show file tree
Hide file tree
Showing 20 changed files with 566 additions and 0 deletions.
138 changes: 138 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyderworkspace

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

datasets/

*.bin
36 changes: 36 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Base image coordinates; override with --build-arg to select another
# PyTorch / CUDA / cuDNN combination.
ARG PYTORCH="2.2.0"
ARG CUDA="12.1"
ARG CUDNN="8"

FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel

ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
ENV DEBIAN_FRONTEND=noninteractive

# Update package list and install software-properties-common
RUN apt update && apt install -y software-properties-common

# Add deadsnakes PPA for Python 3.9
RUN add-apt-repository -y ppa:deadsnakes/ppa

# Refresh the package index, install the system packages and clean the apt
# cache in a single layer: a cleanup performed in a later layer cannot shrink
# the layers that were already written.
RUN apt update \
    && apt install -y git vim libgl1-mesa-glx libglib2.0-0 ninja-build libsm6 libxrender-dev libxext6 python3.9 python3.9-dev python3.9-distutils wget net-tools zip unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.9 as the default python version
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1

# Install pip for Python 3.9
RUN wget https://bootstrap.pypa.io/get-pip.py
RUN python3.9 get-pip.py

# Install Python Library
RUN pip install --upgrade pip
RUN pip install --upgrade setuptools
# The version specifier must be quoted: unquoted, the shell treats ">" as an
# output redirection, writes pip's output to a file named "=2.0.0" and drops
# the version constraint.
RUN pip install Pillow Flask Flask-Cors "tensorflow>=2.0.0" transformers

# Set the working directory used by later instructions and by the container
# at run time
WORKDIR /app

5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
![thumbnail](./resource/thumb.png)

> 릴리즈 버전 [Release v1.0](https://github.com/DGU-ITRC/PLASS_QNA/releases/tag/v1.0)
TBU
54 changes: 54 additions & 0 deletions model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse
import os
import torch
import random
import numpy as np
from transformers import DistilBertTokenizerFast
from transformers import DistilBertForQuestionAnswering

def init_args(context, question):
    """Assemble the settings dictionary for one prediction run.

    If either ``context`` or ``question`` is ``None``, both are replaced by
    the built-in demo pair, so the passage and the question always belong
    together.

    Args:
        context: Passage to search for the answer, or ``None``.
        question: Question about the passage, or ``None``.

    Returns:
        A dict with the keys ``context``, ``question``, ``seed`` and
        ``save_dir``.
    """
    if any(value is None for value in (context, question)):
        context, question = (
            "Stephen Silvagni (born 31 May 1967) is a former Australian rules footballer for the Carlton Football Club.",
            "What was the name of Stephen Silvagni's team?",
        )
    return dict(
        context=context,
        question=question,
        seed=42,
        save_dir='save/baseline-01',
    )

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed passed unchanged to every generator.
    """
    # Same order as before: stdlib, NumPy, torch CPU, then all CUDA devices.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

def predict(context, question):
    """Extract the answer to ``question`` from ``context``.

    Loads the fine-tuned ``DistilBertForQuestionAnswering`` checkpoint from
    ``<save_dir>/checkpoint`` and the ``distilbert-base-uncased`` tokenizer,
    runs one forward pass and decodes the tokens between the highest-scoring
    start and end positions.

    Args:
        context: Passage to search for the answer. If this or ``question`` is
            ``None``, ``init_args`` substitutes its built-in demo pair.
        question: Question about the passage.

    Returns:
        A dict with the keys ``context``, ``question``, ``start_idx``,
        ``end_idx`` (token positions in the encoded question/context pair)
        and ``answer`` (the decoded span; empty when the predicted end
        precedes the predicted start).
    """
    args = init_args(context, question)
    set_seed(args['seed'])
    # Set before the tokenizer is created rather than after it, so the
    # setting is already present whenever the tokenizers backend reads it.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    checkpoint_path = os.path.join(args['save_dir'], 'checkpoint')
    model = DistilBertForQuestionAnswering.from_pretrained(checkpoint_path)
    # Actually use the selected device (it was previously computed but unused).
    model.to(device)
    model.eval()
    tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

    context = args['context']
    question = args['question']
    # Truncate only the context so an over-long passage no longer overflows
    # the model's position embeddings; an answer located in the truncated
    # tail of the passage cannot be found.
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",
        max_length=model.config.max_position_embeddings,
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # .item() works for both CPU and CUDA tensors, unlike .numpy().
    start_idx = int(outputs.start_logits.argmax().item())
    end_idx = int(outputs.end_logits.argmax().item())
    predict_tokens = inputs.input_ids[0, start_idx:end_idx + 1]
    predict_answer = tokenizer.decode(predict_tokens)

    result = {'context': context, 'question': question, 'start_idx': start_idx, 'end_idx': end_idx, 'answer': predict_answer}
    print(result)
    return result


if __name__ == '__main__':
    # Script entry point: run a single demo prediction on a fixed
    # passage/question pair.
    predict(
        context="Stephen Silvagni (born 31 May 1967) is a former Australian rules footballer for the Carlton Football Club.",
        question="What was the name of Stephen Silvagni's team?",
    )
Binary file added resource/thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
26 changes: 26 additions & 0 deletions save/baseline-01/baseline-01/log_train.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[06.08.23 20:20:39] Args: {
"batch_size": 16,
"do_eval": false,
"do_test": false,
"do_train": true,
"eval": false,
"eval_datasets": "race,relation_extraction,duorc",
"eval_dir": "datasets/oodomain_test",
"eval_every": 5000,
"lr": 3e-05,
"num_epochs": 3,
"num_visuals": 10,
"recompute_features": false,
"run_name": "baseline",
"save_dir": "save/baseline-01/baseline-01",
"seed": 42,
"sub_file": "",
"train": false,
"train_datasets": "squad,nat_questions,newsqa",
"train_dir": "datasets/indomain_train",
"val_dir": "datasets/indomain_val",
"visualize_predictions": false
}
[06.08.23 20:20:39] Preparing Training Data...
[06.08.23 20:21:46] Preparing Validation Data...
[06.08.23 20:22:00] Epoch: 0
Binary file not shown.
26 changes: 26 additions & 0 deletions save/baseline-01/baseline-02/log_train.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[06.08.23 20:33:01] Args: {
"batch_size": 16,
"do_eval": false,
"do_test": false,
"do_train": true,
"eval": false,
"eval_datasets": "race,relation_extraction,duorc",
"eval_dir": "datasets/oodomain_test",
"eval_every": 5000,
"lr": 3e-05,
"num_epochs": 3,
"num_visuals": 10,
"recompute_features": false,
"run_name": "baseline",
"save_dir": "save/baseline-01/baseline-02",
"seed": 42,
"sub_file": "",
"train": false,
"train_datasets": "squad,nat_questions,newsqa",
"train_dir": "datasets/indomain_train",
"val_dir": "datasets/indomain_val",
"visualize_predictions": false
}
[06.08.23 20:33:01] Preparing Training Data...
[06.08.23 20:34:05] Preparing Validation Data...
[06.08.23 20:34:18] Epoch: 0
Binary file not shown.
26 changes: 26 additions & 0 deletions save/baseline-01/baseline-03/log_train.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[06.08.23 20:35:04] Args: {
"batch_size": 16,
"do_eval": false,
"do_test": false,
"do_train": true,
"eval": false,
"eval_datasets": "race,relation_extraction,duorc",
"eval_dir": "datasets/oodomain_test",
"eval_every": 5000,
"lr": 3e-05,
"num_epochs": 3,
"num_visuals": 10,
"recompute_features": false,
"run_name": "baseline",
"save_dir": "save/baseline-01/baseline-03",
"seed": 42,
"sub_file": "",
"train": false,
"train_datasets": "squad,nat_questions,newsqa",
"train_dir": "datasets/indomain_train",
"val_dir": "datasets/indomain_val",
"visualize_predictions": false
}
[06.08.23 20:35:04] Preparing Training Data...
[06.08.23 20:36:06] Preparing Validation Data...
[06.08.23 20:36:19] Epoch: 0
Binary file not shown.
27 changes: 27 additions & 0 deletions save/baseline-01/baseline-04/log_train.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[06.08.23 20:40:51] Args: {
"batch_size": 16,
"do_eval": false,
"do_test": false,
"do_train": true,
"eval": false,
"eval_datasets": "race,relation_extraction,duorc",
"eval_dir": "datasets/oodomain_test",
"eval_every": 5000,
"lr": 3e-05,
"num_epochs": 3,
"num_visuals": 10,
"recompute_features": false,
"run_name": "baseline",
"save_dir": "save/baseline-01/baseline-04",
"seed": 42,
"sub_file": "",
"train": false,
"train_datasets": "squad,nat_questions,newsqa",
"train_dir": "datasets/indomain_train",
"val_dir": "datasets/indomain_val",
"visualize_predictions": false
}
[06.08.23 20:40:51] Preparing Training Data...
[06.08.23 20:42:57] Preparing Validation Data...
[06.08.23 20:43:16] Epoch: 0
[06.08.23 20:43:17] Evaluating at step 0...
Empty file.
23 changes: 23 additions & 0 deletions save/baseline-01/checkpoint/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"_name_or_path": "distilbert-base-uncased",
"activation": "gelu",
"architectures": [
"DistilBertForQuestionAnswering"
],
"attention_dropout": 0.1,
"dim": 768,
"dropout": 0.1,
"hidden_dim": 3072,
"initializer_range": 0.02,
"max_position_embeddings": 512,
"model_type": "distilbert",
"n_heads": 12,
"n_layers": 6,
"pad_token_id": 0,
"qa_dropout": 0.1,
"seq_classif_dropout": 0.2,
"sinusoidal_pos_embds": false,
"tie_weights_": true,
"transformers_version": "4.2.2",
"vocab_size": 30522
}
8 changes: 8 additions & 0 deletions save/baseline-01/log_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[06.09.23 01:22:42] Eval F1: -1.00, EM: -1.00
[06.09.23 01:22:42] Writing submission file to save/baseline-01/test_mtl_submission.csv...
[06.12.23 16:54:13] Eval F1: -1.00, EM: -1.00
[06.12.23 16:54:13] Writing submission file to save/baseline-01/test_mtl_submission.csv...
[06.13.23 17:38:30] Eval F1: -1.00, EM: -1.00
[06.13.23 17:38:30] Writing submission file to save/baseline-01/test_mtl_submission.csv...
[06.15.23 20:54:55] Eval F1: -1.00, EM: -1.00
[06.15.23 20:54:55] Writing submission file to save/baseline-01/test_mtl_submission.csv...
Loading

0 comments on commit cb99e5d

Please sign in to comment.