This repository was archived by the owner on May 15, 2024. It is now read-only.

Commit 2443fd7

init submit

1 parent 006a68a commit 2443fd7

23 files changed: +2613 -2 lines changed

README.md (+112 -2)

@@ -1,2 +1,112 @@
The previous two-line README ("# DiffKS", "Difference-aware Knowledge Selection") is replaced with:
# DiffKS: Difference-aware Knowledge Selection

Code for the paper: **Difference-aware Knowledge Selection for Knowledge-grounded Conversation Generation**

Please cite this repository using the following reference:

```bib
@inproceedings{diffks-zheng-2020,
    title="{D}ifference-aware Knowledge Selection for Knowledge-grounded Conversation Generation",
    author="Zheng, Chujie and
      Cao, Yunbo and
      Jiang, Daxin and
      Huang, Minlie",
    booktitle="Findings of EMNLP",
    year="2020"
}
```
## Prepare Data

Download the [Wizard of Wikipedia](https://drive.google.com/drive/folders/1eowwYSfJKaDtYgKHZVqh8alNmqP3jv9A?usp=sharing) dataset (obtained with [ParlAI](https://github.com/facebookresearch/ParlAI); see [Sequential Latent Knowledge Selection](https://github.com/bckim92/sequential-knowledge-transformer) for download details) and put the files in the folder `./Wizard-of-Wikipedia`, or download the [Holl-E](https://drive.google.com/drive/folders/1xQBRDs5q_2xLOdOpbq7UeAmUM0Ht370A?usp=sharing) dataset and put the files in the folder `./Holl-E`.
For Wizard of Wikipedia (WoW):

```bash
python prepare_wow_data.py
```

For Holl-E:

```bash
python prepare_holl_data.py
```

In addition, download the pretrained [GloVe word vectors](https://apache-mxnet.s3.cn-north-1.amazonaws.com.cn/gluon/embeddings/glove/glove.6B.zip), unzip the files into `./`, and rename the 300-d embedding file to `glove.txt`.
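For example, a shell sequence along the following lines would do this; the name of the 300-d file inside the archive, `glove.6B.300d.txt`, follows the standard GloVe 6B naming and is an assumption here:

```bash
# Fetch and unpack the GloVe 6B embeddings into the repository root
wget https://apache-mxnet.s3.cn-north-1.amazonaws.com.cn/gluon/embeddings/glove/glove.6B.zip
unzip glove.6B.zip -d ./
# Rename the 300-d embedding file to the name expected by the code
mv glove.6B.300d.txt glove.txt
```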
## Training

Our code currently supports only single-GPU training, which requires at least 12 GB of GPU memory. The `--disentangle` flag trains the disentangled model; delete that line to train the fused model.

For Wizard of Wikipedia:

```bash
python run.py \
    --mode train \
    --dataset WizardOfWiki \
    --datapath ./Wizard-of-Wikipedia/prepared_data \
    --wvpath ./ \
    --cuda 0 \
    --droprate 0.5 \
    --disentangle \
    --hist_len 2 \
    --hist_weights 0.7 0.3 \
    --out_dir ./output \
    --model_dir ./model \
    --cache
```
For Holl-E:

```bash
python run.py \
    --mode train \
    --dataset HollE \
    --datapath ./Holl-E/prepared_data \
    --wvpath ./ \
    --cuda 0 \
    --droprate 0.5 \
    --disentangle \
    --hist_len 2 \
    --hist_weights 0.7 0.3 \
    --out_dir ./output \
    --model_dir ./model \
    --cache
```
You can modify `run.py` and `myCoTK/dataloader.py` to change other hyperparameters.
## Evaluation

As in training, the `--disentangle` flag selects the disentangled model; delete that line to evaluate the fused model.

For Wizard of Wikipedia:

```bash
python run.py \
    --mode test \
    --dataset WizardOfWiki \
    --cuda 0 \
    --restore best \
    --disentangle \
    --hist_len 2 \
    --hist_weights 0.7 0.3 \
    --out_dir ./output \
    --model_dir ./model \
    --cache
```
For Holl-E:

```bash
python run.py \
    --mode test \
    --dataset HollE \
    --cuda 0 \
    --restore best \
    --disentangle \
    --hist_len 2 \
    --hist_weights 0.7 0.3 \
    --out_dir ./output \
    --model_dir ./model \
    --cache
```

main.py (+75)

@@ -0,0 +1,75 @@
```python
# coding:utf-8
import logging
import json
import os

from cotk.wordvector import WordVector, Glove
from myCoTK.dataloader import WizardOfWiki, HollE
from utils import debug, try_cache, cuda_init, Storage
from seq2seq import Seq2seq


def main(args):
    # filename=0 is falsy, so basicConfig falls back to the default stream handler
    logging.basicConfig(filename=0,
                        level=logging.DEBUG,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
                        datefmt='%H:%M:%S')

    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))

    cuda_init(args.cuda_num, args.cuda)

    # Non-hyperparameter runtime state passed to the model
    volatile = Storage()
    volatile.load_exclude_set = args.load_exclude_set
    volatile.restoreCallback = args.restoreCallback

    # Select the data loader class for the chosen dataset
    if args.dataset == 'WizardOfWiki':
        data_class = WizardOfWiki
    elif args.dataset == 'HollE':
        data_class = HollE
    else:
        raise ValueError("Unknown dataset: %s" % args.dataset)
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    # Create per-dataset cache, output, and model directories
    if not os.path.exists(args.cache_dir):
        os.mkdir(args.cache_dir)
    args.cache_dir = os.path.join(args.cache_dir, args.dataset)

    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.join(args.out_dir, args.dataset)

    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    if args.dataset not in args.model_dir:
        args.model_dir = os.path.join(args.model_dir, args.dataset)

    # Load the dataset and word vectors, optionally through the on-disk cache
    if args.cache:
        dm = try_cache(data_class, (args.datapath,), args.cache_dir)
        volatile.wordvec = try_cache(
            lambda wv, ez, vl: wordvec_class(wv).load_matrix(ez, vl),
            (args.wvpath, args.embedding_size, dm.vocab_list),
            args.cache_dir, wordvec_class.__name__)
    else:
        dm = data_class(args.datapath)
        wv = wordvec_class(args.wvpath)
        volatile.wordvec = wv.load_matrix(args.embedding_size, dm.vocab_list)

    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    # Build the model and dispatch on the requested mode
    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    elif args.mode == 'dev':
        model.test_dev()
    else:
        raise ValueError("Unknown mode")
```
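`main(args)` is driven by `run.py` (not included in this commit), which builds an attribute-style `args` object from the command-line flags shown in the README. As a rough illustration of the fields `main` reads, a hypothetical minimal driver might look like the sketch below; all field values and defaults here are assumptions for illustration, not the repository's actual `run.py`:

```python
# Hypothetical driver for illustration only; the real CLI lives in run.py.
from utils import Storage
from main import main

args = Storage()
args.mode = 'train'            # 'train', 'test', or 'dev'
args.dataset = 'WizardOfWiki'  # or 'HollE'
args.datapath = './Wizard-of-Wikipedia/prepared_data'
args.wvclass = 'Glove'
args.wvpath = './'             # directory containing glove.txt
args.embedding_size = 300      # matches the 300-d GloVe file
args.cuda = True
args.cuda_num = 0
args.debug = False
args.cache = True
args.cache_dir = './cache'     # assumed default
args.out_dir = './output'
args.model_dir = './model'
args.load_exclude_set = []     # assumed defaults for checkpoint restoring
args.restoreCallback = None

main(args)
```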

myCoTK/__init__.py

Whitespace-only changes.
