Fix CLI model IO. (#5535)
* Add test for comparing Python and CLI training results.
trivialfis authored Apr 15, 2020
1 parent 0676a19 commit 468b159
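
For context: after this change the CLI trainer saves and loads models through Learner::SaveModel/LoadModel (see the src/cli_main.cc diff below), so a model file written by the command-line xgboost binary can be opened directly by the Python binding, and the Jenkinsfile changes stash that binary so the Python test stage can invoke it. Below is a minimal sketch of that round trip; the config file, model path, and data path are hypothetical placeholders, not files added by this commit.

import subprocess
import xgboost

# Hypothetical config: task = train, data = <libsvm file>, num_round = 10,
# model_out = cli.model (same keys as the test template added below).
subprocess.run(['./xgboost', 'train.conf'], check=True)

# After this fix, the CLI-produced model loads directly in Python.
booster = xgboost.Booster(model_file='cli.model')
dtrain = xgboost.DMatrix('demo/data/agaricus.txt.train?format=libsvm')
print(booster.predict(dtrain))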
Showing 4 changed files with 100 additions and 9 deletions.
Jenkinsfile: 2 additions, 0 deletions
@@ -201,6 +201,7 @@ def BuildCPU() {
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} build/testxgboost
"""

+ stash name: 'xgboost_cli', includes: 'xgboost'
deleteDir()
}
}
@@ -282,6 +283,7 @@ def TestPythonCPU() {
node('linux && cpu') {
unstash name: 'xgboost_whl_cuda9'
unstash name: 'srcs'
+ unstash name: 'xgboost_cli'
echo "Test Python CPU"
def container_type = "cpu"
def docker_binary = "docker"
Jenkinsfile-win64: 2 additions, 0 deletions
@@ -96,6 +96,7 @@ def BuildWin64() {
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
echo 'Stashing C++ test executable (testxgboost)...'
stash name: 'xgboost_cpp_tests', includes: 'build/testxgboost.exe'
+ stash name: 'xgboost_cli', includes: 'xgboost.exe'
deleteDir()
}
}
@@ -104,6 +105,7 @@ def TestWin64CPU() {
node('win64 && cpu') {
unstash name: 'srcs'
unstash name: 'xgboost_whl'
+ unstash name: 'xgboost_cli'
echo "Test Win64 CPU"
echo "Installing Python wheel..."
bat "conda activate && (python -m pip uninstall -y xgboost || cd .)"
src/cli_main.cc: 5 additions, 9 deletions
@@ -138,14 +138,10 @@ struct CLIParam : public XGBoostParameter<CLIParam> {
// constraint.
if (name_pred == "stdout") {
save_period = 0;
- this->cfg.emplace_back(std::make_pair("silent", "0"));
}
if (dsplit == 0 && rabit::IsDistributed()) {
dsplit = 2;
}
- if (rabit::GetRank() != 0) {
-   this->cfg.emplace_back(std::make_pair("silent", "1"));
- }
}
};

@@ -189,7 +185,7 @@ void CLITrain(const CLIParam& param) {
if (param.model_in != "NULL") {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
- learner->Load(fi.get());
+ learner->LoadModel(fi.get());
learner->SetParams(param.cfg);
} else {
learner->SetParams(param.cfg);
@@ -229,7 +225,7 @@ void CLITrain(const CLIParam& param) {
<< i + 1 << ".model";
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(os.str().c_str(), "w"));
- learner->Save(fo.get());
+ learner->SaveModel(fo.get());
}

if (learner->AllowLazyCheckPoint()) {
@@ -255,7 +251,7 @@ void CLITrain(const CLIParam& param) {
}
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(os.str().c_str(), "w"));
- learner->Save(fo.get());
+ learner->SaveModel(fo.get());
}

double elapsed = dmlc::GetTime() - start;
@@ -277,7 +273,7 @@ void CLIDumpModel(const CLIParam& param) {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->SetParams(param.cfg);
- learner->Load(fi.get());
+ learner->LoadModel(fi.get());
// dump data
std::vector<std::string> dump = learner->DumpModel(
fmap, param.dump_stats, param.dump_format);
@@ -316,7 +312,7 @@ void CLIPredict(const CLIParam& param) {
std::unique_ptr<Learner> learner(Learner::Create({}));
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
- learner->Load(fi.get());
+ learner->LoadModel(fi.get());
learner->SetParams(param.cfg);

LOG(INFO) << "start prediction...";
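The edits above swap Learner::Load/Save for Learner::LoadModel/SaveModel in the CLI's train, dump, and predict paths, and the test added below checks the result from Python: a model trained by the CLI must predict the same values as one trained through the Python API with matching parameters, and must load via xgboost.Booster. For reference, a minimal sketch of the same save/reload round trip done entirely in the Python binding (the file name is a hypothetical placeholder):

import numpy
import xgboost

dtrain = xgboost.DMatrix('demo/data/agaricus.txt.train?format=libsvm')
params = {'objective': 'reg:squarederror', 'eta': 1.0, 'gamma': 1.0,
          'seed': 0, 'min_child_weight': 0, 'max_depth': 3}
booster = xgboost.train(params, dtrain, num_boost_round=10)

# Save and reload the model file; predictions before and after should agree.
booster.save_model('py.model')
reloaded = xgboost.Booster(model_file='py.model')
numpy.testing.assert_allclose(reloaded.predict(dtrain), booster.predict(dtrain))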
tests/python/test_cli.py: 91 additions, 0 deletions
@@ -0,0 +1,91 @@
import os
import tempfile
import unittest
import platform
import xgboost
import subprocess
import numpy


class TestCLI(unittest.TestCase):
    template = '''
booster = gbtree
objective = reg:squarederror
eta = 1.0
gamma = 1.0
seed = 0
min_child_weight = 0
max_depth = 3
task = {task}
model_in = {model_in}
model_out = {model_out}
test_path = {test_path}
name_pred = {name_pred}
num_round = 10
data = {data_path}
eval[test] = {data_path}
'''

    def test_cli_model(self):
        curdir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
        project_root = os.path.normpath(
            os.path.join(curdir, os.path.pardir, os.path.pardir))
        data_path = "{root}/demo/data/agaricus.txt.train?format=libsvm".format(
            root=project_root)

        if platform.system() == 'Windows':
            exe = 'xgboost.exe'
        else:
            exe = 'xgboost'
        exe = os.path.join(project_root, exe)
        assert os.path.exists(exe)

        with tempfile.TemporaryDirectory() as tmpdir:
            model_out = os.path.join(tmpdir, 'test_load_cli_model')
            config_path = os.path.join(tmpdir, 'test_load_cli_model.conf')

            train_conf = self.template.format(data_path=data_path,
                                              task='train',
                                              model_in='NULL',
                                              model_out=model_out,
                                              test_path='NULL',
                                              name_pred='NULL')
            with open(config_path, 'w') as fd:
                fd.write(train_conf)

            subprocess.run([exe, config_path])

            predict_out = os.path.join(tmpdir,
                                       'test_load_cli_model-prediction')
            predict_conf = self.template.format(task='pred',
                                                data_path=data_path,
                                                model_in=model_out,
                                                model_out='NULL',
                                                test_path=data_path,
                                                name_pred=predict_out)
            with open(config_path, 'w') as fd:
                fd.write(predict_conf)

            subprocess.run([exe, config_path])

            cli_predt = numpy.loadtxt(predict_out)

            parameters = {
                'booster': 'gbtree',
                'objective': 'reg:squarederror',
                'eta': 1.0,
                'gamma': 1.0,
                'seed': 0,
                'min_child_weight': 0,
                'max_depth': 3
            }
            data = xgboost.DMatrix(data_path)
            booster = xgboost.train(parameters, data, num_boost_round=10)
            py_predt = booster.predict(data)

            numpy.testing.assert_allclose(cli_predt, py_predt)

            cli_model = xgboost.Booster(model_file=model_out)
            cli_predt = cli_model.predict(data)
            numpy.testing.assert_allclose(cli_predt, py_predt)
