Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VIAME] Modify libsvm to save/load to/from bytes #344

Open
wants to merge 2 commits into
base: viame/master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion TPL/libsvm-3.1-custom/Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
# Toolchain and build settings; CXX can be overridden from the environment.
CXX ?= g++
CFLAGS = -Wall -Wconversion -O3 -fPIC
# Version suffix of the shared library produced by `lib` (libsvm.so.$(SHVER)).
SHVER = 2
# Interpreter used by the `test` target to run python/svm_test.py.
PYTHON = python2.7
# .PHONY is a special *target*, so it needs a colon. Writing `.PHONY = test`
# only defines an unused variable and leaves the targets non-phony, which
# means a stray file called `test` or `clean` would stop them from running.
.PHONY: all lib test clean

# Default target: build all command-line tools.
all: svm-train svm-predict svm-scale svm-prob binary-train binary-predict

# Shared library; the `test` target copies it into python/lib.
lib: svm.o
$(CXX) -shared -dynamiclib svm.o -o libsvm.so.$(SHVER)

# C++ test binary; links against googletest and pthread.
svm-test: svm-test.o svm.o
$(CXX) svm-test.o svm.o -lgtest -lpthread -o svm-test

# Stage the shared library for the Python test, run the C++ test under
# valgrind with full leak checking, then run the Python test.
test: svm-test lib
mkdir -p ./python/lib && cp libsvm.so.$(SHVER) python/lib/
valgrind --leak-check=full ./svm-test
cd python && $(PYTHON) svm_test.py

svm-predict: svm-predict.c svm.o
$(CXX) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm
svm-train: svm-train.c svm.o
Expand All @@ -21,11 +31,16 @@ svm.o: svm.cpp svm.h
eval.o: eval.cpp eval.h svm.h
$(CXX) $(CFLAGS) -c eval.cpp -o eval.o

# The test source is compiled as C++11.
# NOTE(review): the prerequisite is svm.o rather than svm.h, so a header-only
# change is only picked up indirectly through svm.o -- confirm this is intended.
svm-test.o: svm-test.cpp svm.o
$(CXX) -std=c++11 $(CFLAGS) -c svm-test.cpp

# The binary-* tools are linked from their C source plus eval.o and svm.o.
binary-predict: binary-predict.c eval.o svm.o
$(CXX) $(CFLAGS) binary-predict.c eval.o svm.o -o binary-predict -lm
binary-train: binary-train.c eval.o svm.o
$(CXX) $(CFLAGS) binary-train.c eval.o svm.o -o binary-train -lm


# Remove every build product, including the test binary and the copy of the
# shared library staged for the Python test.
clean:
	rm -f *~ svm.o svm-test.o eval.o svm-train svm-predict svm-scale \
		svm-prob binary-train binary-predict svm-test libsvm.so.$(SHVER)
	# -f so that `make clean` still succeeds when `make test` never created
	# python/lib (plain `rm -R` exits non-zero on a missing directory).
	rm -rf ./python/lib
3 changes: 2 additions & 1 deletion TPL/libsvm-3.1-custom/python/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ def toPyModel(model_ptr):

fillprototype(libsvm.svm_save_model, c_int, [c_char_p, POINTER(svm_model)])
fillprototype(libsvm.svm_load_model, POINTER(svm_model), [c_char_p])
fillprototype(libsvm.svm_load_model_from_bytes, POINTER(svm_model), [POINTER(c_ubyte), c_int])
fillprototype(libsvm.load_model_from_bytes, POINTER(svm_model), [POINTER(c_ubyte), c_int])
fillprototype(libsvm.convert_model_to_bytes, None, [POINTER(svm_model), POINTER(POINTER(c_ubyte)), POINTER(c_size_t)])

fillprototype(libsvm.svm_get_svm_type, c_int, [POINTER(svm_model)])
fillprototype(libsvm.svm_get_nr_class, c_int, [POINTER(svm_model)])
Expand Down
35 changes: 35 additions & 0 deletions TPL/libsvm-3.1-custom/python/svm_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from svmutil import *
import unittest
import filecmp


class TestSVM(unittest.TestCase):

model_names = [
"../data/svmguide1.model",
"../data/svmguide1.scale.model",
]

def save_model_and_check(self, model_true, model_false):
model = svm_load_model(model_true)
bytes_list = svm_conv_model_to_bytes(model)
tmp_file_name = "/tmp/tmp.model"
svm_save_model(tmp_file_name, svm_load_model_from_bytes(bytes_list))
assert(filecmp.cmp(tmp_file_name, model_true) is True)
assert(filecmp.cmp(tmp_file_name, model_false) is False)

def save_model_and_check_bytes_file_name(self, model_true, model_false):
model = svm_load_model(model_true)
bytes_list = svm_conv_model_to_bytes(model)
tmp_file_name = '/tmp/tmp.model'.encode("utf-8")
svm_save_model(tmp_file_name, svm_load_model_from_bytes(bytes_list))
assert(filecmp.cmp(tmp_file_name, model_true) is True)
assert(filecmp.cmp(tmp_file_name, model_false) is False)

def test_save_model(self):
self.save_model_and_check(self.model_names[0], self.model_names[1])
self.save_model_and_check(self.model_names[1], self.model_names[0])


if __name__ == "__main__":
unittest.main()
32 changes: 26 additions & 6 deletions TPL/libsvm-3.1-custom/python/svmutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from __future__ import print_function

from svm import *
import numpy as np

import sys
import six

def svm_read_problem(data_file_name):
"""
Expand Down Expand Up @@ -33,31 +35,49 @@ def svm_load_model(model_file_name):

Load a LIBSVM model from model_file_name and return.
"""
model = libsvm.svm_load_model(model_file_name)
# Inconsistency between Python 3 and Python 2: normalise the file name to bytes before passing it to libsvm.
if isinstance(model_file_name, six.binary_type):
model_file_name_bytes = model_file_name
else:
model_file_name_bytes = six.b(model_file_name)

model = libsvm.svm_load_model(model_file_name_bytes)
if not model:
print("can't open model file %s" % model_file_name)
return None
model = toPyModel(model)
return model

def svm_load_model_from_bytes(bytes):
model = libsvm.svm_load_model_from_bytes(bytes, len(bytes))
def svm_load_model_from_bytes(bytes_list):
    """
    svm_load_model_from_bytes(bytes_list) -> model

    Build a LIBSVM model from a serialized model held in memory.

    bytes_list may be a numpy uint8 array (as returned by
    svm_conv_model_to_bytes), a bytes/bytearray object, or any sequence of
    byte values. Returns the Python model, or None (after printing a
    message) when libsvm cannot parse the data.
    """
    if isinstance(bytes_list, (bytes, bytearray)):
        # Wrap the buffer directly; no copy is needed for a byte string.
        byte_array = np.frombuffer(bytes_list, dtype=np.uint8)
    else:
        # A strided or multi-dimensional array would hand libsvm a pointer
        # that does not describe the logical byte sequence, so force a flat
        # contiguous uint8 buffer first.
        byte_array = np.ascontiguousarray(bytes_list, dtype=np.uint8).ravel()
    bytes_list_pointer = byte_array.ctypes.data_as(POINTER(c_ubyte))
    # Pass the byte count of the buffer actually handed over, not len() of
    # the caller's object (which is only the first dimension for arrays).
    model = libsvm.load_model_from_bytes(bytes_list_pointer, byte_array.size)
    if not model:
        print("can't load model from the bytes")
        return None
    model = toPyModel(model)
    return model

def svm_conv_model_to_bytes(model):
    """
    svm_conv_model_to_bytes(model) -> numpy uint8 array

    Serialize a LIBSVM model into a byte array (the same text that
    svm_save_model would write to a file).

    Returns a numpy array that owns its data, or None (after printing a
    message) when libsvm returns no buffer.
    """
    import ctypes  # local import: only needed to reach the C library's free()

    bytes_list = POINTER(c_ubyte)()
    bytes_list_len = c_size_t()
    libsvm.convert_model_to_bytes(model, byref(bytes_list), byref(bytes_list_len))
    if not bytes_list:
        # A NULL pointer cannot be dereferenced; report it like the loaders do.
        print("can't convert model to bytes")
        return None
    try:
        array_type = c_ubyte * bytes_list_len.value
        array_pointer = cast(bytes_list, POINTER(array_type))
        # Copy out of the C buffer so the returned array stays valid after
        # the buffer is released below.
        py_bytes_list = np.frombuffer(array_pointer.contents, dtype=np.uint8).copy()
    finally:
        # The buffer comes from open_memstream() on the C side, so it is
        # malloc'd and the caller must free() it; without this every call
        # leaks the serialized model.
        libc_free = ctypes.CDLL(None).free
        libc_free.argtypes = [ctypes.c_void_p]
        libc_free.restype = None
        libc_free(ctypes.cast(bytes_list, ctypes.c_void_p))
    return py_bytes_list

def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Save a LIBSVM model to the file model_file_name.

    model_file_name may be a text or a byte string; text is encoded as UTF-8
    before it is passed to libsvm.
    """
    # libsvm.svm_save_model is prototyped with c_char_p, so it needs a byte
    # string on both Python 2 and Python 3. Encode text explicitly as UTF-8:
    # six.b() is meant for literals and encodes with Latin-1 on Python 3,
    # which fails for file names outside that range.
    if isinstance(model_file_name, bytes):
        model_file_name_bytes = model_file_name
    else:
        model_file_name_bytes = model_file_name.encode('utf-8')
    libsvm.svm_save_model(model_file_name_bytes, model)

def evaluations(ty, pv):
"""
Expand Down
94 changes: 94 additions & 0 deletions TPL/libsvm-3.1-custom/svm-test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//
// Created by chaturvedi on 2/20/18.
//

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

#include <gtest/gtest.h>
#include "svm.h"


class SVMTest : public ::testing::Test
{
protected:
svm_model *sample_model1;
svm_model *sample_model2;

virtual void SetUp() {
sample_model1 = svm_load_model("./data/svmguide1.model");
sample_model2 = svm_load_model("./data/svmguide1.scale.model");
}
virtual void TearDown(){
svm_free_and_destroy_model(&sample_model1);
svm_free_and_destroy_model(&sample_model2);
}

public:
// TODO (Mmanu) : Suboptimal because we're saving a file and diff-ing it
// But letting it be because we're just testing
bool check_models_equal(svm_model* m, const char* orig_model_file_name){
const char *tmp_file_name = "/tmp/tmp_svm.model";
const char *diff_file = "/tmp/tmp_diff.op";

svm_save_model(tmp_file_name, m);
char buff[100];
// Use diff to make sure that the models written are exactly the same
sprintf(buff, "diff %s %s > %s", tmp_file_name, orig_model_file_name, diff_file);
system(buff);

// Check that the diff output is an empty file
FILE *fp = fopen(diff_file, "r");
// Got to the 0th place before end.
fseek(fp, 0, SEEK_END);
long size = ftell(fp);
bool res = false;

fclose(fp);
if(size == 0) res = true;
std::remove(diff_file);
std::remove(tmp_file_name);
return res;
}

void test_model(const char *original_model_name, const char *other_model_name){
svm_model* original_model = svm_load_model(original_model_name);


unsigned char* p;
unsigned long size;
// Load and check the model from bytes array
convert_model_to_bytes(original_model, p, size);
svm_model* returned_model = load_model_from_bytes(p, size);
ASSERT_TRUE(check_models_equal(returned_model, original_model_name));
ASSERT_FALSE(check_models_equal(returned_model, other_model_name));
free(p);
svm_free_and_destroy_model(&returned_model);

// Load and check the model from bytes vector
std::vector<unsigned char>* p_vec = convert_model_to_bytes_vector(original_model);
svm_model* returned_model_vector = load_model_from_bytes_vector(p_vec);
ASSERT_TRUE(check_models_equal(returned_model_vector, original_model_name));
ASSERT_FALSE(check_models_equal(returned_model_vector, other_model_name));
delete p_vec;
svm_free_and_destroy_model(&returned_model_vector);
svm_free_and_destroy_model(&original_model);
}

};

// Round-trips the unscaled model; the scaled model is the one it must differ from.
TEST_F(SVMTest, checkFirstModel) {
    test_model("./data/svmguide1.model", "./data/svmguide1.scale.model");
}

// Round-trips the scaled model; the unscaled model is the one it must differ from.
TEST_F(SVMTest, checkSecondModel) {
    test_model("./data/svmguide1.scale.model", "./data/svmguide1.model");
}

// Test runner entry point: hands the command line to googletest and returns
// the result of running every registered test.
int main(int argc, char** argv)
{
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
63 changes: 52 additions & 11 deletions TPL/libsvm-3.1-custom/svm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2729,11 +2729,8 @@ static const char *kernel_type_table[]=
"linear","polynomial","rbf","sigmoid","precomputed", "histogram", "NMI", NULL
};

int svm_save_model(const char *model_file_name, const svm_model *model)
int save_model_to_file_pointer(FILE *fp, const svm_model *model)
{
FILE *fp = fopen(model_file_name,"w");
if(fp==NULL) return -1;

const svm_parameter& param = model->param;

fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
Expand Down Expand Up @@ -2817,6 +2814,31 @@ int svm_save_model(const char *model_file_name, const svm_model *model)
else return 0;
}

// Writes `model` to the file `model_file_name`.
// Returns -1 when the file cannot be opened for writing; otherwise returns
// whatever save_model_to_file_pointer() reports.
// NOTE(review): the stream is not closed here -- presumably
// save_model_to_file_pointer() closes it; confirm against its body.
int svm_save_model(const char *model_file_name, const svm_model *model){
	if(FILE *out = fopen(model_file_name, "w"))
		return save_model_to_file_pointer(out, model);
	return -1;
}

// Serializes `model` into a newly allocated memory buffer.
//
// model  - model to serialize
// buffer - out: start of the serialized bytes, or NULL on failure; the
//          buffer is allocated by open_memstream(), so the caller releases
//          it with free()
// size   - out: number of bytes in `buffer`, 0 on failure
//
// NOTE(review): open_memstream() only guarantees the buffer pointer and size
// are up to date after the stream has been flushed or closed; this relies on
// save_model_to_file_pointer() closing the stream -- confirm.
void convert_model_to_bytes(svm_model* model, unsigned char* &buffer,
                            size_t &size) {
	// Give the outputs defined values on every failure path.
	buffer = NULL;
	size = 0;
	if(model == NULL) return;

	char *buf1 = NULL;
	FILE *fp = open_memstream(&buf1, &size);
	// Without this check a failed open_memstream() passes a NULL stream to
	// the fprintf calls in the writer.
	if(fp == NULL) return;

	save_model_to_file_pointer(fp, model);
	buffer = reinterpret_cast<unsigned char *>(buf1);
}

// Serializes `model` and returns the bytes in a heap-allocated vector.
// The caller owns the returned vector and must `delete` it.
std::vector<unsigned char>* convert_model_to_bytes_vector(svm_model* model){
	size_t n_bytes;
	unsigned char* raw;
	convert_model_to_bytes(model, raw, n_bytes);

	// Copy the serialized bytes into the vector, then release the C buffer.
	std::vector<unsigned char>* result =
		new std::vector<unsigned char>(raw, raw + n_bytes);
	free(raw);
	return result;
}

static char *line = NULL;
static int max_line_len;

Expand All @@ -2838,8 +2860,7 @@ static char* readline(FILE *input)
return line;
}

// TODO May need to remove this. Ask Matt.
svm_model *svm_load_model_from_bytes_vector(std::vector<unsigned char>& model_bytes) {
svm_model *load_model_from_bytes_vector(std::vector<unsigned char>& model_bytes) {
const char* tmp_file_name = "tmp_svm_cpp.model";
std::ofstream out(tmp_file_name, std::ios::binary);
out.write(reinterpret_cast<const char*>(model_bytes.data()), model_bytes.size());
Expand All @@ -2848,7 +2869,7 @@ svm_model *svm_load_model_from_bytes_vector(std::vector<unsigned char>& model_by
return m;
}

svm_model *svm_load_model_from_bytes(const unsigned char *model_bytes, int size) {
svm_model *load_model_from_bytes(const unsigned char *model_bytes, int size) {
const char* tmp_file_name = "tmp_svm_cpp.model";
std::ofstream out(tmp_file_name, std::ios::binary);
out.write(reinterpret_cast<const char*>(model_bytes),
Expand All @@ -2858,10 +2879,8 @@ svm_model *svm_load_model_from_bytes(const unsigned char *model_bytes, int size)
return m;
}

svm_model *svm_load_model(const char *model_file_name)
{
FILE *fp = fopen(model_file_name,"rb");
if(fp==NULL) return NULL;

svm_model* get_model_from_FILE_pointer(FILE *fp) {

// read parameters

Expand Down Expand Up @@ -3058,6 +3077,28 @@ svm_model *svm_load_model(const char *model_file_name)
return model;
}

// Loads a model from the file `model_file_name`.
// Returns NULL when the file cannot be opened; otherwise returns whatever
// get_model_from_FILE_pointer() produces for the opened stream.
svm_model *svm_load_model(const char *model_file_name) {
	if(FILE *in = fopen(model_file_name, "rb"))
		return get_model_from_FILE_pointer(in);
	return NULL;
}

// Parses a model from `size` bytes starting at `buffer`; the bytes are the
// text form produced by convert_model_to_bytes() / svm_save_model().
//
// Returns NULL when there is no input, when the memory stream cannot be
// opened, or when get_model_from_FILE_pointer() returns NULL.
// NOTE(review): the stream is not closed here -- presumably
// get_model_from_FILE_pointer() closes it; confirm against its body.
svm_model *load_model_from_bytes(unsigned char* buffer, size_t size) {
	// fmemopen() with a NULL buffer allocates its own scratch memory instead
	// of failing, and a zero size is handled differently across C libraries;
	// reject both up front so "no input" is always a clean NULL.
	if(buffer == NULL || size == 0) return NULL;

	FILE *fp = fmemopen(buffer, size, "r");
	if(fp == NULL) return NULL;

	return get_model_from_FILE_pointer(fp);
}

// Parses a model from the bytes held in `*v`.
// Returns NULL when `v` is NULL or empty, or when the bytes cannot be parsed.
svm_model *load_model_from_bytes_vector(std::vector<unsigned char> * v){
	// Guard the dereference; an empty vector may also report a NULL data().
	if(v == NULL || v->empty()) return NULL;
	return load_model_from_bytes(v->data(), v->size());
}


void svm_free_model_content(svm_model* model_ptr)
{
if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
Expand Down
1 change: 1 addition & 0 deletions TPL/libsvm-3.1-custom/svm.def
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ EXPORTS
svm_check_probability_model @16
svm_set_print_string_function @17
; Names must match the functions declared in svm.h: the byte-array loader is
; load_model_from_bytes and the serializer is convert_model_to_bytes.
load_model_from_bytes @18
convert_model_to_bytes @19
4 changes: 4 additions & 0 deletions TPL/libsvm-3.1-custom/svm.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ const char *svm_check_parameter(const struct svm_problem *prob, const struct svm
int svm_check_probability_model(const struct svm_model *model);

void svm_set_print_string_function(void (*print_func)(const char *));
void convert_model_to_bytes(svm_model *model, unsigned char* &buffer, size_t &size);
std::vector<unsigned char>* convert_model_to_bytes_vector(svm_model *model);
struct svm_model *load_model_from_bytes(unsigned char* buffer, size_t size);
struct svm_model *load_model_from_bytes_vector(std::vector<unsigned char> *v);

#ifdef __cplusplus
}
Expand Down