Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

int vector mapper #2 #236

Open
wants to merge 11 commits into
base: develop-v3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 102 additions & 37 deletions include/sdsl/int_vector_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class int_vector_mapper
typedef typename int_vector<t_width>::value_type value_type;
typedef typename int_vector<t_width>::size_type size_type;
typedef typename int_vector<t_width>::int_width_type width_type;
typedef typename int_vector<t_width>::const_iterator const_iterator;
typedef typename int_vector<t_width>::iterator iterator;
typedef typename int_vector<t_width>::const_reference const_reference;
typedef typename int_vector<t_width>::reference reference;
public:
const size_type append_block_size = 1000000;
private:
Expand All @@ -31,7 +35,8 @@ class int_vector_mapper
std::string m_file_name;
bool m_delete_on_close;
private:
void mmap_file() {
void mmap_file()
{
if (!(t_mode&std::ios_base::out)) { // read only
m_mapped_data = (uint8_t*)mmap(NULL,
m_file_size_bytes,
Expand All @@ -57,11 +62,13 @@ class int_vector_mapper
}
}
public:
/* maps a resource. thus should not be copied or default constructed */
int_vector_mapper() = delete;
int_vector_mapper(const int_vector_mapper&) = delete;
int_vector_mapper& operator=(const int_vector_mapper&) = delete;
public:
~int_vector_mapper() {
~int_vector_mapper()
{
if (m_mapped_data) {
if (t_mode&std::ios_base::out) { // write was possible
if (m_data_offset) {
Expand All @@ -82,6 +89,7 @@ class int_vector_mapper
}
}

/* unmap data */
munmap(m_mapped_data, m_file_size_bytes);

if (t_mode&std::ios_base::out) {
Expand Down Expand Up @@ -112,7 +120,8 @@ class int_vector_mapper
m_wrapper.m_data = nullptr;
m_wrapper.m_size = 0;
}
int_vector_mapper(int_vector_mapper&& ivm) {
int_vector_mapper(int_vector_mapper&& ivm)
{
m_wrapper.m_data = ivm.m_wrapper.m_data;
m_wrapper.m_size = ivm.m_wrapper.m_size;
m_wrapper.width(ivm.m_wrapper.width());
Expand All @@ -123,7 +132,8 @@ class int_vector_mapper
ivm.m_mapped_data = nullptr;
ivm.m_fd = -1;
}
int_vector_mapper& operator=(int_vector_mapper&& ivm) {
int_vector_mapper& operator=(int_vector_mapper&& ivm)
{
m_wrapper.m_data = ivm.m_wrapper.m_data;
m_wrapper.m_size = ivm.m_wrapper.m_size;
m_wrapper.width(ivm.m_wrapper.width());
Expand All @@ -140,7 +150,8 @@ class int_vector_mapper
int_vector_mapper(const std::string filename,
bool is_plain = false,
bool delete_on_close = false) :
m_file_name(filename), m_delete_on_close(delete_on_close) {
m_file_name(filename), m_delete_on_close(delete_on_close)
{
size_type size_in_bits = 0;
uint8_t int_width = t_width;
{
Expand Down Expand Up @@ -189,14 +200,17 @@ class int_vector_mapper
}
// Returns a copy of the file name stored in m_file_name.
std::string file_name() const
{
    return m_file_name;
}
// Returns the integer width reported by the wrapped int_vector.
width_type width() const
{
    return m_wrapper.width();
}
/* Sets the integer width of the wrapped int_vector to new_int_width.
 * Only usable when t_mode includes std::ios_base::out. */
void width(const uint8_t new_int_width) {
void width(const uint8_t new_int_width)
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'width'");
m_wrapper.width(new_int_width);
}
/* Returns the size reported by the wrapped int_vector (m_wrapper.size()). */
size_type size() const {
size_type size() const
{
return m_wrapper.size();
}
void bit_resize(const size_type bit_size) {
void bit_resize(const size_type bit_size)
{
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'bit_resize'");
size_type new_size_in_bytes = ((bit_size + 63) >> 6) << 3;
if (m_file_size_bytes != new_size_in_bytes + m_data_offset) {
Expand All @@ -219,55 +233,65 @@ class int_vector_mapper
m_wrapper.m_size = bit_size;
}


/* Resizes the mapped vector to `size` elements.
 * The element count is converted to bits (size * width()) and the actual
 * work is delegated to bit_resize().
 * Only usable when t_mode includes std::ios_base::out. */
void resize(const size_type size) {
void resize(const size_type size)
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'resize'");
size_type size_in_bits = size * width();
bit_resize(size_in_bits);
}

/* Mutable iterator to the beginning of the wrapped int_vector.
 * Only usable when t_mode includes std::ios_base::out. */
auto begin() -> typename int_vector<t_width>::iterator {
iterator begin()
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'begin'");
return m_wrapper.begin();
}
/* Mutable iterator to the end of the wrapped int_vector.
 * Only usable when t_mode includes std::ios_base::out. */
auto end() -> typename int_vector<t_width>::iterator {
iterator end()
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'end'");
return m_wrapper.end();
}
/* Const iterator to the beginning of the wrapped int_vector; has no mode guard. */
auto begin() const -> typename int_vector<t_width>::const_iterator {
const_iterator begin() const
{
return m_wrapper.begin();
}
/* Const iterator to the end of the wrapped int_vector; has no mode guard. */
auto end() const -> typename int_vector<t_width>::const_iterator {
const_iterator end() const
{
return m_wrapper.end();
}
/* Const iterator to the beginning; forwards to m_wrapper.begin() exactly as begin() const does. */
auto cbegin() const -> typename int_vector<t_width>::const_iterator {
const_iterator cbegin() const
{
return m_wrapper.begin();
}
/* Const iterator to the end; forwards to m_wrapper.end() exactly as end() const does. */
auto cend() const -> typename int_vector<t_width>::const_iterator {
const_iterator cend() const
{
return m_wrapper.end();
}
/* Read-only element access: forwards idx to the wrapped int_vector's operator[]. */
auto operator[](const size_type& idx) const
-> typename int_vector<t_width>::const_reference {
const_reference operator[](const size_type& idx) const
{
return m_wrapper[idx];
}
/* Mutable element access: forwards idx to the wrapped int_vector's operator[].
 * Only usable when t_mode includes std::ios_base::out. */
auto operator[](const size_type& idx)
-> typename int_vector<t_width>::reference {
reference operator[](const size_type& idx)
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'operator[]'");
return m_wrapper[idx];
}
// Const overload: returns m_wrapper.data() as a pointer to const 64-bit words.
const uint64_t* data() const
{
    return m_wrapper.data();
}
/* Mutable overload: returns m_wrapper.data() as a pointer to 64-bit words.
 * Only usable when t_mode includes std::ios_base::out. */
uint64_t* data() {
uint64_t* data()
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'data'");
return m_wrapper.data();
}
/* Forwards to m_wrapper.get_int(idx, len); len defaults to 64.
 * NOTE(review): idx is presumably a bit position as in int_vector::get_int -- confirm. */
value_type get_int(size_type idx, const uint8_t len = 64) const {
value_type get_int(size_type idx, const uint8_t len = 64) const
{
return m_wrapper.get_int(idx, len);
}
/* Forwards to m_wrapper.set_int(idx, x, len); len defaults to 64.
 * Only usable when t_mode includes std::ios_base::out.
 * NOTE(review): idx is presumably a bit position as in int_vector::set_int -- confirm. */
void set_int(size_type idx, value_type x, const uint8_t len = 64) {
void set_int(size_type idx, value_type x, const uint8_t len = 64)
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'set_int'");
m_wrapper.set_int(idx, x, len);
}
void push_back(value_type x) {
void push_back(value_type x)
{
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'push_back'");
if (capacity() < size() + 1) {
size_type old_size = m_wrapper.m_size;
Expand All @@ -279,41 +303,55 @@ class int_vector_mapper
m_wrapper.m_size += width();
m_wrapper[size()-1] = x;
}
/* Number of width()-bit elements that fit into the mapped file's data area.
 * The data area is the file size minus m_data_offset, converted from bytes
 * to bits and divided (integer division) by the current width.
 * NOTE(review): divides by width(); assumes width() is never 0 -- confirm. */
size_type capacity() const {
size_type capacity() const
{
size_t data_size_in_bits = 8 * (m_file_size_bytes - m_data_offset);
return data_size_in_bits / width();
}
/* Returns the bit size reported by the wrapped int_vector (m_wrapper.bit_size()). */
size_type bit_size() const {
size_type bit_size() const
{
return m_wrapper.bit_size();
}
/* Element-wise comparison against an arbitrary container that provides begin().
 * NOTE(review): uses the three-iterator form of std::equal, so sizes are NOT
 * compared; v must hold at least size() elements or the read runs past its end. */
template<class container>
bool operator==(const container& v) const {
bool operator==(const container& v) const
{
return std::equal(begin(), end(), v.begin());
}
/* Comparison against an int_vector of the same t_width; delegates to the
 * wrapped int_vector's operator==. */
bool operator==(const int_vector<t_width>& v) const {
bool operator==(const int_vector<t_width>& v) const
{
return m_wrapper == v;
}
/* Comparison against another mapper of the same type; compares the two
 * wrapped int_vectors with their operator==. */
bool operator==(const int_vector_mapper& v) const {
bool operator==(const int_vector_mapper& v) const
{
return m_wrapper == v.m_wrapper;
}
/* Inequality: the negation of whichever operator== overload matches v. */
template<class container>
bool operator!=(const container& v) const {
bool operator!=(const container& v) const
{
return !(*this==v);
}
/* Forwards to m_wrapper.flip().
 * Only usable when t_mode includes std::ios_base::out. */
void flip() {
void flip()
{
// compile-time guard: reject use on a mapper that was not opened for writing
static_assert(t_mode & std::ios_base::out,"int_vector_mapper: must be opened in in+out mode for 'flip'");
m_wrapper.flip();
}
/* Returns m_wrapper.empty(). */
bool empty() const {
bool empty() const
{
return m_wrapper.empty();
}
// Forwards to the wrapped int_vector's max_size().
size_type max_size() const
{
    const size_type limit = m_wrapper.max_size();
    return limit;
}
};

/* temporary buffer that gets deleted later */
template <uint8_t t_width = 0>
class temp_file_buffer
{
private:
static std::string tmp_file(const std::string& dir) {
static std::string tmp_file(const std::string& dir)
{
char tmp_file_name[1024] = {0};
sprintf(tmp_file_name, "%s/tmp_mapper_file_XXXXXX.sdsl",dir.c_str());
int fd = mkstemps(tmp_file_name,5);
Expand All @@ -324,25 +362,52 @@ class temp_file_buffer
return std::string(tmp_file_name,strlen(tmp_file_name));
}
public:
/* Creates a mapper backed by a fresh temporary file whose name is generated
 * by tmp_file("/tmp"), then delegates to create(file_name). */
static int_vector_mapper<t_width> create() {
static int_vector_mapper<t_width> create()
{
auto file_name = tmp_file("/tmp");
return create(file_name);
}
/* Creates a mapper backed by a fresh temporary file whose name is generated
 * by tmp_file(config.dir), then delegates to create(file_name). */
static int_vector_mapper<t_width> create(const cache_config& config) {
static int_vector_mapper<t_width> create(const cache_config& config)
{
auto file_name = tmp_file(config.dir);
return create(file_name);
}
/* Creates a read-write mapper on file_name.
 * An empty int_vector<t_width> is stored to the file first so that it exists
 * with valid contents; the mapper is then constructed with is_plain = false
 * and delete_on_close = true. */
static int_vector_mapper<t_width> create(const std::string& file_name) {
static int_vector_mapper<t_width> create(const std::string& file_name)
{
//write empty int_vector to init the file
int_vector<t_width> tmp_vector;
store_to_file(tmp_vector,file_name);
return int_vector_mapper<t_width,std::ios_base::out|std::ios_base::in>(file_name,false,true);
}
};

// Creates an empty int_vector<> file and maps it read-write. Unlike the
// temporary buffer, the mapper is constructed with delete_on_close = false,
// so the file is kept when the mapper is closed.
template <uint8_t t_width = 0>
class write_out_buffer
{
    public:
        // Creates the buffer in the file named by cache_file_name(key, config)
        // and then calls register_cache_file(key, config).
        // NOTE(review): register_cache_file presumably records the file in
        // config so later stages can find it -- confirm.
        static int_vector_mapper<t_width> create(const std::string& key,cache_config& config)
        {
            auto file_name = cache_file_name(key,config);
            auto tmp = create(file_name);
            register_cache_file(key,config);
            // A local of the return type is moved (or elided) implicitly;
            // an explicit std::move here would only inhibit NRVO.
            return tmp;
        }

        // Creates the buffer in file_name. An empty int_vector<t_width> is
        // stored first so that the file exists with valid contents before it
        // is mapped (is_plain = false, delete_on_close = false).
        static int_vector_mapper<t_width> create(const std::string& file_name)
        {
            // write empty int_vector to init the file
            int_vector<t_width> tmp_vector;
            store_to_file(tmp_vector,file_name);
            return int_vector_mapper<t_width,std::ios_base::out|std::ios_base::in>(file_name,false,false);
        }
};

// Alias for an int_vector_mapper with t_width fixed to 1; the open mode
// defaults to std::ios_base::out|std::ios_base::in.
template<std::ios_base::openmode t_mode = std::ios_base::out|std::ios_base::in>
using bit_vector_mapper = int_vector_mapper<1,t_mode>;

} // end of namespace
// Alias for a const int_vector_mapper opened with std::ios_base::in only.
// Because the aliased type is const, only const member functions can be
// called on objects declared through it.
template<uint8_t t_width = 0>
using read_only_mapper = const int_vector_mapper<t_width,std::ios_base::in>;

}

#endif
47 changes: 46 additions & 1 deletion test/IntVectorMapperTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class IntVectorMapperTest : public ::testing::Test

virtual ~IntVectorMapperTest() {}

virtual void SetUp() {
virtual void SetUp()
{
std::mt19937_64 rng;
{
std::uniform_int_distribution<uint64_t> distribution(1, 100000);
Expand Down Expand Up @@ -279,6 +280,50 @@ TEST_F(IntVectorMapperTest, temp_buffer_test)
}
}

// Stores an identity-filled int_vector, maps it through read_only_mapper and
// checks width, size and contents; afterwards verifies that closing the
// read-only mapper left the file in place.
TEST_F(IntVectorMapperTest, read_only_mapper)
{
    const std::string mapped_file = "tmp/bit_vector_mapper_test";
    for (const auto& size : vec_sizes) {
        sdsl::int_vector<> vec(size);
        sdsl::util::set_to_id(vec);
        sdsl::store_to_file(vec,mapped_file);
        {
            sdsl::read_only_mapper<> rvec(mapped_file);
            ASSERT_EQ(rvec.width(),static_cast<uint8_t>(64));
            ASSERT_EQ(rvec.size(),static_cast<size_t>(vec.size()));
            ASSERT_TRUE(std::equal(rvec.begin(),rvec.end(),vec.begin()));
        }
        // check that the file is still there
        std::ifstream cfs(mapped_file);
        ASSERT_TRUE(cfs.is_open());
        sdsl::remove(mapped_file);
    }
}

// Fills a 31-bit write_out_buffer via push_back from an identity-filled
// int_vector, checks its metadata and contents, and verifies that the backing
// file survives the buffer going out of scope.
TEST_F(IntVectorMapperTest, write_out_buffer)
{
    const std::string tmp_file_name = "tmp/write_out_buffer.sdsl";
    for (const auto& size : vec_sizes) {
        sdsl::int_vector<> vec(size);
        sdsl::util::set_to_id(vec);
        {
            auto buf = sdsl::write_out_buffer<31>::create(tmp_file_name);
            ASSERT_EQ(buf.file_name(),tmp_file_name);
            ASSERT_EQ(buf.width(),static_cast<uint8_t>(31));
            ASSERT_EQ(buf.size(),static_cast<size_t>(0));
            ASSERT_TRUE(buf.empty());
            for (const auto& value : vec) {
                buf.push_back(value);
            }
            ASSERT_EQ(buf.size(),vec.size());
            ASSERT_TRUE(std::equal(buf.begin(),buf.end(),vec.begin()));
        }
        // check that the file is NOT gone
        std::ifstream cfs(tmp_file_name);
        ASSERT_TRUE(cfs.is_open());
        sdsl::remove(tmp_file_name);
    }
}

} // namespace

int main(int argc, char** argv)
Expand Down