Skip to content

Commit

Permalink
Fix memory issues in CSVFieldList (#237)
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentlaucsb committed Jun 15, 2024
1 parent 5a05e6a commit 105b44d
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cmake-multi-platform.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ name: CMake on multiple platforms

on:
push:
branches: [ "master", "remove-werror" ]
branches: [ "master", "memory-fix-csvfieldlist" ]
pull_request:
branches: [ "master" ]

Expand Down
2 changes: 1 addition & 1 deletion include/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
CSV for C++, version 2.2.3
CSV for C++, version 2.3.0
https://github.com/vincentlaucsb/csv-parser
MIT License
Expand Down
6 changes: 3 additions & 3 deletions include/internal/csv_row.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ namespace csv {
}

CSV_INLINE void CSVFieldList::allocate() {
RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
buffers.push_back(buffer);
buffers.push_back(std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]));

_current_buffer_size = 0;
_back = &(buffers.back()[0]);
_back = buffers.back().get();
}
}

Expand Down
22 changes: 14 additions & 8 deletions include/internal/csv_row.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#pragma once
#include <cmath>
#include <deque>
#include <iterator>
#include <memory> // For CSVField
#include <limits> // For CSVField
Expand Down Expand Up @@ -73,16 +74,15 @@ namespace csv {
// CSVFieldArrays may be moved
CSVFieldList(CSVFieldList&& other) :
_single_buffer_capacity(other._single_buffer_capacity) {
buffers = std::move(other.buffers);

for (auto&& buffer : other.buffers) {
this->buffers.emplace_back(std::move(buffer));
}

_current_buffer_size = other._current_buffer_size;
_back = other._back;
}

~CSVFieldList() {
for (auto& buffer : buffers)
delete[] buffer;
}

template <class... Args>
void emplace_back(Args&&... args) {
if (this->_current_buffer_size == this->_single_buffer_capacity) {
Expand All @@ -102,7 +102,14 @@ namespace csv {
private:
const size_t _single_buffer_capacity;

std::vector<RawCSVField*> buffers = {};
/**
* Prefer std::deque over std::vector because it does not
* reallocate upon expansion, allowing pointers to its members
* to remain valid & avoiding potential race conditions when
* CSVFieldList is accesssed simulatenously by a reading thread and
* a writing thread
*/
std::deque<std::unique_ptr<RawCSVField[]>> buffers = {};

/** Number of items in the current buffer */
size_t _current_buffer_size = 0;
Expand All @@ -114,7 +121,6 @@ namespace csv {
void allocate();
};


/** A class for storing raw CSV data and associated metadata */
struct RawCSVData {
std::shared_ptr<void> _data = nullptr;
Expand Down
8 changes: 8 additions & 0 deletions programs/csv_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,15 @@ int main(int argc, char** argv) {
std::chrono::duration<double> diff = end - start;

std::cout << "Parsing took (including disk IO): " << diff.count() << std::endl;
std::cout << "Dimensions: " << info.n_rows << " rows x " << info.n_cols << " columns " << std::endl;
std::cout << "Columns: ";
for (auto& col : info.col_names) {
std::cout << " " << col;
}
std::cout << std::endl;

// Benchmark 2: Parsing Only
/*
std::ifstream csv(filename);
std::stringstream buffer;
buffer << csv.rdbuf();
Expand All @@ -35,6 +42,7 @@ int main(int argc, char** argv) {
diff = end - start;
std::cout << "Parsing took: " << diff.count() << std::endl;
*/

return 0;
}
30 changes: 18 additions & 12 deletions single_include/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once
/*
CSV for C++, version 2.2.3
CSV for C++, version 2.3.0
https://github.com/vincentlaucsb/csv-parser
MIT License
Expand Down Expand Up @@ -5051,6 +5051,7 @@ namespace csv {
*/

#include <cmath>
#include <deque>
#include <iterator>
#include <memory> // For CSVField
#include <limits> // For CSVField
Expand Down Expand Up @@ -5468,16 +5469,15 @@ namespace csv {
// CSVFieldArrays may be moved
CSVFieldList(CSVFieldList&& other) :
_single_buffer_capacity(other._single_buffer_capacity) {
buffers = std::move(other.buffers);

for (auto&& buffer : other.buffers) {
this->buffers.emplace_back(std::move(buffer));
}

_current_buffer_size = other._current_buffer_size;
_back = other._back;
}

~CSVFieldList() {
for (auto& buffer : buffers)
delete[] buffer;
}

template <class... Args>
void emplace_back(Args&&... args) {
if (this->_current_buffer_size == this->_single_buffer_capacity) {
Expand All @@ -5497,7 +5497,14 @@ namespace csv {
private:
const size_t _single_buffer_capacity;

std::vector<RawCSVField*> buffers = {};
/**
* Prefer std::deque over std::vector because it does not
* reallocate upon expansion, allowing pointers to its members
* to remain valid & avoiding potential race conditions when
* CSVFieldList is accesssed simulatenously by a reading thread and
* a writing thread
*/
std::deque<std::unique_ptr<RawCSVField[]>> buffers = {};

/** Number of items in the current buffer */
size_t _current_buffer_size = 0;
Expand All @@ -5509,7 +5516,6 @@ namespace csv {
void allocate();
};


/** A class for storing raw CSV data and associated metadata */
struct RawCSVData {
std::shared_ptr<void> _data = nullptr;
Expand Down Expand Up @@ -7708,10 +7714,10 @@ namespace csv {
}

CSV_INLINE void CSVFieldList::allocate() {
RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
buffers.push_back(buffer);
buffers.push_back(std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]));

_current_buffer_size = 0;
_back = &(buffers.back()[0]);
_back = buffers.back().get();
}
}

Expand Down
30 changes: 18 additions & 12 deletions single_include_test/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once
/*
CSV for C++, version 2.2.3
CSV for C++, version 2.3.0
https://github.com/vincentlaucsb/csv-parser
MIT License
Expand Down Expand Up @@ -5051,6 +5051,7 @@ namespace csv {
*/

#include <cmath>
#include <deque>
#include <iterator>
#include <memory> // For CSVField
#include <limits> // For CSVField
Expand Down Expand Up @@ -5468,16 +5469,15 @@ namespace csv {
// CSVFieldArrays may be moved
CSVFieldList(CSVFieldList&& other) :
_single_buffer_capacity(other._single_buffer_capacity) {
buffers = std::move(other.buffers);

for (auto&& buffer : other.buffers) {
this->buffers.emplace_back(std::move(buffer));
}

_current_buffer_size = other._current_buffer_size;
_back = other._back;
}

~CSVFieldList() {
for (auto& buffer : buffers)
delete[] buffer;
}

template <class... Args>
void emplace_back(Args&&... args) {
if (this->_current_buffer_size == this->_single_buffer_capacity) {
Expand All @@ -5497,7 +5497,14 @@ namespace csv {
private:
const size_t _single_buffer_capacity;

std::vector<RawCSVField*> buffers = {};
/**
* Prefer std::deque over std::vector because it does not
* reallocate upon expansion, allowing pointers to its members
* to remain valid & avoiding potential race conditions when
* CSVFieldList is accesssed simulatenously by a reading thread and
* a writing thread
*/
std::deque<std::unique_ptr<RawCSVField[]>> buffers = {};

/** Number of items in the current buffer */
size_t _current_buffer_size = 0;
Expand All @@ -5509,7 +5516,6 @@ namespace csv {
void allocate();
};


/** A class for storing raw CSV data and associated metadata */
struct RawCSVData {
std::shared_ptr<void> _data = nullptr;
Expand Down Expand Up @@ -7708,10 +7714,10 @@ namespace csv {
}

CSV_INLINE void CSVFieldList::allocate() {
RawCSVField * buffer = new RawCSVField[_single_buffer_capacity];
buffers.push_back(buffer);
buffers.push_back(std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]));

_current_buffer_size = 0;
_back = &(buffers.back()[0]);
_back = buffers.back().get();
}
}

Expand Down

0 comments on commit 105b44d

Please sign in to comment.