Skip to content

Commit

Permalink
Redesign dmlc::TemporaryDirectory to allow recursive deletion (#469)
Browse files Browse the repository at this point in the history
* Redesign dmlc::TempDirectory to allow recursive deletion

Use dmlc::io::LocalFileSystem::ListDirectory to allow recursive delection of
temporary directories. Now users don't have to call AddFile(); it suffices to
create files inside the directory.

Limitation: no symbolic links are allowed inside the temporary directory.

* Fix memory error

* Fix lint

* Add usage example

* Update docstring

* Add unit test for TemporaryDirectory

* Fix compilation error
  • Loading branch information
hcho3 authored and Kumar committed Nov 5, 2018
1 parent 59986f2 commit cef015f
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 51 deletions.
113 changes: 78 additions & 35 deletions include/dmlc/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define DMLC_FILESYSTEM_H_

#include <dmlc/logging.h>
#include <algorithm>
#include <string>
#include <vector>
#include <random>
Expand All @@ -18,24 +19,44 @@
#include <windows.h>
#include <Shlwapi.h>
#pragma comment(lib, "Shlwapi.lib")
#else
#else // _WIN32
#include <unistd.h>
#endif
#include <sys/stat.h>
#include <sys/types.h>
#endif // _WIN32

#include "../../src/io/filesys.h"

namespace dmlc {

/*!
* \brief Manager class for temporary directories. Whenever a new object is
* constructed, a temporary directory is created. The directory is
* deleted when the object is deleted or goes out of scope.
* NOTE. The class needs to keep track of all files inside the temporary
* directory in order to delete the directory successfully in the end.
* Thus, the user should call AddFile() method to add each file.
* \brief Manager class for temporary directories. Whenever a new
* TemporaryDirectory object is constructed, a temporary directory is
* created. The directory is deleted when the object is deleted or goes
* out of scope. Note: no symbolic links are allowed inside the
* temporary directory.
*
* Usage example:
* \code
*
* void foo() {
* dmlc::TemporaryDirectory tempdir;
* // Create a file my_file.txt inside the temporary directory
* std::ofstream of(tempdir.path + "/my_file.txt");
* // ... write to my_file.txt ...
*
* // ... use my_file.txt
*
* // When tempdir goes out of scope, the temporary directory is deleted
* }
*
* \endcode
*/
class TemporaryDirectory {
public:
/*!
* \brief Default constructor. Creates a new temporary directory
* \brief Default constructor.
* Creates a new temporary directory with a unique name.
* \param verbose whether to emit extra messages
*/
explicit TemporaryDirectory(bool verbose = false)
Expand Down Expand Up @@ -66,7 +87,7 @@ class TemporaryDirectory {
<< "Could not create temporary directory";
}
path = std::string(tmpdir);
#else
#else // _WIN32
std::string tmproot; /* root directory of temporary area */
std::string dirtemplate; /* template for temporary directory name */
/* Get TMPDIR env variable or fall back to /tmp/ */
Expand All @@ -91,16 +112,59 @@ class TemporaryDirectory {
<< "Could not create temporary directory";
}
path = std::string(tmpdir);
#endif
#endif // _WIN32
if (verbose_) {
LOG(INFO) << "Created temporary directory " << path;
}
}

/*! \brief Destructor. Will perform recursive deletion via RecursiveDelete() */
~TemporaryDirectory() {
for (const std::string& filename : file_list_) {
if (std::remove(filename.c_str()) != 0) {
LOG(FATAL) << "Couldn't remove file " << filename;
RecursiveDelete(path);
}

/*! \brief Full path of the temporary directory */
std::string path;

private:
/*! \brief Whether to emit extra messages */
bool verbose_;

/*!
* \brief Determine whether a given path is a symbolic link
* \param path String representation of path
*/
inline bool IsSymlink(const std::string& path) {
#ifdef _WIN32
DWORD attr = GetFileAttributesA(path.c_str());
CHECK_NE(attr, INVALID_FILE_ATTRIBUTES)
<< "dmlc::TemporaryDirectory::IsSymlink(): Unable to read file attributes";
return attr & FILE_ATTRIBUTE_REPARSE_POINT;
#else // _WIN32
struct stat sb;
CHECK_EQ(lstat(path.c_str(), &sb), 0)
<< "dmlc::TemporaryDirectory::IsSymlink(): Unable to read file attributes";
return S_ISLNK(sb.st_mode);
#endif // _WIN32
}

/*!
* \brief Delete a directory recursively, along with sub-directories and files.
* \param path String representation of path. It must refer to a directory.
*/
inline void RecursiveDelete(const std::string& path) {
io::URI uri(path.c_str());
io::FileSystem* fs = io::FileSystem::GetInstance(uri);
std::vector<io::FileInfo> file_list;
fs->ListDirectory(uri, &file_list);
for (io::FileInfo info : file_list) {
CHECK(!IsSymlink(info.path.name))
<< "Symlink not supported in TemporaryDirectory";
if (info.type == io::FileType::kDirectory) {
RecursiveDelete(info.path.name);
} else {
CHECK_EQ(std::remove(info.path.name.c_str()), 0)
<< "Couldn't remove file " << info.path.name;
}
}
#if _WIN32
Expand All @@ -117,27 +181,6 @@ class TemporaryDirectory {
<< "Could not remove temporary directory " << path;
}
}

/*!
* \brief Add a file under the temporary directory, to be tracked. When the
* directory gets deleted, the file will get deleted also.
* \param filename name of the file
* \return full path of the file
*/
std::string AddFile(const std::string& filename) {
const std::string file_path = this->path + "/" + filename;
file_list_.push_back(file_path);
return file_path;
}

/*! \brief Path of the temporary directory */
std::string path;

private:
/*! \brief List of files being tracked */
std::vector<std::string> file_list_;
/*! \brief Whether to emit extra messages */
bool verbose_;
};

} // namespace dmlc
Expand Down
25 changes: 9 additions & 16 deletions test/unittest/unittest_inputsplit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,15 @@ TEST(InputSplit, test_split_csv_noeol) {
/* Create a test case for partitioned csv with NOEOL */
dmlc::TemporaryDirectory tempdir;
{
const std::string filename = tempdir.AddFile("train_0.csv");
std::ofstream of(filename.c_str(), std::ios::binary);
std::ofstream of(tempdir.path + "/train_0.csv", std::ios::binary);
of << "0,1,1,1"; // NOEOL (no '\n' at end of file)
}
{
const std::string filename = tempdir.AddFile("train_1.csv");
std::ofstream of(filename.c_str(), std::ios::binary);
std::ofstream of(tempdir.path + "/train_1.csv", std::ios::binary);
of << "0,1,1,2\n";
}
{
const std::string filename = tempdir.AddFile("train_2.csv");
std::ofstream of(filename.c_str(), std::ios::binary);
std::ofstream of(tempdir.path + "/train_2.csv", std::ios::binary);
of << "0,1,1,2\n";
}
/* Load the test case with InputSplit and obtain matrix dimensions */
Expand All @@ -66,13 +63,11 @@ TEST(InputSplit, test_split_libsvm_noeol) {
= "1 3:1 10:1 11:1 21:1 30:1 34:1 36:1 40:1 41:1 53:1 58:1 65:1 69:1 "
"77:1 86:1 88:1 92:1 95:1 102:1 105:1 117:1 124:1";
{
const std::string filename = tempdir.AddFile("train_0.libsvm");
std::ofstream of(filename.c_str(), std::ios::binary);
std::ofstream of(tempdir.path + "/train_0.libsvm", std::ios::binary);
of << line << "\n";
}
{
const std::string filename = tempdir.AddFile("train_1.libsvm");
std::ofstream of(filename.c_str(), std::ios::binary);
std::ofstream of(tempdir.path + "/train_1.libsvm", std::ios::binary);
of << line; // NOEOL (no '\n' at end of file)
}
std::unique_ptr<dmlc::Parser<uint32_t> > parser(
Expand All @@ -91,9 +86,8 @@ TEST(InputSplit, test_split_libsvm) {
dmlc::TemporaryDirectory tempdir;
const int nfile = 5;
for (int file_id = 0; file_id < nfile; ++file_id) {
const std::string filename
= tempdir.AddFile(std::string("test_") + std::to_string(file_id) + ".libsvm");
std::ofstream of(filename.c_str(), std::ios::binary);
std::ofstream of(tempdir.path + "/test_" + std::to_string(file_id) + ".libsvm",
std::ios::binary);
of << "1 3:1 10:1 11:1 21:1 30:1 34:1 36:1 40:1 41:1 53:1 58:1 65:1 69:1 "
<< "77:1 86:1 88:1 92:1 95:1 102:1 105:1 117:1 124:1\n";
}
Expand All @@ -118,9 +112,8 @@ TEST(InputSplit, test_split_libsvm_distributed) {
"77:1 86:1 88:1 92:1 95:1 102:1 105:1 117:1 124:1\n";
const int nfile = 5;
for (int file_id = 0; file_id < nfile; ++file_id) {
const std::string filename
= tempdir.AddFile(std::string("test_") + std::to_string(file_id) + ".libsvm");
std::ofstream of(filename.c_str(), std::ios::binary);
std::ofstream of(tempdir.path + "/test_" + std::to_string(file_id) + ".libsvm",
std::ios::binary);
const int nrepeat = (file_id == 0 ? 6 : 1);
for (int i = 0; i < nrepeat; ++i) {
of << line;
Expand Down
77 changes: 77 additions & 0 deletions test/unittest/unittest_tempdir.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include <dmlc/filesystem.h>
#include <gtest/gtest.h>
#include <fstream>
#include <string>
#include <queue>
#include <utility>

#ifdef _WIN32
#include <direct.h>
#else // _WIN32
#include <sys/stat.h>
#endif // _WIN32

static inline void MakeDirectory(const std::string& path) {
#ifdef _WIN32
CHECK_EQ(_mkdir(path.c_str()), 0) << "Failed to make directory " << path;
#else // _WIN32
CHECK_EQ(mkdir(path.c_str(), 0777), 0) << "Failed to make directory " << path;
#endif // _WIN32
}

TEST(TemporaryDirectory, test_basic) {
std::string tempdir_path;
{
dmlc::TemporaryDirectory tempdir;
tempdir_path = tempdir.path;
const int num_file = 5;
for (int i = 0; i < num_file; ++i) {
std::ofstream fout(tempdir.path + "/" + std::to_string(i) + ".txt");
fout << "0,1,1," << (i + 1) << "\n";
}
// Check if each file can be read back
for (int i = 0; i < num_file; ++i) {
std::ifstream fin(tempdir.path + "/" + std::to_string(i) + ".txt");
std::string s;
ASSERT_TRUE(static_cast<bool>(std::getline(fin, s)));
ASSERT_EQ(s, std::string("0,1,1," + std::to_string(i + 1)));
ASSERT_FALSE(static_cast<bool>(std::getline(fin, s)));
}
}
// Test the directory is indeed deleted.
const dmlc::io::URI uri(tempdir_path.c_str());
ASSERT_ANY_THROW(dmlc::io::FileSystem::GetInstance(uri)->GetPathInfo(uri));
}

TEST(TemporaryDirectory, test_recursive) {
std::string tempdir_path;
{
dmlc::TemporaryDirectory tempdir;
tempdir_path = tempdir.path;
const int recurse_depth = 5;

std::queue<std::pair<int, std::string>> Q; // (depth, directory)
Q.emplace(0, tempdir.path);
while (!Q.empty()) {
auto e = Q.front(); Q.pop();
const int current_depth = e.first;
const std::string current_directory = e.second;
if (current_depth < recurse_depth) {
{
std::ofstream of(current_directory + "/foobar.txt");
of << "hello world\n";
}
MakeDirectory(current_directory + "/1");
MakeDirectory(current_directory + "/2");
Q.emplace(current_depth + 1, current_directory + "/1");
Q.emplace(current_depth + 1, current_directory + "/2");
} else {
std::ofstream of(current_directory + "/foobar.txt");
of << "hello world\n";
}
}
}
// Test the directory is indeed deleted.
const dmlc::io::URI uri(tempdir_path.c_str());
ASSERT_ANY_THROW(dmlc::io::FileSystem::GetInstance(uri)->GetPathInfo(uri));
}

0 comments on commit cef015f

Please sign in to comment.