From 56bc237b78251929c80ff195e5d2176bf9fb1122 Mon Sep 17 00:00:00 2001 From: annoviko Date: Tue, 16 Jul 2019 15:01:23 +0300 Subject: [PATCH] #521: OPTICS optimization - reduce algorithmic complexity. --- ccore/src/cluster/optics.cpp | 31 ++++++++++++++++++++----------- ccore/src/cluster/optics.hpp | 25 +++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/ccore/src/cluster/optics.cpp b/ccore/src/cluster/optics.cpp index eb6f211c..d4e0e606 100644 --- a/ccore/src/cluster/optics.cpp +++ b/ccore/src/cluster/optics.cpp @@ -36,7 +36,9 @@ namespace ccore { namespace clst { -const double optics::NONE_DISTANCE = optics_descriptor::NONE_DISTANCE; +const double optics::NONE_DISTANCE = optics_descriptor::NONE_DISTANCE; + +const std::size_t optics::INVALID_INDEX = std::numeric_limits::max(); optics::optics(const double p_radius, const std::size_t p_neighbors) : optics() { @@ -126,14 +128,13 @@ void optics::allocate_clusters(void) { void optics::expand_cluster_order(optics_descriptor & p_object) { p_object.m_processed = true; - std::vector< std::tuple > neighbors; + neighbors_collection neighbors; get_neighbors(p_object.m_index, neighbors); m_ordered_database.push_back(&p_object); if (neighbors.size() >= m_neighbors) { - std::sort(neighbors.begin(), neighbors.end(), [](const auto & a, const auto & b) { return std::get<1>(a) < std::get<1>(b); }); - p_object.m_core_distance = std::get<1>(neighbors[m_neighbors - 1]); + p_object.m_core_distance = get_core_distance(neighbors); std::multiset order_seed; update_order_seed(p_object, neighbors, order_seed); @@ -148,9 +149,7 @@ void optics::expand_cluster_order(optics_descriptor & p_object) { m_ordered_database.push_back(descriptor); if (neighbors.size() >= m_neighbors) { - std::sort(neighbors.begin(), neighbors.end(), [](const auto & a, const auto & b) { return std::get<1>(a) < std::get<1>(b); }); - descriptor->m_core_distance = std::get<1>(neighbors[m_neighbors - 1]); - + descriptor->m_core_distance = get_core_distance(neighbors); update_order_seed(*descriptor, neighbors, order_seed); } else { @@ -166,8 +165,8 @@ void optics::expand_cluster_order(optics_descriptor & p_object) { void optics::update_order_seed(const optics_descriptor & p_object, const neighbors_collection & p_neighbors, std::multiset & order_seed) { for (auto & descriptor : p_neighbors) { - std::size_t index_neighbor = std::get<0>(descriptor); - double current_reachability_distance = std::get<1>(descriptor); + std::size_t index_neighbor = descriptor.m_index; + double current_reachability_distance = descriptor.m_reachability_distance; optics_descriptor & optics_object = m_optics_objects->at(index_neighbor); if (!optics_object.m_processed) { @@ -240,7 +239,7 @@ void optics::get_neighbors_from_points(const std::size_t p_index, neighbors_coll container::kdtree_searcher::rule_store rule = [&p_index, &p_neighbors](const container::kdnode::ptr & p_node, const double p_distance) { if (p_index != (std::size_t) p_node->get_payload()) { - p_neighbors.push_back(std::make_tuple((std::size_t) p_node->get_payload(), std::sqrt(p_distance))); + p_neighbors.emplace((std::size_t) p_node->get_payload(), std::sqrt(p_distance)); } }; @@ -255,12 +254,22 @@ void optics::get_neighbors_from_distance_matrix(const std::size_t p_index, neigh for (std::size_t index_neighbor = 0; index_neighbor < distances.size(); index_neighbor++) { const double candidate_distance = distances[index_neighbor]; if ( (candidate_distance <= m_radius) && (index_neighbor != p_index) ) { - p_neighbors.push_back(std::make_tuple(index_neighbor, candidate_distance)); + p_neighbors.emplace(index_neighbor, candidate_distance); } } } +double optics::get_core_distance(const neighbors_collection & p_neighbors) const { + auto iter = p_neighbors.cbegin(); + for (std::size_t index = 0; index < (m_neighbors - 1); ++index) { + ++iter; + } + + return iter->m_reachability_distance; +} + + void optics::calculate_ordering(void) { if (!m_result_ptr->cluster_ordering().empty()) { return; } diff --git a/ccore/src/cluster/optics.hpp b/ccore/src/cluster/optics.hpp index 282e2cbd..6ce3f662 100644 --- a/ccore/src/cluster/optics.hpp +++ b/ccore/src/cluster/optics.hpp @@ -62,10 +62,29 @@ enum class optics_data_t { */ class optics : public cluster_algorithm { public: - static const double NONE_DISTANCE; + static const double NONE_DISTANCE; + static const std::size_t INVALID_INDEX; private: - using neighbors_collection = std::vector< std::tuple >; + struct neighbor_descriptor { + public: + std::size_t m_index = INVALID_INDEX; + double m_reachability_distance = 0; + + public: + neighbor_descriptor(const std::size_t p_index, const double p_distance) : + m_index(p_index), m_reachability_distance(p_distance) + { } + }; + + struct neighbor_descriptor_less { + public: + bool operator()(const neighbor_descriptor & p_object1, const neighbor_descriptor & p_object2) const { + return p_object1.m_reachability_distance < p_object2.m_reachability_distance; + } + }; + + using neighbors_collection = std::multiset; private: const dataset * m_data_ptr = nullptr; @@ -178,6 +197,8 @@ class optics : public cluster_algorithm { void get_neighbors_from_distance_matrix(const std::size_t p_index, neighbors_collection & p_neighbors); + double get_core_distance(const neighbors_collection & p_neighbors) const; + void update_order_seed(const optics_descriptor & p_object, const neighbors_collection & neighbors, std::multiset & order_seed); void calculate_ordering(void);