From 16aeec64d2263466624cfa3811f46b8c11d6ea50 Mon Sep 17 00:00:00 2001 From: annoviko Date: Tue, 5 Feb 2019 17:35:26 +0300 Subject: [PATCH] #381: CLIQUE integration between C++ and Python. --- ccore/src/ccore.vcxproj | 2 + ccore/src/ccore.vcxproj.filters | 6 + ccore/src/interface/clique_interface.cpp | 62 ++++++++ ccore/src/interface/clique_interface.h | 62 ++++++++ ccore/tst/utcore.vcxproj | 3 + ccore/tst/utcore.vcxproj.filters | 9 ++ ccore/tst/utest-interface-clique.cpp | 40 +++++ pyclustering/cluster/clique.py | 40 ++++- .../cluster/tests/integration/__init__.py | 10 +- .../cluster/tests/integration/it_clique.py | 141 ++++++++++++++++++ 10 files changed, 364 insertions(+), 11 deletions(-) create mode 100755 ccore/src/interface/clique_interface.cpp create mode 100755 ccore/src/interface/clique_interface.h create mode 100755 ccore/tst/utest-interface-clique.cpp create mode 100755 pyclustering/cluster/tests/integration/it_clique.py diff --git a/ccore/src/ccore.vcxproj b/ccore/src/ccore.vcxproj index 0df829b1..2c489f3a 100644 --- a/ccore/src/ccore.vcxproj +++ b/ccore/src/ccore.vcxproj @@ -54,6 +54,7 @@ + @@ -151,6 +152,7 @@ + diff --git a/ccore/src/ccore.vcxproj.filters b/ccore/src/ccore.vcxproj.filters index ac63e0a3..01a1aa25 100644 --- a/ccore/src/ccore.vcxproj.filters +++ b/ccore/src/ccore.vcxproj.filters @@ -244,6 +244,9 @@ Source Files\cluster + + Source Files\interface + @@ -537,5 +540,8 @@ Source Files\cluster + + Source Files\interface + \ No newline at end of file diff --git a/ccore/src/interface/clique_interface.cpp b/ccore/src/interface/clique_interface.cpp new file mode 100755 index 00000000..469fdbb5 --- /dev/null +++ b/ccore/src/interface/clique_interface.cpp @@ -0,0 +1,62 @@ +/** +* +* @authors Andrei Novikov (pyclustering@yandex.ru) +* @date 2014-2019 +* @copyright GNU Public License +* +* GNU_PUBLIC_LICENSE +* pyclustering is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by 
+* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* pyclustering is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +*/ + +#include "interface/clique_interface.h" + +#include "cluster/clique.hpp" + + +pyclustering_package * clique_algorithm(const pyclustering_package * const p_sample, const std::size_t p_intervals, const std::size_t p_threshold) { + dataset input_dataset; + p_sample->extract(input_dataset); + + ccore::clst::clique solver(p_intervals, p_threshold); + + ccore::clst::clique_data output_result; + + solver.process(input_dataset, output_result); + + pyclustering_package * package = create_package_container(CLIQUE_PACKAGE_SIZE); + + ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_CLUSTERS] = create_package(&output_result.clusters()); + ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_NOISE] = create_package(&output_result.noise()); + + const auto & blocks = output_result.blocks(); + ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_LOGICAL_LOCATION] = create_package_container(blocks.size()); + ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_MAX_CORNER] = create_package_container(blocks.size()); + ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_MIN_CORNER] = create_package_container(blocks.size()); + ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_BLOCK_POINTS] = create_package_container(blocks.size()); + + pyclustering_package * logical_location = ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_LOGICAL_LOCATION]; + pyclustering_package * max_corner = ((pyclustering_package **) 
package->data)[CLIQUE_PACKAGE_INDEX_MAX_CORNER]; + pyclustering_package * min_corner = ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_MIN_CORNER]; + pyclustering_package * block_points = ((pyclustering_package **) package->data)[CLIQUE_PACKAGE_INDEX_BLOCK_POINTS]; + + for (std::size_t i = 0; i < blocks.size(); i++) { + ((pyclustering_package **) logical_location->data)[i] = create_package(&(blocks[i].get_logical_location())); + ((pyclustering_package **) max_corner->data)[i] = create_package(&(blocks[i].get_spatial_block().get_max_corner())); + ((pyclustering_package **) min_corner->data)[i] = create_package(&(blocks[i].get_spatial_block().get_min_corner())); + ((pyclustering_package **) block_points->data)[i] = create_package(&(blocks[i].get_points())); + } + + return package; +} \ No newline at end of file diff --git a/ccore/src/interface/clique_interface.h b/ccore/src/interface/clique_interface.h new file mode 100755 index 00000000..6ea30bf4 --- /dev/null +++ b/ccore/src/interface/clique_interface.h @@ -0,0 +1,62 @@ +/** +* +* @authors Andrei Novikov (pyclustering@yandex.ru) +* @date 2014-2019 +* @copyright GNU Public License +* +* GNU_PUBLIC_LICENSE +* pyclustering is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* pyclustering is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+* +*/ + +#pragma once + + +#include "interface/pyclustering_package.hpp" + +#include "definitions.hpp" + + +/** + * + * @brief CLIQUE result is returned as a pyclustering_package that consists of sub-packages, and this enumerator provides + * named indexes for these sub-packages. + * + */ +enum clique_package_indexer { + CLIQUE_PACKAGE_INDEX_CLUSTERS = 0, + CLIQUE_PACKAGE_INDEX_NOISE, + CLIQUE_PACKAGE_INDEX_LOGICAL_LOCATION, + CLIQUE_PACKAGE_INDEX_MAX_CORNER, + CLIQUE_PACKAGE_INDEX_MIN_CORNER, + CLIQUE_PACKAGE_INDEX_BLOCK_POINTS, + CLIQUE_PACKAGE_SIZE +}; + + +/** + * + * @brief Clustering algorithm CLIQUE returns allocated clusters. + * @details Caller should destroy the returned package when + * it is not required anymore. + * + * @param[in] p_sample: input data for clustering. + * @param[in] p_intervals: amount of intervals in each dimension. + * @param[in] p_threshold: minimum number of objects that should be contained by non-noise block. + * + * @return Returns pointer to the package with the clustering result whose sub-packages (indexed by 'clique_package_indexer') + * contain allocated clusters, noise, and logical location, max corner, min corner and points of each block. 
+ * + */ +extern "C" DECLARATION pyclustering_package * clique_algorithm(const pyclustering_package * const p_sample, const std::size_t p_intervals, const std::size_t p_threshold); \ No newline at end of file diff --git a/ccore/tst/utcore.vcxproj b/ccore/tst/utcore.vcxproj index 605f50a7..233a29c1 100644 --- a/ccore/tst/utcore.vcxproj +++ b/ccore/tst/utcore.vcxproj @@ -201,6 +201,7 @@ + @@ -240,6 +241,7 @@ + @@ -360,6 +362,7 @@ + diff --git a/ccore/tst/utcore.vcxproj.filters b/ccore/tst/utcore.vcxproj.filters index cf799cc8..82837dc3 100644 --- a/ccore/tst/utcore.vcxproj.filters +++ b/ccore/tst/utcore.vcxproj.filters @@ -443,6 +443,12 @@ Unit Tests + + Tested Code\interface + + + Unit Tests + @@ -745,5 +751,8 @@ Tested Code\cluster + + Tested Code\interface + \ No newline at end of file diff --git a/ccore/tst/utest-interface-clique.cpp b/ccore/tst/utest-interface-clique.cpp new file mode 100755 index 00000000..f4e69fe7 --- /dev/null +++ b/ccore/tst/utest-interface-clique.cpp @@ -0,0 +1,40 @@ +/** +* +* @authors Andrei Novikov (pyclustering@yandex.ru) +* @date 2014-2019 +* @copyright GNU Public License +* +* GNU_PUBLIC_LICENSE +* pyclustering is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* pyclustering is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+* +*/ + +#include "gtest/gtest.h" + +#include "interface/clique_interface.h" +#include "interface/pyclustering_package.hpp" + +#include "utenv_utils.hpp" + +#include + + +TEST(utest_interface_clique, clique_algorithm) { + std::shared_ptr sample = pack(dataset({ { 1.0, 1.0 }, { 1.1, 1.0 }, { 1.2, 1.4 }, { 10.0, 10.3 }, { 10.1, 10.2 }, { 10.2, 10.4 } })); + + pyclustering_package * result = clique_algorithm(sample.get(), 2, 0); + ASSERT_EQ((std::size_t) CLIQUE_PACKAGE_SIZE, result->size); + + delete result; +} \ No newline at end of file diff --git a/pyclustering/cluster/clique.py b/pyclustering/cluster/clique.py index 0594fe59..9f10c649 100755 --- a/pyclustering/cluster/clique.py +++ b/pyclustering/cluster/clique.py @@ -27,7 +27,9 @@ import itertools -from collections import deque +from pyclustering.core.wrapper import ccore_library + +import pyclustering.core.clique_wrapper as wrapper try: @@ -189,11 +191,11 @@ def get_corners(self): class clique_block: - def __init__(self): - self.__logical_location = [] - self.__spatial_location = None - self.__points = [] - self.__visited = False + def __init__(self, logical_location=None, spatial_location=None, points=None, visited=False): + self.__logical_location = logical_location or [] + self.__spatial_location = spatial_location + self.__points = points or [] + self.__visited = visited def __str__(self): return str(self.__logical_location) @@ -288,6 +290,10 @@ def __init__(self, data, amount_intervals, density_threshold, **kwargs): self.__amount_intervals = amount_intervals self.__density_threshold = density_threshold + self.__ccore = kwargs.get('ccore', True) + if self.__ccore: + self.__ccore = ccore_library.workable() + self.__clusters = [] self.__noise = [] @@ -298,11 +304,31 @@ def __init__(self, data, amount_intervals, density_threshold, **kwargs): def process(self): + if self.__ccore: + self.__process_by_ccore() + else: + self.__process_by_python() + + return self + + + def __process_by_ccore(self): + 
(self.__clusters, self.__noise, block_logical_locations, block_max_corners, block_min_corners, block_points) = \ + wrapper.clique(self.__data, self.__amount_intervals, self.__density_threshold) + + amount_cells = len(block_logical_locations) + for i in range(amount_cells): + self.__cells.append(clique_block(block_logical_locations[i], + spatial_block(block_max_corners[i], block_min_corners[i]), + block_points[i], + True)) + + + def __process_by_python(self): self.__create_grid() self.__allocate_clusters() self.__cells_map.clear() - return self def get_clusters(self): diff --git a/pyclustering/cluster/tests/integration/__init__.py b/pyclustering/cluster/tests/integration/__init__.py index b4277184..15fe9dbb 100755 --- a/pyclustering/cluster/tests/integration/__init__.py +++ b/pyclustering/cluster/tests/integration/__init__.py @@ -23,16 +23,17 @@ """ -import unittest; -from pyclustering.tests.suite_holder import suite_holder; +import unittest +from pyclustering.tests.suite_holder import suite_holder # Generate images without having a window appear. 
-import matplotlib; -matplotlib.use('Agg'); +import matplotlib +matplotlib.use('Agg') from pyclustering.cluster.tests.integration import it_agglomerative as cluster_agglomerative_integration_tests from pyclustering.cluster.tests.integration import it_bsas as cluster_bsas_integration_tests +from pyclustering.cluster.tests.integration import it_clique as cluster_clique_integration_tests from pyclustering.cluster.tests.integration import it_cure as cluster_cure_integration_tests from pyclustering.cluster.tests.integration import it_dbscan as cluster_dbscan_integration_tests from pyclustering.cluster.tests.integration import it_elbow as cluster_elbow_integration_tests @@ -58,6 +59,7 @@ def __init__(self): def fill_suite(integration_cluster_suite): integration_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_agglomerative_integration_tests)) integration_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_bsas_integration_tests)) + integration_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_clique_integration_tests)) integration_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_cure_integration_tests)) integration_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_dbscan_integration_tests)) integration_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_elbow_integration_tests)) diff --git a/pyclustering/cluster/tests/integration/it_clique.py b/pyclustering/cluster/tests/integration/it_clique.py new file mode 100755 index 00000000..50a2bab3 --- /dev/null +++ b/pyclustering/cluster/tests/integration/it_clique.py @@ -0,0 +1,141 @@ +"""! + +@brief Integration-tests for CLIQUE algorithm. 
+ +@authors Andrei Novikov (pyclustering@yandex.ru) +@date 2014-2019 +@copyright GNU Public License + +@cond GNU_PUBLIC_LICENSE + PyClustering is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + PyClustering is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +@endcond + +""" + +import unittest + +# Generate images without having a window appear. +import matplotlib +matplotlib.use('Agg') + +from pyclustering.cluster.tests.clique_templates import clique_test_template + +from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES + +from pyclustering.core.tests import remove_library + + +class clique_integration_test(unittest.TestCase): + def test_clustering_sample_simple_1_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 8, 0, [5, 5], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 7, 0, [5, 5], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 0, [5, 5], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 5, 0, [5, 5], 0, True) + + def test_clustering_sample_simple_1_one_cluster_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, 0, [10], 0, True) + + def test_clustering_diagonal_blocks_arent_neoghbors_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 2, 0, [5, 5], 0, True) + + def test_clustering_sample_simple_1_noise_only_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 1000, [], 10, True) + 
clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 10, [], 10, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 2, 5, [], 10, True) + + def test_clustering_sample_simple_2_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 7, 0, [5, 8, 10], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 6, 0, [5, 8, 10], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 1, 0, [23], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 6, 500, [], 23, True) + + def test_clustering_sample_simple_3_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 9, 0, [10, 10, 10, 30], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 8, 0, [10, 10, 10, 30], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 1, 0, [60], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 6, 500, [], 60, True) + + def test_clustering_sample_simple_3_one_point_noise_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 2, 9, [59], 1, True) + + def test_clustering_sample_simple_4_one_cluster_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1, 0, [75], 0, True) + + def test_clustering_sample_simple_5_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 8, 0, [15, 15, 15, 15], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 7, 0, [15, 15, 15, 15], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 6, 0, [15, 15, 15, 15], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 5, 0, [15, 15, 15, 15], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 1, 0, [60], 0, True) + + def test_clustering_one_dimensional_data1_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, 4, 0, [10, 10], 0, True) 
+ clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, 2, 0, [20], 0, True) + + def test_clustering_one_dimensional_data2_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE8, 15, 0, [15, 20, 30, 80], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE8, 2, 0, [145], 0, True) + + def test_clustering_one_dimensional_data_3_similar_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE9, 7, 0, [10, 20], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE9, 2, 0, [30], 0, True) + + def test_clustering_sample_simple_10_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 8, 0, [11, 11, 11], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 7, 0, [11, 11, 11], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 2, 0, [33], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 1, 0, [33], 0, True) + + def test_clustering_three_dimensional_data1_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 6, 0, [10, 10], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 5, 0, [10, 10], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 1, 0, [20], 0, True) + + def test_clustering_similar_points_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 8, 0, [5, 5, 5], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 7, 0, [5, 5, 5], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 5, 0, [5, 5, 5], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 2, 0, [15], 0, True) + + def test_clustering_zero_column_by_core(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 3, 0, [5, 5], 0, True) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 2, 0, [5, 5], 0, True) + 
clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 1, 0, [10], 0, True) + + def test_clustering_fcps_lsun_by_core(self): + clique_test_template.clustering(FCPS_SAMPLES.SAMPLE_LSUN, 15, 0, [100, 101, 202], 0, True) + + def test_clustering_fcps_hepta_by_core(self): + clique_test_template.clustering(FCPS_SAMPLES.SAMPLE_HEPTA, 9, 0, [30, 30, 30, 30, 30, 30, 32], 0, True) + + + def test_visualize_no_failure_one_dimensional_by_core(self): + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, 4, 0, True) + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE8, 7, 0, True) + + def test_visualize_no_failure_two_dimensional_by_core(self): + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 8, 0, True) + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, 0, True) + + def test_visualize_no_failure_three_dimensional_by_core(self): + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 3, 0, True) + + @remove_library + def test_processing_when_library_core_corrupted(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 8, 0, [5, 5], 0, True) + + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file