From 7d829f45ea3f648ab9d48e81bad1950b28cb6800 Mon Sep 17 00:00:00 2001 From: annoviko Date: Wed, 30 Jan 2019 14:33:31 +0300 Subject: [PATCH] #381: CLIQUE python implementation (update examples and add unit-tests). --- pyclustering/cluster/bang.py | 1 + pyclustering/cluster/clique.py | 12 +- pyclustering/cluster/examples/bang_example.py | 3 +- .../cluster/examples/clique_example.py | 3 +- .../cluster/tests/clique_templates.py | 33 +++- pyclustering/cluster/tests/unit/__init__.py | 2 + pyclustering/cluster/tests/unit/ut_bang.py | 5 +- pyclustering/cluster/tests/unit/ut_clique.py | 144 +++++++++++++++++- 8 files changed, 188 insertions(+), 15 deletions(-) diff --git a/pyclustering/cluster/bang.py b/pyclustering/cluster/bang.py index 0119fce1..faa616f5 100755 --- a/pyclustering/cluster/bang.py +++ b/pyclustering/cluster/bang.py @@ -688,6 +688,7 @@ def split(self, dimension): def is_neighbor(self, block): """! @brief Performs calculation to identify whether specified block is neighbor of current block. + @details It also considers diagonal blocks as neighbors. @param[in] block (spatial_block): Another block that is check whether it is neighbor. 
diff --git a/pyclustering/cluster/clique.py b/pyclustering/cluster/clique.py index 7118f3be..9bdedb66 100755 --- a/pyclustering/cluster/clique.py +++ b/pyclustering/cluster/clique.py @@ -280,7 +280,7 @@ def increment(self): class clique: - def __init__(self, data, amount_intervals, density_threshold): + def __init__(self, data, amount_intervals, density_threshold, ccore=True): self.__data = data self.__amount_intervals = amount_intervals self.__density_threshold = density_threshold @@ -299,6 +299,7 @@ def process(self): self.__allocate_clusters() self.__cells_map.clear() + return self def get_clusters(self): @@ -435,12 +436,3 @@ def __get_data_size_derscription(self): data_sizes[index_dimension] = max_corner[index_dimension] - min_corner[index_dimension] return data_sizes, min_corner, max_corner - - - -# block1 = clique_block() -# block1.logical_location = [1, 1] -# block2 = clique_block() -# block2.logical_location = [0, 1] -# -# print(block1.get_locaion_neighbors(3)) diff --git a/pyclustering/cluster/examples/bang_example.py b/pyclustering/cluster/examples/bang_example.py index f1f2414c..2a7a13ee 100755 --- a/pyclustering/cluster/examples/bang_example.py +++ b/pyclustering/cluster/examples/bang_example.py @@ -72,7 +72,7 @@ def template_segmentation(source, levels, threshold): def cluster_simple_sample(): - template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 8) + template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 3) template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 7) template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 7) template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 4, density_threshold=2.5) @@ -85,6 +85,7 @@ def cluster_simple_sample(): template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 7) template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 7) template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 7) + template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 7) template_clustering(SIMPLE_SAMPLES.SAMPLE_ELONGATE, 7) diff --git 
a/pyclustering/cluster/examples/clique_example.py b/pyclustering/cluster/examples/clique_example.py index eccb8082..bb4575bb 100755 --- a/pyclustering/cluster/examples/clique_example.py +++ b/pyclustering/cluster/examples/clique_example.py @@ -69,6 +69,7 @@ def cluster_simple_sample(): template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 7, 0) template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 5, 0) template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 7, 0) + template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 2, 0) template_clustering(SIMPLE_SAMPLES.SAMPLE_ELONGATE, 7, 0) @@ -77,7 +78,7 @@ def cluster_fcps(): template_clustering(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS, 10, 0) template_clustering(FCPS_SAMPLES.SAMPLE_WING_NUT, 10, 0) template_clustering(FCPS_SAMPLES.SAMPLE_TARGET, 10, 0) - template_clustering(FCPS_SAMPLES.SAMPLE_HEPTA, 10, 0) + template_clustering(FCPS_SAMPLES.SAMPLE_HEPTA, 9, 0) template_clustering(FCPS_SAMPLES.SAMPLE_CHAINLINK, 10, 0) template_clustering(FCPS_SAMPLES.SAMPLE_TETRA, 10, 0) template_clustering(FCPS_SAMPLES.SAMPLE_ATOM, 10, 0) diff --git a/pyclustering/cluster/tests/clique_templates.py b/pyclustering/cluster/tests/clique_templates.py index addb0a1d..2a802f64 100755 --- a/pyclustering/cluster/tests/clique_templates.py +++ b/pyclustering/cluster/tests/clique_templates.py @@ -71,4 +71,35 @@ def clustering(path, intervals, density_threshold, expected_clusters, expected_n covered_points.add(index_point) assertion.eq(len(sample), len(covered_points)) - return clique_instance \ No newline at end of file + return clique_instance + + + @staticmethod + def visualize(path, levels, threshold, ccore, **kwargs): + sample = read_sample(path) + + clique_instance = clique(sample, levels, threshold, ccore) + clique_instance.process() + + cells = clique_instance.get_cells() + + clique_visualizer.show_grid(cells, sample) + + + @staticmethod + def exception(type, sample_storage, levels, threshold, ccore): + try: + sample = sample_storage + if 
isinstance(sample_storage, str): + sample = read_sample(sample_storage) + + clique_instance = clique(sample, levels, threshold, ccore) + clique_instance.process() + + except type: + return + + except Exception as ex: + raise AssertionError("Expected: '%s', Actual: '%s'" % (type, ex.__class__.__name__))  # parameter 'type' shadows the builtin, so type(ex) cannot be used here + + raise AssertionError("Expected: '%s', Actual: 'None'" % type) \ No newline at end of file diff --git a/pyclustering/cluster/tests/unit/__init__.py b/pyclustering/cluster/tests/unit/__init__.py index 1bc44d0f..a7ea4b0d 100755 --- a/pyclustering/cluster/tests/unit/__init__.py +++ b/pyclustering/cluster/tests/unit/__init__.py @@ -37,6 +37,7 @@ from pyclustering.cluster.tests.unit import ut_bsas as cluster_bsas_unit_tests from pyclustering.cluster.tests.unit import ut_center_initializer as cluster_center_initializer_unit_tests from pyclustering.cluster.tests.unit import ut_clarans as cluster_clarans_unit_tests +from pyclustering.cluster.tests.unit import ut_clique as cluster_clique_unit_tests from pyclustering.cluster.tests.unit import ut_cure as cluster_cure_unit_tests from pyclustering.cluster.tests.unit import ut_dbscan as cluster_dbscan_unit_tests from pyclustering.cluster.tests.unit import ut_elbow as cluster_elbow_unit_tests @@ -74,6 +75,7 @@ def fill_suite(unit_cluster_suite): unit_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_bsas_unit_tests)) unit_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_center_initializer_unit_tests)) unit_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_clarans_unit_tests)) + unit_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_clique_unit_tests)) unit_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_cure_unit_tests)) unit_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_dbscan_unit_tests)) unit_cluster_suite.addTests(unittest.TestLoader().loadTestsFromModule(cluster_elbow_unit_tests)) diff 
--git a/pyclustering/cluster/tests/unit/ut_bang.py b/pyclustering/cluster/tests/unit/ut_bang.py index 09cca31b..c5cf7b99 100755 --- a/pyclustering/cluster/tests/unit/ut_bang.py +++ b/pyclustering/cluster/tests/unit/ut_bang.py @@ -34,7 +34,7 @@ from pyclustering.samples.definitions import SIMPLE_SAMPLES -class bsas_unit_test(unittest.TestCase): +class bang_unit_test(unittest.TestCase): def test_clustering_sample_simple_1(self): bang_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 8, 0.0, [5, 5], 0, False) bang_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 7, 0.0, [5, 5], 0, False) @@ -44,6 +44,9 @@ def test_clustering_sample_simple_1(self): def test_clustering_sample_simple_1_one_cluster(self): bang_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, 0.0, [10], 0, False) + def test_clustering_diagonal_neighbors(self): + bang_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 3, 0.0, [10], 0, False) + def test_clustering_sample_simple_1_noise_only(self): bang_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 1000.0, [], 10, False) bang_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 0.0, [], 10, False, amount_threshold=20) diff --git a/pyclustering/cluster/tests/unit/ut_clique.py b/pyclustering/cluster/tests/unit/ut_clique.py index e1744c50..6311104f 100755 --- a/pyclustering/cluster/tests/unit/ut_clique.py +++ b/pyclustering/cluster/tests/unit/ut_clique.py @@ -29,6 +29,148 @@ import matplotlib matplotlib.use('Agg') +from pyclustering.cluster.clique import clique_block from pyclustering.cluster.tests.clique_templates import clique_test_template -from pyclustering.samples.definitions import SIMPLE_SAMPLES \ No newline at end of file +from pyclustering.tests.assertion import assertion + +from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES + + +class clique_unit_test(unittest.TestCase): + def test_clustering_sample_simple_1(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 8, 
0, [5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 7, 0, [5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 0, [5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 5, 0, [5, 5], 0, False) + + def test_clustering_sample_simple_1_one_cluster(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, 0, [10], 0, False) + + def test_clustering_diagonal_blocks_arent_neoghbors(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 2, 0, [5, 5], 0, False) + + def test_clustering_sample_simple_1_noise_only(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 1000, [], 10, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 6, 10, [], 10, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 2, 5, [], 10, False) + + def test_clustering_sample_simple_2(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 7, 0, [5, 8, 10], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 6, 0, [5, 8, 10], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 1, 0, [23], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 6, 500, [], 23, False) + + def test_clustering_sample_simple_3(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 9, 0, [10, 10, 10, 30], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 8, 0, [10, 10, 10, 30], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 1, 0, [60], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 6, 500, [], 60, False) + + def test_clustering_sample_simple_3_one_point_noise(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 2, 9, [59], 1, False) + + def test_clustering_sample_simple_4_one_cluster(self): + 
clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1, 0, [75], 0, False) + + def test_clustering_sample_simple_5(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 8, 0, [15, 15, 15, 15], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 7, 0, [15, 15, 15, 15], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 6, 0, [15, 15, 15, 15], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 5, 0, [15, 15, 15, 15], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 1, 0, [60], 0, False) + + def test_clustering_one_dimensional_data1(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, 4, 0, [10, 10], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, 2, 0, [20], 0, False) + + def test_clustering_one_dimensional_data2(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE8, 15, 0, [15, 20, 30, 80], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE8, 2, 0, [145], 0, False) + + def test_clustering_one_dimensional_data_3_Similar(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE9, 7, 0, [10, 20], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE9, 2, 0, [30], 0, False) + + def test_clustering_sample_simple_10(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 8, 0, [11, 11, 11], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 7, 0, [11, 11, 11], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 2, 0, [33], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE10, 1, 0, [33], 0, False) + + def test_clustering_three_dimensional_data1(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 6, 0, [10, 10], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 5, 0, [10, 10], 0, False) + 
clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 1, 0, [20], 0, False) + + def test_clustering_similar_points(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 8, 0, [5, 5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 7, 0, [5, 5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 5, 0, [5, 5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 2, 0, [15], 0, False) + + def test_clustering_zero_column(self): + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 3, 0, [5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 2, 0, [5, 5], 0, False) + clique_test_template.clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, 1, 0, [10], 0, False) + + def test_clustering_fcps_lsun(self): + clique_test_template.clustering(FCPS_SAMPLES.SAMPLE_LSUN, 15, 0, [100, 101, 202], 0, False) + + def test_clustering_fcps_hepta(self): + clique_test_template.clustering(FCPS_SAMPLES.SAMPLE_HEPTA, 9, 0, [30, 30, 30, 30, 30, 30, 32], 0, False) + + + def test_visualize_no_failure_one_dimensional(self): + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, 4, 0, False) + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE8, 7, 0, False) + + def test_visualize_no_failure_two_dimensional(self): + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 8, 0, False) + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, 0, False) + + def test_visualize_no_failure_three_dimensional(self): + clique_test_template.visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE11, 3, 0, False) + + + def test_argument_invalid_levels(self): + clique_test_template.exception(ValueError, SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 0, 0.0, False) + clique_test_template.exception(ValueError, SIMPLE_SAMPLES.SAMPLE_SIMPLE1, -1, 0.0, False) + clique_test_template.exception(ValueError, SIMPLE_SAMPLES.SAMPLE_SIMPLE1, -10, 0.0, False) + + def 
test_argument_invalid_density(self): + clique_test_template.exception(ValueError, SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, -1.0, False) + clique_test_template.exception(ValueError, SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, -2.0, False) + + def test_argument_empty_data(self): + clique_test_template.exception(ValueError, [], 1, 0.0, False) + + def test_logical_block_neighbors(self): + block = clique_block() + block.logical_location = [1, 1] + + neighbors = block.get_location_neighbors(3) + assertion.eq(4, len(neighbors)) + assertion.true([0, 1] in neighbors) + assertion.true([2, 1] in neighbors) + assertion.true([1, 0] in neighbors) + assertion.true([1, 2] in neighbors) + + def test_logical_block_neighbors_on_edge(self): + block = clique_block() + block.logical_location = [1, 1] + + neighbors = block.get_location_neighbors(2) + assertion.eq(2, len(neighbors)) + assertion.true([0, 1] in neighbors) + assertion.true([1, 0] in neighbors) + + block.logical_location = [0, 0] + neighbors = block.get_location_neighbors(2) + assertion.eq(2, len(neighbors)) + assertion.true([0, 1] in neighbors) + assertion.true([1, 0] in neighbors) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file