From 7f0d5be3a1fdc7c9f47652ae03fbee35c062cac4 Mon Sep 17 00:00:00 2001 From: annoviko Date: Mon, 9 Sep 2019 13:27:13 +0200 Subject: [PATCH] #543: [pyclustering.cluster.silhouette] Distance matrix support. --- CHANGES | 3 + ccore/src/cluster/silhouette.cpp | 11 ++++ ccore/src/cluster/silhouette.hpp | 12 +++- ccore/src/interface/interface_property.cpp | 2 +- ccore/src/interface/silhouette_interface.cpp | 5 +- ccore/src/interface/silhouette_interface.h | 4 +- ccore/src/utils/metric.hpp | 63 ++++++++++++------- ccore/tst/utest-interface-silhouette.cpp | 20 ++++++ ccore/tst/utest-silhouette.cpp | 54 +++++++++++++++- pyclustering/cluster/dbscan.py | 3 +- pyclustering/cluster/kmeans.py | 12 ++-- pyclustering/cluster/silhouette.py | 13 +++- .../tests/integration/it_silhouette.py | 30 +++++++++ .../cluster/tests/silhouette_templates.py | 22 ++++++- .../cluster/tests/unit/ut_silhouette.py | 29 +++++++++ pyclustering/core/converter.py | 2 +- pyclustering/core/silhouette_wrapper.py | 6 +- pyclustering/core/wrapper.py | 10 +-- pyclustering/utils/__init__.py | 9 +-- 19 files changed, 255 insertions(+), 55 deletions(-) diff --git a/CHANGES b/CHANGES index b19eb027..12a8f11d 100755 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,9 @@ CHANGE NOTES FOR 0.9.2 (STARTED Sep 9, 2019), (RELEASED: -) ------------------------------------------------------------------------ GENERAL CHANGES: +- Introduced parameter `data_type` to Silhouette method to use distance matrix (pyclustering.cluster.silhouette, ccore.clst.silhouette). + See: https://github.com/annoviko/pyclustering/issues/543 + - Optimization of HHN (Hodgkin-Huxley Neural Network) by parallel processing (ccore.nnet.hhn). See: https://github.com/annoviko/pyclustering/issues/541 diff --git a/ccore/src/cluster/silhouette.cpp b/ccore/src/cluster/silhouette.cpp index 2ec76e97..a6c8cddd 100755 --- a/ccore/src/cluster/silhouette.cpp +++ b/ccore/src/cluster/silhouette.cpp @@ -34,9 +34,15 @@ silhouette::silhouette(const distance_metric & p_metric) : void silhouette::process(const dataset & p_data, const cluster_sequence & p_clusters, silhouette_data & p_result) { + process(p_data, p_clusters, silhouette_data_t::POINTS, p_result); +} + + +void silhouette::process(const dataset & p_data, const cluster_sequence & p_clusters, const silhouette_data_t & p_type, silhouette_data & p_result) { m_data = &p_data; m_clusters = &p_clusters; m_result = &p_result; + m_type = p_type; m_result->get_score().reserve(m_data->size()); @@ -61,6 +67,11 @@ double silhouette::calculate_score(const std::size_t p_index_point, const std::s void silhouette::calculate_dataset_difference(const std::size_t p_index_point, std::vector & p_dataset_difference) const { + if (m_type == silhouette_data_t::DISTANCE_MATRIX) { + p_dataset_difference = m_data->at(p_index_point); + return; + } + p_dataset_difference.reserve(m_data->size()); const auto & current_point = m_data->at(p_index_point); diff --git a/ccore/src/cluster/silhouette.hpp b/ccore/src/cluster/silhouette.hpp index e2d98f7b..542e2264 100755 --- a/ccore/src/cluster/silhouette.hpp +++ b/ccore/src/cluster/silhouette.hpp @@ -39,13 +39,21 @@ namespace ccore { namespace clst { +enum class silhouette_data_t { + POINTS, + DISTANCE_MATRIX +}; + + class silhouette { private: const dataset * m_data = nullptr; /* temporary object, exists during processing */ const cluster_sequence * m_clusters = nullptr; /* temporary object, exists during processing */ silhouette_data * m_result = nullptr; /* temporary object, exists during processing */ - distance_metric m_metric = distance_metric_factory::euclidean_square(); + silhouette_data_t m_type = silhouette_data_t::POINTS; + + distance_metric m_metric = distance_metric_factory::euclidean_square(); public: silhouette(void) = default; @@ -61,6 +69,8 @@ class silhouette { public: void process(const dataset & p_data, const cluster_sequence & p_clusters, silhouette_data & p_result); + void process(const dataset & p_data, const cluster_sequence & p_clusters, const silhouette_data_t & p_type, silhouette_data & p_result); + private: double calculate_score(const std::size_t p_index_point, const std::size_t p_index_cluster) const; diff --git a/ccore/src/interface/interface_property.cpp b/ccore/src/interface/interface_property.cpp index ca1acc77..255a61a4 100755 --- a/ccore/src/interface/interface_property.cpp +++ b/ccore/src/interface/interface_property.cpp @@ -25,7 +25,7 @@ const char * INTERFACE_DESCRIPTION = "ccore library is a C/C++ part of pyclustering library"; -const char * INTERFACE_VERSION = "0.9.1"; +const char * INTERFACE_VERSION = "0.9.2"; void * get_interface_description(void) { diff --git a/ccore/src/interface/silhouette_interface.cpp b/ccore/src/interface/silhouette_interface.cpp index f48cbe73..0ecfee39 100755 --- a/ccore/src/interface/silhouette_interface.cpp +++ b/ccore/src/interface/silhouette_interface.cpp @@ -43,7 +43,8 @@ ccore::clst::silhouette_ksearch_allocator::ptr get_silhouette_ksearch_allocator( pyclustering_package * silhouette_algorithm( const pyclustering_package * const p_sample, const pyclustering_package * const p_clusters, - const void * const p_metric) + const void * const p_metric, + const std::size_t p_data_type) { dataset data; p_sample->extract(data); @@ -58,7 +59,7 @@ pyclustering_package * silhouette_algorithm( metric = &default_metric; ccore::clst::silhouette_data result; - ccore::clst::silhouette(*metric).process(data, clusters, result); + ccore::clst::silhouette(*metric).process(data, clusters, static_cast(p_data_type), result); return create_package(&result.get_score()); } diff --git a/ccore/src/interface/silhouette_interface.h b/ccore/src/interface/silhouette_interface.h index 71baaf27..012c5062 100755 --- a/ccore/src/interface/silhouette_interface.h +++ b/ccore/src/interface/silhouette_interface.h @@ -77,6 +77,7 @@ ccore::clst::silhouette_ksearch_allocator::ptr get_silhouette_ksearch_allocator( * @param[in] p_sample: input data for clustering. * @param[in] p_clusters: clusters that have been allocated for that data. * @param[in] p_metric: pointer to distance metric 'distance_metric' that is used for distance calculation between two points. + * @param[in] p_data_type: defines data type that is used for clustering process ('0' - points, '1' - distance matrix). * * @return Returns Silhouette's analysis results as a pyclustering package [ scores ]. * @@ -84,7 +85,8 @@ ccore::clst::silhouette_ksearch_allocator::ptr get_silhouette_ksearch_allocator( extern "C" DECLARATION pyclustering_package * silhouette_algorithm( const pyclustering_package * const p_sample, const pyclustering_package * const p_clusters, - const void * const p_metric); + const void * const p_metric, + const std::size_t p_data_type); /** diff --git a/ccore/src/utils/metric.hpp b/ccore/src/utils/metric.hpp index 763f9665..e3a64592 100755 --- a/ccore/src/utils/metric.hpp +++ b/ccore/src/utils/metric.hpp @@ -230,30 +230,6 @@ double chi_square_distance(const TypeContainer & point1, const TypeContainer & p } -/** - * - * @brief Calculates distance matrix using points container. - * - * @param[in] p_points: input data that is represented by points. - * @param[out] p_distance_matrix: output distance matrix of points. - * - */ -template -void distance_matrix(const TypeContainer & p_points, TypeContainer & p_distance_matrix) { - using TypeElement = typename TypeContainer::value_type; - - p_distance_matrix = TypeContainer(p_points.size(), TypeElement(p_points.size(), 0.0)); - - for (std::size_t i = 0; i < p_points.size(); i++) { - for (std::size_t j = i + 1; j < p_points.size(); j++) { - const double distance = euclidean_distance(p_points.at(i), p_points.at(j)); - p_distance_matrix[i][j] = distance; - p_distance_matrix[j][i] = distance; - } - } -} - - /** * * @brief Basic distance metric provides interface for calculation distance between objects in line with @@ -553,6 +529,45 @@ double farthest_distance(const TypeContainer & p_container, const distance_metri } +/** + * + * @brief Calculates distance matrix using points container using Euclidean distance. + * + * @param[in] p_points: input data that is represented by points. + * @param[in] p_metric: metric for distance calculation between points. + * @param[out] p_distance_matrix: output distance matrix of points. + * + */ +template +void distance_matrix(const TypeContainer & p_points, const distance_metric & p_metric, TypeContainer & p_distance_matrix) { + using TypeElement = typename TypeContainer::value_type; + + p_distance_matrix = TypeContainer(p_points.size(), TypeElement(p_points.size(), 0.0)); + + for (std::size_t i = 0; i < p_points.size(); i++) { + for (std::size_t j = i + 1; j < p_points.size(); j++) { + const double distance = p_metric(p_points.at(i), p_points.at(j)); + p_distance_matrix[i][j] = distance; + p_distance_matrix[j][i] = distance; + } + } +} + + +/** + * + * @brief Calculates distance matrix using points container using Euclidean distance. + * + * @param[in] p_points: input data that is represented by points. + * @param[out] p_distance_matrix: output distance matrix of points. + * + */ +template +void distance_matrix(const TypeContainer & p_points, TypeContainer & p_distance_matrix) { + distance_matrix(p_points, distance_metric_factory::euclidean(), p_distance_matrix); +} + + } } diff --git a/ccore/tst/utest-interface-silhouette.cpp b/ccore/tst/utest-interface-silhouette.cpp index 19d9797c..0afd1ea8 100755 --- a/ccore/tst/utest-interface-silhouette.cpp +++ b/ccore/tst/utest-interface-silhouette.cpp @@ -28,11 +28,31 @@ #include "samples.hpp" +#include "answer.hpp" +#include "answer_reader.hpp" #include "utenv_utils.hpp" #include +TEST(utest_interface_silhouette, silhouette) { + dataset_ptr data = simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_01); + answer ans = answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_01); + + std::shared_ptr data_package = pack(*data); + std::shared_ptr cluster_package = pack(ans.clusters()); + + pyclustering_package * result_points = silhouette_algorithm(data_package.get(), cluster_package.get(), nullptr, 0); + pyclustering_package * result_matrix = silhouette_algorithm(data_package.get(), cluster_package.get(), nullptr, 1); + + ASSERT_NE(nullptr, result_points); + ASSERT_NE(nullptr, result_matrix); + + ASSERT_EQ(result_points->size, result_matrix->size); + ASSERT_EQ(result_points->type, result_matrix->type); +} + + TEST(utest_interface_silhouette, silhouette_ksearch) { dataset_ptr data = simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_01); std::shared_ptr sample = pack(*data); diff --git a/ccore/tst/utest-silhouette.cpp b/ccore/tst/utest-silhouette.cpp index 4c34a84d..1aab3892 100755 --- a/ccore/tst/utest-silhouette.cpp +++ b/ccore/tst/utest-silhouette.cpp @@ -33,7 +33,7 @@ using namespace ccore::clst; -static void template_correct_scores(const dataset_ptr & p_data, const answer & p_answer) { +void template_correct_scores(const dataset_ptr & p_data, const answer & p_answer) { silhouette_data result; silhouette().process(*p_data, p_answer.clusters(), result); @@ -45,6 +45,25 @@ static void template_correct_scores(const dataset_ptr & p_data, const answer & p } +void template_correct_score_data_types(const dataset_ptr & p_data, const answer & p_answer) { + silhouette_data result_points, result_matrix; + + dataset matrix; + distance_matrix(*p_data, distance_metric_factory::euclidean_square(), matrix); + + silhouette().process(*p_data, p_answer.clusters(), silhouette_data_t::POINTS, result_points); + silhouette().process(matrix, p_answer.clusters(), silhouette_data_t::DISTANCE_MATRIX, result_matrix); + + ASSERT_EQ(p_data->size(), result_points.get_score().size()); + ASSERT_EQ(p_data->size(), result_matrix.get_score().size()); + + const auto & scores_points = result_points.get_score(); + const auto & scores_matrix = result_matrix.get_score(); + + ASSERT_EQ(scores_points, scores_matrix); +} + + TEST(utest_silhouette, correct_score_simple01) { template_correct_scores(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_01), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_01)); } @@ -76,3 +95,36 @@ TEST(utest_silhouette, correct_score_simple07) { TEST(utest_silhouette, correct_score_simple08) { template_correct_scores(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_08), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_08)); } + + +TEST(utest_silhouette, correct_score_distance_matrix_simple01) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_01), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_01)); +} + +TEST(utest_silhouette, correct_score_distance_matrix_simple02) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_02), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_02)); +} + +TEST(utest_silhouette, correct_score_distance_matrix_simple03) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_03), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_03)); +} + +TEST(utest_silhouette, correct_score_distance_matrix_simple04) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_04), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_04)); +} + +TEST(utest_silhouette, correct_score_distance_matrix_simple05) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_05), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_05)); +} + +TEST(utest_silhouette, correct_score_distance_matrix_simple06) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_06), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_06)); +} + +TEST(utest_silhouette, correct_score_distance_matrix_simple07) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_07), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_07)); +} + +TEST(utest_silhouette, correct_score_distance_matrix_simple08) { + template_correct_score_data_types(simple_sample_factory::create_sample(SAMPLE_SIMPLE::SAMPLE_SIMPLE_08), answer_reader::read(SAMPLE_SIMPLE::SAMPLE_SIMPLE_08)); +} diff --git a/pyclustering/cluster/dbscan.py b/pyclustering/cluster/dbscan.py index 5556442e..424aad3a 100755 --- a/pyclustering/cluster/dbscan.py +++ b/pyclustering/cluster/dbscan.py @@ -74,7 +74,8 @@ def __init__(self, data, eps, neighbors, ccore = True, **kwargs): """! @brief Constructor of clustering algorithm DBSCAN. - @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple. + @param[in] data (list): Input data that is presented as list of points or distance matrix (defined by parameter + 'data_type', by default data is considered as a list of points). @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less then the radius. @param[in] neighbors (uint): minimum number of shared neighbors that is required for establish links between points. @param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem. diff --git a/pyclustering/cluster/kmeans.py b/pyclustering/cluster/kmeans.py index fc892e6f..23619159 100755 --- a/pyclustering/cluster/kmeans.py +++ b/pyclustering/cluster/kmeans.py @@ -59,9 +59,9 @@ def __init__(self): @brief Initializer of observer of K-Means algorithm. """ - self.__evolution_clusters = [] - self.__evolution_centers = [] - self.__initial_centers = [] + self.__evolution_clusters = [] + self.__evolution_centers = [] + self.__initial_centers = [] def __len__(self): @@ -168,9 +168,9 @@ def show_clusters(sample, clusters, centers, initial_centers = None, **kwargs): display = kwargs.get('display', True) if figure is None: - figure = visualizer.show(display = False) + figure = visualizer.show(display=False) else: - visualizer.show(figure = figure, display = False) + visualizer.show(figure=figure, display=False) kmeans_visualizer.__draw_centers(figure, offset, visualizer, centers, initial_centers) kmeans_visualizer.__draw_rays(figure, offset, visualizer, sample, clusters, centers) @@ -229,7 +229,7 @@ def __draw_centers(figure, offset, visualizer, centers, initial_centers): @staticmethod - def animate_cluster_allocation(data, observer, animation_velocity = 500, movie_fps = 1, save_movie = None): + def animate_cluster_allocation(data, observer, animation_velocity=500, movie_fps=1, save_movie=None): """! @brief Animates clustering process that is performed by K-Means algorithm. diff --git a/pyclustering/cluster/silhouette.py b/pyclustering/cluster/silhouette.py index b705905f..670481e9 100755 --- a/pyclustering/cluster/silhouette.py +++ b/pyclustering/cluster/silhouette.py @@ -85,19 +85,23 @@ def __init__(self, data, clusters, **kwargs): """! @brief Initializes Silhouette method for analysis. - @param[in] data (array_like): Input data that was used for cluster analysis. + @param[in] data (array_like): Input data that was used for cluster analysis and that is presented as list of + points or distance matrix (defined by parameter 'data_type', by default data is considered as a list + of points). @param[in] clusters (list): Cluster that have been obtained after cluster analysis. @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric'). Keyword Args:
- metric (distance_metric): Metric that was used for cluster analysis and should be used for Silhouette score calculation (by default Square Euclidean distance). + - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix'). - ccore (bool): If True then CCORE (C++ implementation of pyclustering library) is used (by default True). """ self.__data = data self.__clusters = clusters self.__metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE)) + self.__data_type = kwargs.get('data_type', 'points') if self.__metric.get_type() != type_metric.USER_DEFINED: self.__metric.enable_numpy_usage() @@ -135,7 +139,7 @@ def __process_by_ccore(self): """ ccore_metric = metric_wrapper.create_instance(self.__metric) - self.__score = wrapper.silhoeutte(self.__data, self.__clusters, ccore_metric.get_pointer()) + self.__score = wrapper.silhoeutte(self.__data, self.__clusters, ccore_metric.get_pointer(), self.__data_type) def __process_by_python(self): @@ -168,7 +172,10 @@ def __calculate_score(self, index_point, index_cluster): @return (float) Silhouette score for the object. """ - difference = self.__calculate_dataset_difference(index_point) + if self.__data_type == 'points': + difference = self.__calculate_dataset_difference(index_point) + else: + difference = self.__data[index_point] a_score = self.__calculate_within_cluster_score(index_cluster, difference) b_score = self.__caclulate_optimal_neighbor_cluster_score(index_cluster, difference) diff --git a/pyclustering/cluster/tests/integration/it_silhouette.py b/pyclustering/cluster/tests/integration/it_silhouette.py index 49362920..2ddfd966 100755 --- a/pyclustering/cluster/tests/integration/it_silhouette.py +++ b/pyclustering/cluster/tests/integration/it_silhouette.py @@ -134,5 +134,35 @@ def test_correct_ksearch_simple13(self): silhouette_test_template.correct_ksearch(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, SIMPLE_ANSWERS.ANSWER_SIMPLE13, 2, 10, silhouette_ksearch_type.KMEANS, True) + + def test_distance_matrix_sample01(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, + SIMPLE_ANSWERS.ANSWER_SIMPLE1, True) + + def test_distance_matrix_sample02(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, + SIMPLE_ANSWERS.ANSWER_SIMPLE2, True) + + def test_distance_matrix_sample03(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, + SIMPLE_ANSWERS.ANSWER_SIMPLE3, True) + + def test_distance_matrix_sample04(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, + SIMPLE_ANSWERS.ANSWER_SIMPLE4, True) + + def test_distance_matrix_sample05(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, + SIMPLE_ANSWERS.ANSWER_SIMPLE5, True) + + def test_distance_matrix_sample06(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE6, + SIMPLE_ANSWERS.ANSWER_SIMPLE6, True) + + def test_distance_matrix_sample07(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, + SIMPLE_ANSWERS.ANSWER_SIMPLE7, True) + + if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/pyclustering/cluster/tests/silhouette_templates.py b/pyclustering/cluster/tests/silhouette_templates.py index 82e193b8..318953a2 100755 --- a/pyclustering/cluster/tests/silhouette_templates.py +++ b/pyclustering/cluster/tests/silhouette_templates.py @@ -30,22 +30,38 @@ from pyclustering.tests.assertion import assertion -from pyclustering.utils import read_sample +from pyclustering.utils import read_sample, calculate_distance_matrix, distance_metric, type_metric class silhouette_test_template: @staticmethod - def correct_scores(sample_path, answer_path, ccore_flag): + def correct_scores(sample_path, answer_path, ccore_flag, **kwargs): + data_type = kwargs.get('data_type', 'points') + sample = read_sample(sample_path) + if data_type == 'distance_matrix': + sample = calculate_distance_matrix(sample, distance_metric(type_metric.EUCLIDEAN_SQUARE)) + clusters = answer_reader(answer_path).get_clusters() - scores = silhouette(sample, clusters, ccore=ccore_flag).process().get_score() + scores = silhouette(sample, clusters, ccore=ccore_flag, data_type=data_type).process().get_score() assertion.eq(len(sample), len(scores)) for score in scores: assertion.le(-1.0, score) assertion.ge(1.0, score) + return scores + + + @staticmethod + def correct_processing_data_types(sample_path, answer_path, ccore_flag): + scores_points = silhouette_test_template.correct_scores(sample_path, answer_path, ccore_flag, data_type='points') + scores_matrix = silhouette_test_template.correct_scores(sample_path, answer_path, ccore_flag, data_type='distance_matrix') + + assertion.eq(len(scores_points), len(scores_matrix)) + assertion.eq(scores_points, scores_matrix) + @staticmethod def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag): diff --git a/pyclustering/cluster/tests/unit/ut_silhouette.py b/pyclustering/cluster/tests/unit/ut_silhouette.py index 79e5fca8..47b00db2 100755 --- a/pyclustering/cluster/tests/unit/ut_silhouette.py +++ b/pyclustering/cluster/tests/unit/ut_silhouette.py @@ -134,5 +134,34 @@ def test_correct_ksearch_simple13(self): silhouette_test_template.correct_ksearch(SIMPLE_SAMPLES.SAMPLE_SIMPLE13, SIMPLE_ANSWERS.ANSWER_SIMPLE13, 2, 10, silhouette_ksearch_type.KMEANS, False) + def test_distance_matrix_sample01(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, + SIMPLE_ANSWERS.ANSWER_SIMPLE1, False) + + def test_distance_matrix_sample02(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, + SIMPLE_ANSWERS.ANSWER_SIMPLE2, False) + + def test_distance_matrix_sample03(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, + SIMPLE_ANSWERS.ANSWER_SIMPLE3, False) + + def test_distance_matrix_sample04(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, + SIMPLE_ANSWERS.ANSWER_SIMPLE4, False) + + def test_distance_matrix_sample05(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, + SIMPLE_ANSWERS.ANSWER_SIMPLE5, False) + + def test_distance_matrix_sample06(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE6, + SIMPLE_ANSWERS.ANSWER_SIMPLE6, False) + + def test_distance_matrix_sample07(self): + silhouette_test_template.correct_processing_data_types(SIMPLE_SAMPLES.SAMPLE_SIMPLE7, + SIMPLE_ANSWERS.ANSWER_SIMPLE7, False) + + if __name__ == "__main__": unittest.main() diff --git a/pyclustering/core/converter.py b/pyclustering/core/converter.py index 48218e93..54963250 100755 --- a/pyclustering/core/converter.py +++ b/pyclustering/core/converter.py @@ -24,7 +24,7 @@ """ -from ctypes import c_size_t; +from ctypes import c_size_t def convert_data_type(data_type): diff --git a/pyclustering/core/silhouette_wrapper.py b/pyclustering/core/silhouette_wrapper.py index 38d6d668..a2852b90 100755 --- a/pyclustering/core/silhouette_wrapper.py +++ b/pyclustering/core/silhouette_wrapper.py @@ -26,6 +26,7 @@ from ctypes import c_double, c_size_t, POINTER +from pyclustering.core.converter import convert_data_type from pyclustering.core.wrapper import ccore_library from pyclustering.core.pyclustering_package import pyclustering_package, package_builder, package_extractor @@ -36,13 +37,14 @@ class silhouette_ksearch_package_indexer: SILHOUETTE_KSEARCH_PACKAGE_INDEX_SCORES = 2 -def silhoeutte(sample, clusters, pointer_metric): +def silhoeutte(sample, clusters, pointer_metric, data_type): pointer_data = package_builder(sample, c_double).create() pointer_clusters = package_builder(clusters, c_size_t).create() + c_data_type = convert_data_type(data_type) ccore = ccore_library.get() ccore.silhouette_algorithm.restype = POINTER(pyclustering_package) - package = ccore.silhouette_algorithm(pointer_data, pointer_clusters, pointer_metric) + package = ccore.silhouette_algorithm(pointer_data, pointer_clusters, pointer_metric, c_data_type) result = package_extractor(package).extract() ccore.free_pyclustering_package(package) diff --git a/pyclustering/core/wrapper.py b/pyclustering/core/wrapper.py index ad41321f..1c9268aa 100755 --- a/pyclustering/core/wrapper.py +++ b/pyclustering/core/wrapper.py @@ -31,14 +31,14 @@ from pyclustering.core.definitions import * -ccore_library_instance = None -ccore_library_version = "0.9.1" +ccore_library_instance = None +ccore_library_version = "0.9.2" class ccore_library: - __library = None - __workable = False - __initialized = False + __library = None + __workable = False + __initialized = False @staticmethod def get(): diff --git a/pyclustering/utils/__init__.py b/pyclustering/utils/__init__.py index fa2aa9eb..dd3ed463 100755 --- a/pyclustering/utils/__init__.py +++ b/pyclustering/utils/__init__.py @@ -75,18 +75,19 @@ def read_sample(filename): return sample -def calculate_distance_matrix(sample): +def calculate_distance_matrix(sample, metric=distance_metric(type_metric.EUCLIDEAN)): """! - @brief Calculates distance matrix for data sample (sequence of points) using Euclidean distance as a metric. + @brief Calculates distance matrix for data sample (sequence of points) using specified metric (by default Euclidean distance). @param[in] sample (array_like): Data points that are used for distance calculation. + @param[in] metric (distance_metric): Metric that is used for distance calculation between two points. @return (list) Matrix distance between data points. """ - amount_rows = len(sample); - return [ [ euclidean_distance(sample[i], sample[j]) for j in range(amount_rows) ] for i in range(amount_rows) ]; + amount_rows = len(sample) + return [[metric(sample[i], sample[j]) for j in range(amount_rows)] for i in range(amount_rows)] def read_image(filename):