Skip to content

Commit

Permalink
Merge pull request #2 from dbahrdt/master
Browse files Browse the repository at this point in the history
update
  • Loading branch information
somakolli authored Feb 9, 2019
2 parents c0d0373 + c344047 commit 6e96413
Show file tree
Hide file tree
Showing 4 changed files with 321 additions and 1 deletion.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ set(LIBOSCAR_SOURCES_CPP
src/CellDistanceBySphere.cpp
src/KVstats.cpp
src/KVClustering.cpp
src/KoMaClustering.cpp
)

add_library(${PROJECT_NAME} STATIC
Expand Down
93 changes: 93 additions & 0 deletions include/liboscar/KoMaClustering.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#pragma once
#ifndef OSCAR_WEB_KOMACLUSTERING_H
#define OSCAR_WEB_KOMACLUSTERING_H

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <list>
#include <unordered_map>
#include <utility>
#include <vector>

#include "OsmKeyValueObjectStore.h"
#include "KVClustering.h"

namespace liboscar {
namespace detail {
namespace KoMaClustering {
// Accumulated clustering data: for every (keyId, valueId) pair the ids of the
// items carrying it, plus two count vectors derived from that map by sort().
struct Data {
    // (keyId, valueId)
    using KeyValue = std::pair<uint32_t, uint32_t>;
    // (keyId, valueId) -> ids of the items that carry this pair
    using KeyValueItemMap = std::unordered_map<KeyValue, std::vector<uint32_t>>;
    // ((keyId, valueId), number of items)
    using KeyValueCountVec = std::vector<std::pair<KeyValue, uint32_t>>;

    KeyValueItemMap keyValueItemMap;
    // Filled by sort(): ordered by descending item count.
    KeyValueCountVec keyValueCountVec;
    // Filled by sort(): ordered by ascending keyId, then descending item count.
    KeyValueCountVec keyValueCountVecSortedByIds;

    Data();

    // Record that item `itemId` carries the key-value pair `key`.
    void update(const KeyValue &key, const uint32_t& itemId);

    // Sort the per-pair item lists and build both count vectors.
    void sort();
};

// Bundle of references handed to a Worker. Nothing is owned here, so every
// referenced object has to outlive the State.
struct State {
    const Static::OsmKeyValueObjectStore &store;
    const sserialize::CellQueryResult &cqr;
    const kvclustering::KeyExclusions &keyExclusions;
    const kvclustering::KeyValueExclusions &keyValueExclusions;
    // Output: the Data instance a Worker writes into.
    Data & d;

    State(const Static::OsmKeyValueObjectStore &store,
          const sserialize::CellQueryResult &cqr,
          const kvclustering::KeyExclusions &keyExclusions,
          const kvclustering::KeyValueExclusions &keyValueExclusions,
          Data & d);
};

// Callable that reads the query result referenced by `state` and records the
// key-value pairs of its items in state->d.
struct Worker {
    // Not owned; has to stay valid while operator() runs.
    State * state;

    void operator()();

    Worker(State * state);
};
} // end namespace KoMaClustering
} // end namespace detail

// Implementation
// Implementation
//
// Key-value clustering over the items of a cell query result.
// The object keeps references to the store, the query result and both
// exclusion sets that are passed to the constructor; all of them have to
// outlive the KoMaClustering instance. The exclusion sets are modified by
// facets().
class KoMaClustering final : public kvclustering::Interface {
public:
    using KeyValueInfo = kvclustering::KeyValueInfo;
    using KeyInfo = kvclustering::KeyInfo;
    using ValueInfo = kvclustering::ValueInfo;
    // (keyId, valueId)
    using KeyValuePair = std::pair<uint32_t, uint32_t>;
    // (valueId, number of items)
    using ValueCountPair = std::pair<uint32_t, uint32_t>;
public:
    KoMaClustering(const Static::OsmKeyValueObjectStore &store,
                   const sserialize::CellQueryResult &cqr,
                   kvclustering::KeyExclusions &keyExclusions,
                   kvclustering::KeyValueExclusions &keyValueExclusions);

    ~KoMaClustering() override = default;

private:
    // NOTE: constructor initializers run in this declaration order.
    const Static::OsmKeyValueObjectStore &store;
    detail::KoMaClustering::Data m_data;
    const sserialize::CellQueryResult &cqr;
    kvclustering::KeyExclusions &keyExclusions;
    kvclustering::KeyValueExclusions &keyValueExclusions;
public:
    // Collects the key-value pairs of all items in the query result and sorts
    // the collected data. Has to run before any of the query functions below.
    void preprocess() override;

    // Returns up to k entries of the form (keyId, [(valueId, itemCount), ...]).
    // Every returned key is added to the key exclusions.
    std::vector<std::pair<uint32_t, std::list<std::pair<uint32_t, uint32_t>>>> facets(uint32_t k);

    // Returns (valueId, itemCount) for every collected pair whose key is keyId.
    std::list<ValueCountPair> findValuesToKey(std::uint32_t keyId);

    void exclude(const kvclustering::KeyExclusions & e) override;

    void exclude(const kvclustering::KeyValueExclusions & e) override;

    std::vector<KeyInfo> topKeys(uint32_t k) override;

    std::vector<KeyValueInfo> topKeyValues(uint32_t k) override;

    // Returns true if the number of intersections is greater than minNumber.
    // Both ranges have to be sorted ascending. The template is defined in
    // KoMaClustering.cpp only, so it can only be instantiated from there.
    template<typename It>
    bool hasIntersection(It beginI, It endI, It beginJ, It endJ, const std::float_t &minNumber);
};


} // end namespace liboscar

#endif //OSCAR_WEB_KOMACLUSTERING_H
226 changes: 226 additions & 0 deletions src/KoMaClustering.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
#include <liboscar/KoMaClustering.h>

namespace liboscar{
namespace detail {
namespace KoMaClustering {
// Append itemId to the item list of the given key-value pair; the list is
// created on first use.
void
Data::update(const KeyValue &key, const uint32_t& itemId) {
    std::vector<uint32_t> &itemsOfPair = keyValueItemMap[key];
    itemsOfPair.push_back(itemId);
}
void
Data::sort() {
// sort
for (auto &keyValueItems : keyValueItemMap){
std::sort(keyValueItems.second.begin(), keyValueItems.second.end());
keyValueCountVec.emplace_back(std::make_pair(keyValueItems.first, keyValueItems.second.size()));
keyValueCountVecSortedByIds.emplace_back(std::make_pair(keyValueItems.first, keyValueItems.second.size()));
}
// sort all keyValues descending by itemCount to find the top KeyValues faster
std::sort(keyValueCountVec.begin(), keyValueCountVec.end(),
[](std::pair<KeyValue, std::uint32_t> const &a,
std::pair<KeyValue, std::uint32_t> const &b) {
return a.second != b.second ? a.second > b.second : a.first < b.first;
});
std::sort(keyValueCountVecSortedByIds.begin(), keyValueCountVecSortedByIds.end(),
[](std::pair<KeyValue, std::uint32_t> const &a,
std::pair<KeyValue, std::uint32_t> const &b) {
return a.first.first != b.first.first ? a.first.first < b.first.first : a.second > b.second;
});

}
// All members are containers that start out empty.
Data::Data() = default;

// Bind all references; nothing is copied.
// The initializers are listed in member declaration order, which is the order
// in which they are executed regardless of how they are written here.
State::State(const Static::OsmKeyValueObjectStore &store, const sserialize::CellQueryResult &cqr,
             const kvclustering::KeyExclusions &keyExclusions,
             const kvclustering::KeyValueExclusions &keyValueExclusions,
             Data & d) :
    store(store),
    cqr(cqr),
    keyExclusions(keyExclusions),
    keyValueExclusions(keyValueExclusions),
    d(d)
{}
// Remember the shared state; the pointer is not owned.
Worker::Worker(State * state) : state(state) {}
// Walk over every cell of the query result and every item index of that cell,
// and register each (keyId, valueId) pair of the item in the shared Data.
void Worker::operator()() {
    const sserialize::CellQueryResult::const_iterator cellsEnd(state->cqr.end());
    for (sserialize::CellQueryResult::const_iterator cell(state->cqr.begin()); cell != cellsEnd; ++cell) {
        for (const uint32_t &itemId : cell.idx()) {
            const auto &item = state->store.kvBaseItem(itemId);
            // one update per key-value entry of the item
            const uint32_t entryCount = item.size();
            for (uint32_t pos = 0; pos < entryCount; ++pos) {
                const Data::KeyValue keyValue(item.keyId(pos), item.valueId(pos));
                state->d.update(keyValue, itemId);
            }
        }
    }
}


} //end namespace KoMaClustering
} //end namespace detail

// Store references to the arguments; m_data is default-constructed and stays
// empty until preprocess() is called.
KoMaClustering::KoMaClustering(const Static::OsmKeyValueObjectStore &store,
                               const sserialize::CellQueryResult &cqr,
                               kvclustering::KeyExclusions &keyExclusions,
                               kvclustering::KeyValueExclusions &keyValueExclusions) :
    store(store),
    cqr(cqr),
    keyExclusions(keyExclusions),
    keyValueExclusions(keyValueExclusions)
{}

void KoMaClustering::preprocess() {
auto state = detail::KoMaClustering::State(store, cqr, keyExclusions, keyValueExclusions, m_data);
auto worker = detail::KoMaClustering::Worker(&state);
worker();
state.d.sort();
}

// Merge the key exclusions in `e` into the exclusions used by this clustering.
void KoMaClustering::exclude(const kvclustering::KeyExclusions &e) {
    // A bare `keyExclusions + e;` computes a value and throws it away, so the
    // merged set has to be assigned back to take effect.
    // NOTE(review): assumes operator+ returns the merged KeyExclusions by
    // value and that the type is assignable -- confirm against KVClustering.h.
    keyExclusions = keyExclusions + e;
}

// Merge the key-value exclusions in `e` into the exclusions used by this
// clustering.
void KoMaClustering::exclude(const kvclustering::KeyValueExclusions &e) {
    // A bare `keyValueExclusions + e;` computes a value and throws it away,
    // so the merged set has to be assigned back to take effect.
    // NOTE(review): assumes operator+ returns the merged KeyValueExclusions
    // by value and that the type is assignable -- confirm against
    // KVClustering.h.
    keyValueExclusions = keyValueExclusions + e;
}

// Not implemented: always yields an empty vector, k is ignored.
std::vector<KoMaClustering::KeyInfo> KoMaClustering::topKeys(uint32_t k) {
    std::vector<KeyInfo> none;
    return none;
}

std::vector<KoMaClustering::KeyValueInfo> KoMaClustering::topKeyValues(uint32_t k) {
std::vector<std::pair<KeyValuePair, std::uint32_t>> result;
auto &countVec = m_data.keyValueCountVec;
auto &itemMap = m_data.keyValueItemMap;
auto itI = countVec.begin() + 1;
bool startParentsFound = false;
std::float_t maxNumberOfIntersections;

for (; itI < countVec.end(); ++itI) {
if(keyExclusions.contains(itI->first.first))
continue;
if(keyValueExclusions.contains(itI -> first.first, itI->first.second))
continue;
for (auto itJ = countVec.begin(); itJ < itI; ++itJ) {
if(keyExclusions.contains(itJ->first.first))
continue;
if(keyValueExclusions.contains(itJ -> first.first, itJ->first.second))
continue;
const std::vector<uint32_t> &setI = itemMap[itI->first];
const std::vector<uint32_t> &setJ = itemMap[itJ->first];

maxNumberOfIntersections = (setI.size() + setJ.size()) / 200.0f;
if (!hasIntersection(setI.begin(), setI.end(), setJ.begin(), setJ.end(), maxNumberOfIntersections)) {
// no required amount of intersections
// add both parents to results
result.emplace_back(itJ->first, itJ->second);
result.emplace_back(itI->first, itI->second);
//end the algorithm
startParentsFound = true;
break;
}
}
if(startParentsFound)
break;
}

if (startParentsFound) {
for (auto itK = itI + 1; itK < countVec.end() && result.size() < k; ++itK) {
if(keyExclusions.contains(itK->first.first))
continue;
if(keyValueExclusions.contains(itK -> first.first, itK->first.second))
continue;
bool discarded = false;
for (auto &parentPair : result) {
maxNumberOfIntersections = (parentPair.second + (*itK).second) / 200.0f;
const std::vector<uint32_t> &setI = itemMap[(*itK).first];
const std::vector<uint32_t> &setJ = itemMap[parentPair.first];
if (hasIntersection(setI.begin(), setI.end(), setJ.begin(), setJ.end(), maxNumberOfIntersections)) {
discarded = true;
break;
}
}
if (!discarded) {
//parent does not intersect with previous found parents; add to results
result.emplace_back(*itK);
}
}
}
std::vector<KeyValueInfo> keyValueResult;
for (auto &keyValuePair : result) {
keyValueResult.emplace_back(KeyValueInfo(KeyInfo(keyValuePair.first.first, keyValuePair.second),
ValueInfo(keyValuePair.first.second, keyValuePair.second)));
}

return keyValueResult;
}
// Merge-style walk over two ascending ranges. Counts elements that occur in
// both and returns true as soon as that count is greater than minNumber;
// returns false if either range is exhausted first.
template<typename It>
bool KoMaClustering::hasIntersection(
        It beginI, It endI, It beginJ, It endJ, const std::float_t &minNumber) {
    std::uint32_t shared = 0;
    while (beginI != endI && beginJ != endJ) {
        if (*beginI < *beginJ) {
            ++beginI;
            continue;
        }
        if (*beginJ < *beginI) {
            ++beginJ;
            continue;
        }
        // equal elements: advance both sides and count the match
        ++beginI;
        ++beginJ;
        ++shared;
        if (shared > minNumber) {
            return true;
        }
    }
    return false;
}

std::vector<std::pair<uint32_t, std::list<std::pair<uint32_t, uint32_t>>>>
KoMaClustering::facets(uint32_t k) {
std::vector<std::pair<uint32_t, std::list<std::pair<uint32_t, uint32_t>>>> result;
for(auto i = 0; i < k; ++i){
auto keyId = topKeyValues(1).at(0).ki.keyId;
const auto& valueVector = findValuesToKey(keyId);
result.emplace_back(keyId, valueVector);
keyExclusions.add(keyId);
keyExclusions.preprocess();
}
return result;
}


std::list<KoMaClustering::ValueCountPair> KoMaClustering::findValuesToKey(std::uint32_t keyId) {
std::list<KoMaClustering::ValueCountPair> result;
// binary search key
size_t lb = 0;
size_t ub = m_data.keyValueCountVecSortedByIds.size();

while(ub >= lb) {
size_t mid = lb + (ub - lb) / 2;
if(m_data.keyValueCountVecSortedByIds.at(mid).first.first == keyId ) {
// found key now go backwards and forwards to find all other key values with the same key
for(size_t i = mid; i > 0; i-- ){
if(m_data.keyValueCountVecSortedByIds.at(i).first.first == keyId){
auto& keyValuePair = m_data.keyValueCountVecSortedByIds.at(i);
// emplace in front of result so that ordering of counts is correct
result.emplace(result.begin(), std::make_pair(keyValuePair.first.second, keyValuePair.second));
} else {
break;
}
}
for(size_t i = mid + 1; i < m_data.keyValueCountVecSortedByIds.size(); i++){
if(m_data.keyValueCountVecSortedByIds.at(i).first.first == keyId){
auto& keyValuePair = m_data.keyValueCountVecSortedByIds.at(i);
result.emplace_back(keyValuePair.first.second, keyValuePair.second);
} else {
break;
}
}
break;
}
else if(m_data.keyValueCountVecSortedByIds.at(mid).first.first < keyId) {
lb = mid + 1;
} else {
ub = mid - 1;
}
}

return result;
}
} //end namespace liboscar

2 changes: 1 addition & 1 deletion vendor/sserialize

0 comments on commit 6e96413

Please sign in to comment.