Skip to content

Commit

Permalink
Merge pull request #7144 from realm/tg/set-fixes
Browse files Browse the repository at this point in the history
Use typed comparisons for set algebra on strings and binary
  • Loading branch information
jedelbo authored Nov 17, 2023
2 parents 870615d + 7e7b52a commit b0b4936
Show file tree
Hide file tree
Showing 7 changed files with 294 additions and 197 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

### Fixed
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
* None.
* `Set::assign_intersection()` on `Set<StringData>`, `Set<BinaryData>`, and `Set<Mixed>` containing string or binary would cause a use-after-free if a set was intersected with itself ([PR #7144](https://github.com/realm/realm-core/pull/7144), since v10.0.0).
* Set algebra on `Set<StringData>` and `Set<BinaryData>` gave incorrect results when used on platforms where `char` is signed ([#7135](https://github.com/realm/realm-core/issues/7135), since v13.23.3).

### Breaking changes
* None.
Expand Down
22 changes: 11 additions & 11 deletions src/realm/collection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@ class CollectionBase {
return get_table()->get_column_name(get_col_key());
}

bool operator==(const CollectionBase& other) const noexcept
{
return get_table() == other.get_table() && get_owner_key() == other.get_owner_key() &&
get_col_key() == other.get_col_key();
}

bool operator!=(const CollectionBase& other) const noexcept
{
return !(*this == other);
}

// These are shadowed by typed versions in subclasses
using value_type = Mixed;
CollectionIterator<CollectionBase> begin() const;
Expand Down Expand Up @@ -373,17 +384,6 @@ class CollectionBaseImpl : public Interface, protected ArrayParent {
using Interface::get_table;
using Interface::get_target_table;

bool operator==(const CollectionBaseImpl& other) const noexcept
{
return get_table() == other.get_table() && get_owner_key() == other.get_owner_key() &&
get_col_key() == other.get_col_key();
}

bool operator!=(const CollectionBaseImpl& other) const noexcept
{
return !(*this == other);
}

protected:
Obj m_obj;
ColKey m_col_key;
Expand Down
259 changes: 130 additions & 129 deletions src/realm/set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,185 +176,186 @@ void SetBase::clear_repl(Replication* repl) const
repl->set_clear(*this);
}

static std::vector<Mixed> convert_to_set(const CollectionBase& rhs)
namespace {
template <typename T>
std::vector<T> convert_to_set(const CollectionBase& rhs)
{
std::vector<T> vec;
vec.reserve(rhs.size());
for (Mixed value : rhs) {
vec.push_back(value.get<T>());
}
std::sort(vec.begin(), vec.end(), SetElementLessThan<T>());
vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
return vec;
}

template <>
std::vector<Mixed> convert_to_set<Mixed>(const CollectionBase& rhs)
{
std::vector<Mixed> mixed(rhs.begin(), rhs.end());
std::sort(mixed.begin(), mixed.end(), SetElementLessThan<Mixed>());
mixed.erase(std::unique(mixed.begin(), mixed.end()), mixed.end());
return mixed;
}

bool SetBase::is_subset_of(const CollectionBase& rhs) const
template <typename Set, typename T, typename Fn>
auto to_set_if_needed(const SetBase& set, const CollectionBase& rhs, Fn fn)
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return is_subset_of(other_set->begin(), other_set->end());
SetElementLessThan<T> cmp;
auto& typed_set = static_cast<const Set&>(set);
if (typeid(rhs) == typeid(Set)) {
return fn(typed_set, static_cast<const Set&>(rhs), cmp);
}
auto other_set = convert_to_set(rhs);
return is_subset_of(other_set.begin(), other_set.end());
return fn(typed_set, convert_to_set<T>(rhs), cmp);
}

template <class It1, class It2>
bool SetBase::is_subset_of(It1 first, It2 last) const
template <typename Fn>
auto as_typed_sets(const SetBase& set, const CollectionBase& rhs, Fn fn)
{
return std::includes(first, last, begin(), end(), SetElementLessThan<Mixed>{});
// Set<StringData> and Set<BinaryData> compare using `char` rather than
// `unsigned char`, resulting in a different order from Set<Mixed> on
// platforms where `char` is signed. This means that the elements will not
// be ordered properly when using SetElementLessThan<Mixed>, and we need
// to use the typed comparisons.
// This can go away in the next major version, as we've fixed the order
// of non-Mixed sets.
switch (set.get_col_key().get_type()) {
case col_type_String:
return to_set_if_needed<Set<StringData>, StringData>(set, rhs, fn);
case col_type_Binary:
return to_set_if_needed<Set<BinaryData>, BinaryData>(set, rhs, fn);
default:
return to_set_if_needed<SetBase, Mixed>(set, rhs, fn);
}
}
} // anonymous namespace

bool SetBase::is_strict_subset_of(const CollectionBase& rhs) const
bool SetBase::is_subset_of(const CollectionBase& rhs) const
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return size() != rhs.size() && is_subset_of(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return size() != other_set.size() && is_subset_of(other_set.begin(), other_set.end());
return as_typed_sets(*this, rhs, [](auto&& lhs, auto&& rhs, auto cmp) {
return std::includes(rhs.begin(), rhs.end(), lhs.begin(), lhs.end(), cmp);
});
}

bool SetBase::is_superset_of(const CollectionBase& rhs) const
bool SetBase::is_strict_subset_of(const CollectionBase& rhs) const
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return is_superset_of(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return is_superset_of(other_set.begin(), other_set.end());
return size() != rhs.size() && is_subset_of(rhs);
}

template <class It1, class It2>
bool SetBase::is_superset_of(It1 first, It2 last) const
bool SetBase::is_superset_of(const CollectionBase& rhs) const
{
return std::includes(begin(), end(), first, last, SetElementLessThan<Mixed>{});
return as_typed_sets(*this, rhs, [](auto&& lhs, auto&& rhs, auto cmp) {
return std::includes(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), cmp);
});
}

bool SetBase::is_strict_superset_of(const CollectionBase& rhs) const
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return size() != rhs.size() && is_superset_of(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return size() != other_set.size() && is_superset_of(other_set.begin(), other_set.end());
return as_typed_sets(*this, rhs, [](auto&& lhs, auto&& rhs, auto cmp) {
return lhs.size() > rhs.size() && std::includes(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), cmp);
});
}

bool SetBase::intersects(const CollectionBase& rhs) const
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return intersects(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return intersects(other_set.begin(), other_set.end());
}

template <class It1, class It2>
bool SetBase::intersects(It1 first, It2 last) const
{
SetElementLessThan<Mixed> less;
auto it = begin();
while (it != end() && first != last) {
if (less(*it, *first)) {
++it;
}
else if (less(*first, *it)) {
++first;
}
else {
return true;
return as_typed_sets(*this, rhs, [](auto&& lhs, auto&& rhs, auto cmp) {
auto first = rhs.begin(), last = rhs.end();
auto it = lhs.begin();
while (it != lhs.end() && first != last) {
if (cmp(*it, *first)) {
++it;
}
else if (cmp(*first, *it)) {
++first;
}
else {
return true;
}
}
}
return false;
return false;
});
}

bool SetBase::set_equals(const CollectionBase& rhs) const
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return size() == rhs.size() && is_subset_of(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return size() == other_set.size() && is_subset_of(other_set.begin(), other_set.end());
return as_typed_sets(*this, rhs, [](auto&& lhs, auto&& rhs, auto cmp) {
return lhs.size() == rhs.size() && std::includes(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), cmp);
});
}

void SetBase::assign_union(const CollectionBase& rhs)
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return assign_union(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return assign_union(other_set.begin(), other_set.end());
}

template <class It1, class It2>
void SetBase::assign_union(It1 first, It2 last)
{
std::vector<Mixed> the_diff;
std::set_difference(first, last, begin(), end(), std::back_inserter(the_diff), SetElementLessThan<Mixed>{});
// 'the_diff' now contains all the elements that are in foreign set, but not in 'this'
// Now insert those elements
for (auto&& value : the_diff) {
insert_any(value);
}
if (*this == rhs) {
// Union of a set with itself is itself
return;
}
return as_typed_sets(*this, rhs, [&](auto&& lhs, auto&& rhs, auto cmp) {
std::vector<Mixed> the_diff;
std::set_difference(rhs.begin(), rhs.end(), lhs.begin(), lhs.end(), std::back_inserter(the_diff), cmp);
// 'the_diff' now contains all the elements that are in foreign set, but not in 'this'
// Now insert those elements
for (auto&& value : the_diff) {
insert_any(value);
}
});
}

void SetBase::assign_intersection(const CollectionBase& rhs)
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return assign_intersection(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return assign_intersection(other_set.begin(), other_set.end());
}

template <class It1, class It2>
void SetBase::assign_intersection(It1 first, It2 last)
{
std::vector<Mixed> intersection;
std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection), SetElementLessThan<Mixed>{});
clear();
// Elements in intersection comes from foreign set, so ok to use here
for (auto&& value : intersection) {
insert_any(value);
}
if (*this == rhs) {
// Intersection of a set with itself is itself
return;
}
return as_typed_sets(*this, rhs, [&](auto&& lhs, auto&& rhs, auto cmp) {
std::vector<Mixed> intersection;
std::set_intersection(rhs.begin(), rhs.end(), lhs.begin(), lhs.end(), std::back_inserter(intersection), cmp);
clear();
// Elements in intersection comes from foreign set, so ok to use here
for (auto&& value : intersection) {
insert_any(value);
}
});
}

void SetBase::assign_difference(const CollectionBase& rhs)
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return assign_difference(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return assign_difference(other_set.begin(), other_set.end());
}

template <class It1, class It2>
void SetBase::assign_difference(It1 first, It2 last)
{
std::vector<Mixed> intersection;
std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection), SetElementLessThan<Mixed>{});
// 'intersection' now contains all the elements that are in both foreign set and 'this'.
// Remove those elements. The elements comes from the foreign set, so ok to refer to.
for (auto&& value : intersection) {
erase_any(value);
}
if (*this == rhs) {
// Difference between a set and itself is empty
clear();
return;
}
return as_typed_sets(*this, rhs, [&](auto&& lhs, auto&& rhs, auto cmp) {
std::vector<Mixed> intersection;
std::set_intersection(rhs.begin(), rhs.end(), lhs.begin(), lhs.end(), std::back_inserter(intersection), cmp);
// 'intersection' now contains all the elements that are in both foreign set and 'this'.
// Remove those elements. The elements comes from the foreign set, so ok to refer to.
for (auto&& value : intersection) {
erase_any(value);
}
});
}

void SetBase::assign_symmetric_difference(const CollectionBase& rhs)
{
if (auto other_set = dynamic_cast<const SetBase*>(&rhs)) {
return assign_symmetric_difference(other_set->begin(), other_set->end());
}
auto other_set = convert_to_set(rhs);
return assign_symmetric_difference(other_set.begin(), other_set.end());
}

template <class It1, class It2>
void SetBase::assign_symmetric_difference(It1 first, It2 last)
{
std::vector<Mixed> difference;
std::set_difference(first, last, begin(), end(), std::back_inserter(difference), SetElementLessThan<Mixed>{});
std::vector<Mixed> intersection;
std::set_intersection(first, last, begin(), end(), std::back_inserter(intersection), SetElementLessThan<Mixed>{});
// Now remove the common elements and add the differences
for (auto&& value : intersection) {
erase_any(value);
}
for (auto&& value : difference) {
insert_any(value);
}
if (*this == rhs) {
// Difference between a set and itself is empty
clear();
return;
}
return as_typed_sets(*this, rhs, [&](auto&& lhs, auto&& rhs, auto cmp) {
std::vector<Mixed> difference;
std::set_difference(rhs.begin(), rhs.end(), lhs.begin(), lhs.end(), std::back_inserter(difference), cmp);
std::vector<Mixed> intersection;
std::set_intersection(rhs.begin(), rhs.end(), lhs.begin(), lhs.end(), std::back_inserter(intersection), cmp);
// Now remove the common elements and add the differences
for (auto&& value : intersection) {
erase_any(value);
}
for (auto&& value : difference) {
insert_any(value);
}
});
}

template <>
Expand Down
22 changes: 0 additions & 22 deletions src/realm/set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,28 +61,6 @@ class SetBase : public CollectionBase {
throw InvalidArgument(ErrorCodes::PropertyNotNullable,
util::format("Set: %1", CollectionBase::get_property_name()));
}

private:
template <class It1, class It2>
bool is_subset_of(It1, It2) const;

template <class It1, class It2>
bool is_superset_of(It1, It2) const;

template <class It1, class It2>
bool intersects(It1, It2) const;

template <class It1, class It2>
void assign_union(It1, It2);

template <class It1, class It2>
void assign_intersection(It1, It2);

template <class It1, class It2>
void assign_difference(It1, It2);

template <class It1, class It2>
void assign_symmetric_difference(It1, It2);
};

template <class T>
Expand Down
Loading

0 comments on commit b0b4936

Please sign in to comment.