Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ctl::shared_ptr #1229

Draft
wants to merge 12 commits into
base: master
Choose a base branch
from
97 changes: 97 additions & 0 deletions ctl/shared_ptr.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// -*- mode:c++; indent-tabs-mode:nil; c-basic-offset:4; coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Justine Alexandra Roberts Tunney
//
// Permission to use, copy, modify, and/or distribute this software for
// any purpose with or without fee is hereby granted, provided that the
// above copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.

#include "shared_ptr.h"

#include "libc/intrin/atomic.h"

namespace {

// Acquires one additional reference on the counter `r`.
//
// memory_order_relaxed suffices here: taking an extra reference needs
// no ordering with other memory operations, only atomicity of the
// increment itself.
inline void
incref(_Atomic(size_t)* r)
{
size_t r2 = atomic_fetch_add_explicit(r, 1, memory_order_relaxed);
// Trap if the pre-increment value exceeded SIZE_MAX / 2: a count that
// large indicates a leak or counter wraparound, and aborting is safer
// than letting the counter overflow.
if (r2 > ((size_t)-1) >> 1)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we realistically expect 2**63 references to happen? Probably not worth the bloat. This function is basically just an XADD instruction. I'd say put it in the header. The atomics header in cosmo libc is pretty lightweight. It's a natural dependency of this class.

__builtin_trap();
}

// Releases one reference on the counter `r`; returns 1 iff this was
// the last reference.
//
// The counter is biased: it starts at 0 with one owner (see
// shared_control::use_count, which adds the bias back), so the final
// release observes a pre-decrement value of 0. The wrap to SIZE_MAX is
// harmless because the counter's object is then torn down.
//
// Memory ordering is the classic refcount idiom: the decrement is a
// release so all of this thread's prior writes happen-before the
// destruction, and the acquire fence on the last-reference path makes
// every other releaser's writes visible to the destroying thread.
inline int
decref(_Atomic(size_t)* r)
{
if (!atomic_fetch_sub_explicit(r, 1, memory_order_release)) {
atomic_thread_fence(memory_order_acquire);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how I feel about atomic_thread_fence(). I just tried using a similar trick to this for my syncthreads() function on an M2 processor and it made things slower. For example:

void
syncthreads(void)
{
    static atomic_uint count;
    static atomic_uint phaser;
    int phase = atomic_load_explicit(&phaser, memory_order_relaxed);
    if (atomic_fetch_add_explicit(&count, 1, memory_order_acq_rel) + 1 == nth) {
        atomic_store_explicit(&count, 0, memory_order_relaxed);
        atomic_store_explicit(&phaser, phase + 1, memory_order_release);
    } else {
        int backoff = 0;
        while (atomic_load_explicit(&phaser, memory_order_acquire) == phase)
            backoff = delay(backoff);
    }
}

Became:

void
syncthreads(void)
{
    static atomic_uint count;
    static atomic_uint phaser;
    int phase = atomic_load_explicit(&phaser, memory_order_relaxed);
    if (atomic_fetch_add_explicit(&count, 1, memory_order_release) + 1 == nth) {
        atomic_thread_fence(memory_order_acquire);
        atomic_store_explicit(&count, 0, memory_order_relaxed);
        atomic_store_explicit(&phaser, phase + 1, memory_order_release);
    } else {
        int backoff = 0;
        while (atomic_load_explicit(&phaser, memory_order_acquire) == phase)
            backoff = delay(backoff);
    }
}

One nasty thing about atomic_thread_fence() is it isn't supported by TSAN at all. So it's harder to prove code is correct.

How certain are you that this decref() implementation is optimal? Could you whip up a torture test + benchmark that demonstrates its merit in comparison to alternatives? I would have assumed that that doing an acquire load beforehand to check for zero would be faster, since it'd let you avoid the costly XADD instruction in many cases.

return 1;
}
return 0;
}

// Reads the current value of the counter without imposing any memory
// ordering; the result may be stale by the time the caller sees it.
inline size_t
getref(const _Atomic(size_t)* r)
{
    size_t count = atomic_load_explicit(r, memory_order_relaxed);
    return count;
}

} // namespace

namespace ctl {

namespace __ {

// Acquires one additional strong (shared) reference on this control
// block.
void
shared_control::keep_shared() noexcept
{
incref(&shared);
}

// Releases one strong reference. When the last strong reference goes
// away, the managed object is destroyed, then the strong side's weak
// reference is dropped (which may in turn free the control block).
void
shared_control::drop_shared() noexcept
{
    if (!decref(&shared))
        return;
    on_zero_shared();
    drop_weak();
}

// Acquires one additional weak reference on this control block.
void
shared_control::keep_weak() noexcept
{
incref(&weak);
}

// Releases one weak reference; the last one frees the control block
// itself via the on_zero_weak() hook.
void
shared_control::drop_weak() noexcept
{
    if (!decref(&weak))
        return;
    on_zero_weak();
}

// Returns the number of strong references. The stored counter is
// biased (a lone owner keeps it at zero), so the bias is added back
// before reporting. The value is a relaxed snapshot and may be stale.
size_t
shared_control::use_count() const noexcept
{
    size_t extra = getref(&shared);
    return extra + 1;
}

// Returns the raw weak counter (relaxed snapshot).
// NOTE(review): unlike use_count(), no +1 bias is added here, so this
// excludes the weak reference implicitly held by the strong side —
// confirm this asymmetry is intended.
size_t
shared_control::weak_count() const noexcept
{
return getref(&weak);
}

} // namespace __

} // namespace ctl
274 changes: 274 additions & 0 deletions ctl/shared_ptr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
// -*-mode:c++;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8-*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
#ifndef COSMOPOLITAN_CTL_SHARED_PTR_H_
#define COSMOPOLITAN_CTL_SHARED_PTR_H_
#include "new.h"
#include "unique_ptr.h"

namespace ctl {

// TODO(mrdomino): move
// Minimal polymorphic exception base for CTL types (cf. std::exception).
struct exception
{
    // Virtual destructor: bad_weak_ptr (and future error types) derive
    // from this class and may be deleted through a base pointer, which
    // would be undefined behavior without a virtual destructor.
    virtual ~exception() = default;

    // Returns a never-null, statically allocated description.
    virtual const char* what() const noexcept
    {
        return "exception";
    }
};

namespace __ {

// Shared state behind shared_ptr (and a future weak_ptr): a strong
// count, a weak count, and virtual hooks invoked when each hits zero.
//
// Both counters are biased: they start at zero even though one strong
// reference exists, so use_count() reports the stored value plus one.
struct shared_control
{
    _Atomic(size_t) shared;
    _Atomic(size_t) weak;

    constexpr shared_control() noexcept : shared(0), weak(0)
    {
    }

    // Control blocks are never copied; they are shared by address.
    shared_control(const shared_control&) = delete;

    // Deleted through this base by on_zero_weak(), hence virtual.
    virtual ~shared_control() = default;

    void keep_shared() noexcept;
    void drop_shared() noexcept;
    void keep_weak() noexcept;
    void drop_weak() noexcept;
    size_t use_count() const noexcept;
    size_t weak_count() const noexcept;

  private:
    // Invoked when the last strong reference is dropped; destroys the
    // managed object but leaves this control block alive.
    virtual void on_zero_shared() noexcept = 0;
    // Invoked when the last weak reference is dropped; frees this
    // control block.
    virtual void on_zero_weak() noexcept = 0;
};

// Control block for a shared_ptr adopted from a raw pointer: stores
// the pointer and the deleter D alongside the reference counts. The
// pointee is allocated separately from this block.
template<typename T, typename D>
struct shared_pointer : shared_control
{
T* p;
[[no_unique_address]] D d;

// Allocates a control block owning p.
// NOTE(review): p is guarded by a default-deleting unique_ptr while
// the block is allocated, so if `new` throws, p is freed with plain
// delete rather than with d — std::shared_ptr invokes d(p) in that
// case; confirm this difference is intended for custom deleters.
static shared_pointer* make(T* const p, auto&& d)
{
auto p2 = unique_ptr(p);
auto r = new shared_pointer(p2.release(), ctl::forward<decltype(d)>(d));
return r;
}

private:
explicit constexpr shared_pointer(T* const p, auto&& d) noexcept
: p(p), d(ctl::forward<decltype(d)>(d))
{
}

// Last strong reference gone: destroy the pointee via the deleter.
void on_zero_shared() noexcept override
{
ctl::move(d)(p);
}

// Last weak reference gone: free this control block.
void on_zero_weak() noexcept override
{
delete this;
}
};

// Control block used by make_shared: the T lives inline in the block,
// so the object and its reference counts share a single allocation.
template<typename T>
struct shared_emplace : shared_control
{
// Anonymous union defers construction/destruction of t: the block
// may exist before construct() and after on_zero_shared() without a
// live T, and the compiler will not implicitly destroy the member.
union
{
T t;
};

// Placement-constructs the T. Must be called exactly once, before
// the block is handed to any shared_ptr.
template<typename... Args>
void construct(Args&&... args)
{
::new (&t) T(ctl::forward<Args>(args)...);
}

// Allocates an empty block; ownership passes to the caller until a
// shared_ptr adopts it (see make_shared).
static unique_ptr<shared_emplace> make_unique()
{
return new shared_emplace();
}

private:
explicit constexpr shared_emplace() noexcept
{
}

// Last strong reference gone: destroy t in place. Its storage stays
// allocated until the last weak reference goes away.
void on_zero_shared() noexcept override
{
t.~T();
}

// Last weak reference gone: free the block (and t's storage) at once.
void on_zero_weak() noexcept override
{
delete this;
}
};

} // namespace __

// Thrown when constructing a shared_ptr from an expired weak_ptr
// (cf. std::bad_weak_ptr).
struct bad_weak_ptr : ctl::exception
{
const char* what() const noexcept override
{
return "bad_weak_ptr";
}
};

// Reference-counted smart pointer (a subset of std::shared_ptr).
//
// Copies share ownership of the managed object through a heap-allocated
// control block; the object is destroyed when the last owning
// shared_ptr goes away. D is the deleter used when adopting a raw
// pointer (make_shared bypasses it and destroys in place).
template<typename T, typename D = default_delete<T>>
class shared_ptr
{
  public:
    using element_type = T; // TODO(mrdomino): remove extent?
    using deleter_type = D;

    // Empty state: no object, no control block.
    constexpr shared_ptr(nullptr_t = nullptr) noexcept : p(nullptr), rc(nullptr)
    {
    }

    // Takes ownership of p, allocating a control block for it.
    explicit shared_ptr(auto* const p)
      : p(p), rc(__::shared_pointer<T, D>::make(p, D()))
    {
    }

    shared_ptr(const shared_ptr& r) noexcept : p(r.p), rc(r.rc)
    {
        if (rc)
            rc->keep_shared();
    }

    shared_ptr(shared_ptr&& r) noexcept : p(r.p), rc(r.rc)
    {
        r.p = nullptr;
        r.rc = nullptr;
    }

    // Aliasing constructor: shares ownership with r but points at p.
    template<typename U>
    shared_ptr(const shared_ptr<U>& r, T* const p) noexcept : p(p), rc(r.rc)
    {
        if (rc)
            rc->keep_shared();
    }

    // Aliasing move constructor: steals r's reference; r becomes empty.
    template<typename U>
    shared_ptr(shared_ptr<U>&& r, T* const p) noexcept : p(p), rc(r.rc)
    {
        r.p = nullptr;
        r.rc = nullptr;
    }

    // TODO(mrdomino): moar ctors

    ~shared_ptr()
    {
        if (rc)
            rc->drop_shared();
    }

    // Unified copy/move assignment via copy-and-swap; also handles
    // self-assignment correctly.
    shared_ptr& operator=(shared_ptr r) noexcept
    {
        swap(r);
        return *this;
    }

    void reset() noexcept
    {
        if (rc)
            rc->drop_shared();
        p = nullptr;
        rc = nullptr;
    }

    // Replaces the managed object with p2 (deleted with D() later).
    void reset(auto* const p2)
    {
        shared_ptr<T>(p2).swap(*this);
    }

    void swap(shared_ptr& r) noexcept
    {
        using ctl::swap;
        swap(p, r.p);
        swap(rc, r.rc);
    }

    element_type* get() const noexcept
    {
        return p;
    }

    // TODO(mrdomino): fix for shared_ptr<void>
    T& operator*() const noexcept
    {
        if (!p)
            __builtin_trap();
        return *p;
    }

    // TODO(mrdomino): fix for shared_ptr<T[]>
    T* operator->() const noexcept
    {
        if (!p)
            __builtin_trap();
        return p; // fixed: was `return *p`, which is not a T*
    }

    element_type& operator[](ptrdiff_t i) const
    {
        return *(p + i);
    }

    // Number of shared_ptrs sharing ownership (0 when empty).
    size_t use_count() const noexcept
    {
        return rc ? rc->use_count() : 0;
    }

    explicit operator bool() const noexcept
    {
        return p;
    }

    // Owner-based ordering: compares control blocks rather than stored
    // pointers, so aliasing pointers into the same allocation compare
    // equivalent (matches std::shared_ptr::owner_before semantics).
    template<typename U>
    bool owner_before(const shared_ptr<U>& r) const noexcept
    {
        return rc < r.rc;
    }

    // TODO(mrdomino): owner_before(weak_ptr const&)

  private:
    constexpr shared_ptr(T* const p, __::shared_control* rc) noexcept
      : p(p), rc(rc)
    {
    }

    // The aliasing constructors and owner_before need access to the
    // private members of other instantiations (shared_ptr<U>).
    template<typename U, typename E>
    friend class shared_ptr;

    template<typename U, typename... Args>
    friend shared_ptr<U> make_shared(Args&&... args);

    T* p;
    __::shared_control* rc;
};

// Constructs a T in place inside a single allocation shared with its
// control block, and returns a shared_ptr owning it.
template<typename T, typename... Args>
shared_ptr<T>
make_shared(Args&&... args)
{
    auto ctrl = __::shared_emplace<T>::make_unique();
    ctrl->construct(ctl::forward<Args>(args)...);
    shared_ptr<T> result(&ctrl->t, ctrl.get());
    // The shared_ptr now owns the control block; disarm the unique_ptr.
    ctrl.release();
    return result;
}

// TODO(mrdomino): non-member functions (make_shared et al)
// TODO(mrdomino): weak_ptr

// TODO(someday): std::atomic<std::shared_ptr>

} // namespace ctl

#endif // COSMOPOLITAN_CTL_SHARED_PTR_H_
Loading
Loading