/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <memory>
#include <system_error>
#include <thread>

#include <folly/CPortability.h>
#include <folly/IndexedMemPool.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/Traits.h>
#include <folly/detail/StaticSingletonManager.h>
#include <folly/lang/Aligned.h>
#include <folly/lang/SafeAssert.h>
#include <folly/synchronization/AtomicStruct.h>
#include <folly/synchronization/SaturatingSemaphore.h>

namespace folly {

template <
    template <typename> class Atom = std::atomic,
    class BatonType = SaturatingSemaphore<true, Atom>>
struct LifoSemImpl;

/// LifoSem is a semaphore that wakes its waiters in a manner intended to
/// maximize performance rather than fairness. It should be preferred
/// to a mutex+condvar or POSIX sem_t solution when all of the waiters
/// are equivalent. It is faster than a condvar or sem_t, and it has a
/// shutdown state that might save you a lot of complexity when it comes
/// time to shut down your work pipelines. LifoSem is larger than sem_t,
/// but that is only because it uses padding and alignment to avoid
/// false sharing.
///
/// LifoSem allows multi-post and multi-tryWait, and provides a shutdown
/// state that awakens all waiters. LifoSem is faster than sem_t because
/// it performs exact wakeups, so it often requires fewer system calls.
/// It provides all of the functionality of sem_t, including timed waiting.
/// It is called LifoSem because its wakeup policy is approximately LIFO,
/// rather than the usual FIFO.
///
/// The core semaphore operations provided are:
///
/// -- post() -- if there is a pending waiter, wake it up, otherwise
/// increment the value of the semaphore. If the value of the semaphore
/// is already 2^32-1, does nothing. Compare to sem_post().
///
/// -- post(n) -- equivalent to n calls to post(), but much more efficient.
/// sem_t has no equivalent to this method.
///
/// -- bool tryWait() -- if the semaphore's value is positive, decrements it
/// and returns true, otherwise returns false. Compare to sem_trywait().
///
/// -- uint32_t tryWait(uint32_t n) -- attempts to decrement the semaphore's
/// value by n, returning the amount by which it actually was decremented
/// (a value from 0 to n inclusive). Not atomic. Equivalent to n calls
/// to tryWait(). sem_t has no equivalent to this method.
///
/// -- wait() -- waits until tryWait() can succeed. Compare to sem_wait().
///
/// -- timed wait variants -- will wait until the timeout. Note that when
/// these time out, the current implementation takes a lock, blocking
/// concurrent pushes and pops. (If timed wait calls are
/// substantial, consider re-working this code to be lock-free).
///
/// LifoSem also has the notion of a shutdown state, in which any calls
/// that would block (or are already blocked) throw ShutdownSemError.
/// Note the difference between a wait() call that can complete immediately
/// and one that would block. In the former case tryWait() would succeed,
/// and no isShutdown() check is performed. In the latter case an exception
/// is thrown. This behavior allows a LifoSem controlling work distribution
/// to drain. If you want to immediately stop all waiting on shutdown,
/// you can just check isShutdown() yourself (preferably wrapped in
/// an UNLIKELY). This fast-stop behavior is easy to add, but difficult
/// to remove if you want the draining behavior, which is why we have
/// chosen the former.
///
/// All LifoSem operations except valueGuess() are guaranteed to be
/// linearizable.
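///
/// A minimal usage sketch (illustrative only; workQueue, item, and process()
/// are hypothetical placeholders, not part of this header):
///
///   folly::LifoSem sem;
///
///   // producer: publish an item, then post one unit
///   workQueue.enqueue(std::move(item));
///   sem.post();
///
///   // consumer: wait for one unit, then consume exactly one item
///   sem.wait();
///   process(workQueue.dequeue());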
typedef LifoSemImpl<> LifoSem;

/// The exception thrown when wait()ing on an isShutdown() LifoSem
class FOLLY_EXPORT ShutdownSemError : public std::runtime_error {
 public:
  using std::runtime_error::runtime_error;
};

namespace detail {

// Internally, a LifoSem is either a value or a linked list of wait nodes.
// This union is captured in the LifoSemHead type, which holds either a
// value or an indexed pointer to the list. LifoSemHead itself is a value
// type; the head is a mutable atomic box containing a LifoSemHead value.
// Each wait node corresponds to exactly one waiter. Values can flow
// through the semaphore either by going into and out of the head's value,
// or by direct communication from a poster to a waiter. The former path
// is taken when there are no pending waiters, the latter otherwise. The
// general flow of a post is to try to increment the value or pop-and-post
// a wait node. Either of those has the effect of conveying one semaphore
// unit. Waiting is the opposite, either a decrement of the value or a
// push-and-wait of a wait node. The generic LifoSemBase abstracts the
// actual mechanism by which a wait node's post->wait communication is
// performed, which is why we have LifoSemRawNode and LifoSemNode.
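//
// In sketch form (illustrative pseudo-code only, not the literal
// implementation; see incrOrPop() and decrOrPush() below for the real
// CAS loops):
//
//   post():  if head holds a wait list -> pop one node, node.handoff().post()
//            else                      -> head.value += 1 (saturating)
//
//   wait():  if head.value > 0         -> head.value -= 1
//            else                      -> push a node, node.handoff().wait()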

/// LifoSemRawNode is the actual pooled storage that backs LifoSemNode
/// for user-specified Handoff types. This is done so that we can have
/// a large static IndexedMemPool of nodes, instead of per-type pools
template <template <typename> class Atom>
struct LifoSemRawNode {
  aligned_storage_for_t<void*> raw;

  /// The IndexedMemPool index of the next node in this chain, or 0
  /// if none. This will be set to uint32_t(-1) if the node is being
  /// posted due to a shutdown-induced wakeup
  Atom<uint32_t> next{0};

  bool isShutdownNotice() const {
    return next.load(std::memory_order_relaxed) == uint32_t(-1);
  }
  void clearShutdownNotice() {
    next.store(0, std::memory_order_relaxed);
  }
  void setShutdownNotice() {
    next.store(uint32_t(-1), std::memory_order_relaxed);
  }

  typedef folly::IndexedMemPool<
      LifoSemRawNode<Atom>,
      32,
      200,
      Atom,
      IndexedMemPoolTraitsLazyRecycle<LifoSemRawNode<Atom>>>
      Pool;

  /// Storage for all of the waiter nodes for LifoSem-s that use Atom
  static Pool& pool() {
    return detail::createGlobal<PoolImpl, void>();
  }

 private:
  struct PoolImpl : Pool {
    /// Raw node storage is preallocated in a contiguous memory segment,
    /// but we use an anonymous mmap so the physical memory used (RSS) will
    /// only reflect the maximum number of waiters that actually existed
    /// concurrently. For blocked threads the max node count is limited by the
    /// number of threads, so we can conservatively estimate that this will be
    /// < 10k. For LifoEventSem, however, we could potentially have many more.
    ///
    /// On a 64-bit architecture each LifoSemRawNode takes 16 bytes. We make
    /// the pool 1 million entries.
    static constexpr size_t capacity = 1 << 20;

    PoolImpl() : Pool(static_cast<uint32_t>(capacity)) {}
  };
};

/// Handoff is a type not bigger than a void* that knows how to perform a
/// single post() -> wait() communication. It must have a post() method.
/// If it has a wait() method then LifoSemBase's wait() implementation
/// will work out of the box, otherwise you will need to specialize
/// LifoSemBase::wait accordingly.
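///
/// A minimal sketch of a conforming Handoff (illustrative only; MyHandoff is
/// hypothetical and simply wraps the default BatonType used by LifoSem).
/// Note that the timed-wait path below also calls try_wait_until() on the
/// Handoff:
///
///   struct MyHandoff {
///     folly::SaturatingSemaphore<true> baton; // 4 bytes, fits in a void*
///     void post() { baton.post(); }
///     void wait() { baton.wait(); }
///     template <typename Clock, typename Duration>
///     bool try_wait_until(
///         const std::chrono::time_point<Clock, Duration>& deadline) {
///       return baton.try_wait_until(deadline);
///     }
///   };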
template <typename Handoff, template <typename> class Atom>
struct LifoSemNode : public LifoSemRawNode<Atom> {
  static_assert(
      sizeof(Handoff) <= sizeof(LifoSemRawNode<Atom>::raw),
      "Handoff too big for small-object optimization, use indirection");
  static_assert(
      alignof(Handoff) <= alignof(decltype(LifoSemRawNode<Atom>::raw)),
      "Handoff alignment constraint not satisfied");

  template <typename... Args>
  void init(Args&&... args) {
    new (&this->raw) Handoff(std::forward<Args>(args)...);
  }

  void destroy() {
    handoff().~Handoff();
    if (kIsDebug) {
      memset(&this->raw, 'F', sizeof(this->raw));
    }
  }

  Handoff& handoff() {
    return *static_cast<Handoff*>(static_cast<void*>(&this->raw));
  }

  const Handoff& handoff() const {
    return *static_cast<const Handoff*>(static_cast<const void*>(&this->raw));
  }
};

template <typename Handoff, template <typename> class Atom>
struct LifoSemNodeRecycler {
  void operator()(LifoSemNode<Handoff, Atom>* elem) const {
    elem->destroy();
    auto idx = LifoSemRawNode<Atom>::pool().locateElem(elem);
    LifoSemRawNode<Atom>::pool().recycleIndex(idx);
  }
};

/// LifoSemHead is a 64-bit struct that holds a 32-bit value, some state
/// bits, and a sequence number used to avoid ABA problems in the lock-free
/// management of the LifoSem's wait lists. The value can either hold
/// an integral semaphore value (if there are no waiters) or a node index
/// (see IndexedMemPool) for the head of a list of wait nodes
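///
/// Bit layout of the bits field (derived from the shift constants below):
///
///   [ 0..31]  value, or wait-list head index when isNodeIdx is set
///   [32]      isNodeIdx
///   [33]      isShutdown
///   [34]      isLocked
///   [35..63]  seq (sequence number used for ABA avoidance)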
class LifoSemHead {
  // What we really want are bitfields:
  //   uint64_t data : 32; uint64_t isNodeIdx : 1;
  //   uint64_t isShutdown : 1; uint64_t isLocked : 1; uint64_t seq : 29;
  // Unfortunately g++ generates pretty bad code for this sometimes (I saw
  // -O3 code from gcc 4.7.1 copying the bitfields one at a time instead of
  // in bulk, for example). We can generate better code anyway by assuming
  // that setters won't be given values that cause under/overflow, and
  // putting the sequence at the end where its planned overflow doesn't
  // need any masking.
  //
  // data == 0 (empty list) with isNodeIdx is conceptually the same
  // as data == 0 (no unclaimed increments) with !isNodeIdx; we always
  // convert the former into the latter to make the logic simpler.
  enum {
    IsNodeIdxShift = 32,
    IsShutdownShift = 33,
    IsLockedShift = 34,
    SeqShift = 35,
  };
  enum : uint64_t {
    IsNodeIdxMask = uint64_t(1) << IsNodeIdxShift,
    IsShutdownMask = uint64_t(1) << IsShutdownShift,
    IsLockedMask = uint64_t(1) << IsLockedShift,
    SeqIncr = uint64_t(1) << SeqShift,
    SeqMask = ~(SeqIncr - 1),
  };

 public:
  uint64_t bits;

  //////// getters

  inline uint32_t idx() const {
    assert(isNodeIdx());
    assert(uint32_t(bits) != 0);
    return uint32_t(bits);
  }
  inline uint32_t value() const {
    assert(!isNodeIdx());
    return uint32_t(bits);
  }
  inline constexpr bool isNodeIdx() const {
    return (bits & IsNodeIdxMask) != 0;
  }
  inline constexpr bool isShutdown() const {
    return (bits & IsShutdownMask) != 0;
  }
  inline constexpr bool isLocked() const {
    return (bits & IsLockedMask) != 0;
  }
  inline constexpr uint32_t seq() const {
    return uint32_t(bits >> SeqShift);
  }

  //////// setter-like things return a new struct

  /// This should only be used for initial construction, not for setting
  /// the value, because it clears the sequence number
  static inline constexpr LifoSemHead fresh(uint32_t value) {
    return LifoSemHead{value};
  }

  /// Returns the LifoSemHead that results from popping a waiter node,
  /// given the current waiter node's next ptr
  inline LifoSemHead withPop(uint32_t idxNext) const {
    assert(!isLocked());
    assert(isNodeIdx());
    if (idxNext == 0) {
      // no isNodeIdx bit or data bits. Wraparound of seq bits is okay
      return LifoSemHead{(bits & (SeqMask | IsShutdownMask)) + SeqIncr};
    } else {
      // preserve sequence bits (incremented with wraparound okay) and
      // isNodeIdx bit, replace all data bits
      return LifoSemHead{(bits & (SeqMask | IsShutdownMask | IsNodeIdxMask)) +
                         SeqIncr + idxNext};
    }
  }

  /// Returns the LifoSemHead that results from pushing a new waiter node
  inline LifoSemHead withPush(uint32_t _idx) const {
    assert(!isLocked());
    assert(isNodeIdx() || value() == 0);
    assert(!isShutdown());
    assert(_idx != 0);
    return LifoSemHead{(bits & SeqMask) | IsNodeIdxMask | _idx};
  }

  /// Returns the LifoSemHead with value increased by delta, with
  /// saturation if the maximum value is reached
  inline LifoSemHead withValueIncr(uint32_t delta) const {
    assert(!isLocked());
    assert(!isNodeIdx());
    auto rv = LifoSemHead{bits + SeqIncr + delta};
    if (UNLIKELY(rv.isNodeIdx())) {
      // value has overflowed into the isNodeIdx bit
      rv = LifoSemHead{(rv.bits & ~IsNodeIdxMask) | (IsNodeIdxMask - 1)};
    }
    return rv;
  }

  /// Returns the LifoSemHead that results from decrementing the value
  inline LifoSemHead withValueDecr(uint32_t delta) const {
    assert(!isLocked());
    assert(delta > 0 && delta <= value());
    return LifoSemHead{bits + SeqIncr - delta};
  }

  /// Returns the LifoSemHead with the same state as the current node,
  /// but with the shutdown bit set
  inline LifoSemHead withShutdown() const {
    return LifoSemHead{bits | IsShutdownMask};
  }

  // Returns LifoSemHead with lock bit set, but rest of bits unchanged.
  inline LifoSemHead withLock() const {
    assert(!isLocked());
    return LifoSemHead{bits | IsLockedMask};
  }

  // Returns LifoSemHead with lock bit unset, and updated seqno based
  // on idx.
  inline LifoSemHead withoutLock(uint32_t idxNext) const {
    assert(isLocked());
    // We need to treat this as a pop, as we may change the list head.
    return LifoSemHead{bits & ~IsLockedMask}.withPop(idxNext);
  }

  inline constexpr bool operator==(const LifoSemHead& rhs) const {
    return bits == rhs.bits;
  }
  inline constexpr bool operator!=(const LifoSemHead& rhs) const {
    return !(*this == rhs);
  }
};

/// LifoSemBase is the engine for several different types of LIFO
/// semaphore. LifoSemBase handles storage of positive semaphore values
/// and wait nodes, but the actual waiting and notification mechanism is
/// up to the client.
///
/// The Handoff type is responsible for arranging one wakeup notification.
/// See LifoSemNode for more information on how to make your own.
template <typename Handoff, template <typename> class Atom = std::atomic>
struct LifoSemBase {
  /// Constructor
  constexpr explicit LifoSemBase(uint32_t initialValue = 0)
      : head_(in_place, LifoSemHead::fresh(initialValue)) {}

  LifoSemBase(LifoSemBase const&) = delete;
  LifoSemBase& operator=(LifoSemBase const&) = delete;

  /// Silently saturates if value is already 2^32-1
  bool post() {
    auto idx = incrOrPop(1);
    if (idx != 0) {
      idxToNode(idx).handoff().post();
      return true;
    }
    return false;
  }

  /// Equivalent to n calls to post(), except may be much more efficient.
  /// At any point in time at which the semaphore's value would exceed
  /// 2^32-1 if tracked with infinite precision, it may be silently
  /// truncated to 2^32-1. This saturation is not guaranteed to be exact,
  /// although it is guaranteed that overflow won't result in wrap-around.
  /// There would be a substantial performance and complexity cost in
  /// guaranteeing exact saturation (similar to the cost of maintaining
  /// linearizability near the zero value, but without as much of
  /// a benefit).
  void post(uint32_t n) {
    uint32_t idx;
    while (n > 0 && (idx = incrOrPop(n)) != 0) {
      // pop accounts for only 1
      idxToNode(idx).handoff().post();
      --n;
    }
  }

  /// Returns true iff shutdown() has been called
  bool isShutdown() const {
    return UNLIKELY(head_->load(std::memory_order_acquire).isShutdown());
  }

  /// Prevents blocking on this semaphore, causing all blocking wait()
  /// calls to throw ShutdownSemError. Both currently blocked wait() and
  /// future calls to wait() for which tryWait() would return false will
  /// cause an exception. Calls to wait() for which the matching post()
  /// has already occurred will proceed normally.
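  ///
  /// A sketch of the resulting consumer-side drain pattern (illustrative
  /// only; sem, workQueue, and process() are hypothetical):
  ///
  ///   try {
  ///     while (true) {
  ///       sem.wait(); // keeps succeeding while already-posted units remain
  ///       process(workQueue.dequeue());
  ///     }
  ///   } catch (const ShutdownSemError&) {
  ///     // shutdown() was called and every posted unit has been consumed
  ///   }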
  void shutdown() {
    // first set the shutdown bit
    auto h = head_->load(std::memory_order_acquire);
    while (!h.isShutdown()) {
      if (h.isLocked()) {
        std::this_thread::yield();
        h = head_->load(std::memory_order_acquire);
        continue;
      }

      if (head_->compare_exchange_strong(h, h.withShutdown())) {
        // success
        h = h.withShutdown();
        break;
      }
      // compare_exchange_strong rereads h, retry
    }

    // now wake up any waiters
    while (h.isNodeIdx()) {
      if (h.isLocked()) {
        std::this_thread::yield();
        h = head_->load(std::memory_order_acquire);
        continue;
      }
      auto& node = idxToNode(h.idx());
      auto repl = h.withPop(node.next.load(std::memory_order_relaxed));
      if (head_->compare_exchange_strong(h, repl)) {
        // successful pop, wake up the waiter and move on. The next
        // field is used to convey that this wakeup didn't consume a value
        node.setShutdownNotice();
        node.handoff().post();
        h = repl;
      }
    }
  }

  /// Returns true iff value was decremented
  bool tryWait() {
    uint32_t n = 1;
    auto rv = decrOrPush(n, 0);
    assert(
        (rv == WaitResult::DECR && n == 0) ||
        (rv != WaitResult::DECR && n == 1));
    // SHUTDOWN is okay here, since we don't actually wait
    return rv == WaitResult::DECR;
  }

  /// Equivalent to (but may be much more efficient than) n calls to
  /// tryWait(). Returns the total amount by which the semaphore's value
  /// was decreased
  uint32_t tryWait(uint32_t n) {
    auto const orig = n;
    while (n > 0) {
#ifndef NDEBUG
      auto prev = n;
#endif
      auto rv = decrOrPush(n, 0);
      assert(
          (rv == WaitResult::DECR && n < prev) ||
          (rv != WaitResult::DECR && n == prev));
      if (rv != WaitResult::DECR) {
        break;
      }
    }
    return orig - n;
  }

  /// Blocks the current thread until there is a matching post or the
  /// semaphore is shut down. Throws ShutdownSemError if the semaphore
  /// has been shut down and this method would otherwise be blocking.
  /// Note that wait() doesn't throw during shutdown if tryWait() would
  /// return true
  void wait() {
    auto const deadline = std::chrono::steady_clock::time_point::max();
    auto res = try_wait_until(deadline);
    FOLLY_SAFE_DCHECK(res, "infinity time has passed");
  }

  bool try_wait() {
    return tryWait();
  }
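
  /// Timed-wait variants: try_wait_for() and try_wait_until() return true
  /// if a unit was consumed before the timeout and false otherwise; like
  /// wait(), they throw ShutdownSemError if they would otherwise block on
  /// a semaphore that has been shut down. For example (illustrative only,
  /// given a semaphore sem):
  ///
  ///   if (!sem.try_wait_for(std::chrono::milliseconds(100))) {
  ///     // timed out without consuming a unit
  ///   }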
  template <typename Rep, typename Period>
  bool try_wait_for(const std::chrono::duration<Rep, Period>& timeout) {
    return try_wait_until(timeout + std::chrono::steady_clock::now());
  }

  template <typename Clock, typename Duration>
  bool try_wait_until(
      const std::chrono::time_point<Clock, Duration>& deadline) {
    // early check isn't required for correctness, but is an important
    // perf win if we can avoid allocating and deallocating a node
    if (tryWait()) {
      return true;
    }

    // allocateNode() won't compile unless Handoff has a default
    // constructor
    UniquePtr node = allocateNode();

    auto rv = tryWaitOrPush(*node);
    if (UNLIKELY(rv == WaitResult::SHUTDOWN)) {
      assert(isShutdown());
      throw ShutdownSemError("wait() would block but semaphore is shut down");
    }

    if (rv == WaitResult::PUSH) {
      if (!node->handoff().try_wait_until(deadline)) {
        if (tryRemoveNode(*node)) {
          return false;
        } else {
          // We could not remove our node. Return to waiting.
          //
          // This only happens if we lose a removal race with post(),
          // so we are not likely to wait long. This is only
          // necessary to ensure we don't return node's memory back to
          // IndexedMemPool before post() has had a chance to post to
          // handoff(). In a stronger memory reclamation scheme, such
          // as hazptr or rcu, this would not be necessary.
          node->handoff().wait();
        }
      }
      if (UNLIKELY(node->isShutdownNotice())) {
        // this wait() didn't consume a value, it was triggered by shutdown
        throw ShutdownSemError(
            "blocking wait() interrupted by semaphore shutdown");
      }

      // node->handoff().wait() can't return until after the node has
      // been popped and post()ed, so it is okay for the UniquePtr to
      // recycle the node now
    }
    // else node wasn't pushed, so it is safe to recycle
    return true;
  }

  /// Returns a guess at the current value, designed for debugging.
  /// If there are no concurrent posters or waiters then this will
  /// be correct
  uint32_t valueGuess() const {
    // this is actually linearizable, but we don't promise that because
    // we may want to add striping in the future to help under heavy
    // contention
    auto h = head_->load(std::memory_order_acquire);
    return h.isNodeIdx() ? 0 : h.value();
  }

 protected:
  enum class WaitResult {
    PUSH,
    DECR,
    SHUTDOWN,
  };

  /// The type of a std::unique_ptr that will automatically return a
  /// LifoSemNode to the appropriate IndexedMemPool
  typedef std::
      unique_ptr<LifoSemNode<Handoff, Atom>, LifoSemNodeRecycler<Handoff, Atom>>
          UniquePtr;

  /// Returns a node that can be passed to decrOrPush (via tryWaitOrPush)
  template <typename... Args>
  UniquePtr allocateNode(Args&&... args) {
    auto idx = LifoSemRawNode<Atom>::pool().allocIndex();
    if (idx != 0) {
      auto& node = idxToNode(idx);
      node.clearShutdownNotice();
      try {
        node.init(std::forward<Args>(args)...);
      } catch (...) {
        LifoSemRawNode<Atom>::pool().recycleIndex(idx);
        throw;
      }
      return UniquePtr(&node);
    } else {
      return UniquePtr();
    }
  }

  /// Returns DECR if the semaphore value was decremented (and waiterNode
  /// was untouched), PUSH if a reference to the wait node was pushed,
  /// or SHUTDOWN if decrement was not possible and push wasn't allowed
  /// because isShutdown(). Ownership of the wait node remains the
  /// responsibility of the caller, who must not release it until after
  /// the node's Handoff has been posted.
  WaitResult tryWaitOrPush(LifoSemNode<Handoff, Atom>& waiterNode) {
    uint32_t n = 1;
    return decrOrPush(n, nodeToIdx(waiterNode));
  }

  // Locks the list head (blocking concurrent pushes and pops)
  // and attempts to remove this node. Returns true if node was
  // found and removed, false if not found.
  bool tryRemoveNode(const LifoSemNode<Handoff, Atom>& removenode) {
    auto removeidx = nodeToIdx(removenode);
    auto head = head_->load(std::memory_order_acquire);
    // Try to lock the head.
    while (true) {
      if (head.isLocked()) {
        std::this_thread::yield();
        head = head_->load(std::memory_order_acquire);
        continue;
      }
      if (!head.isNodeIdx()) {
        return false;
      }
      if (head_->compare_exchange_weak(
              head,
              head.withLock(),
              std::memory_order_acquire,
              std::memory_order_relaxed)) {
        break;
      }
    }
    // Update local var to what head_ is, for better assert() checking.
    head = head.withLock();
    bool result = false;
    auto idx = head.idx();
    if (idx == removeidx) {
      // pop from head. Head seqno is updated.
      head_->store(
          head.withoutLock(removenode.next.load(std::memory_order_relaxed)),
          std::memory_order_release);
      return true;
    }
    auto node = &idxToNode(idx);
    idx = node->next.load(std::memory_order_relaxed);
    while (idx) {
      if (idx == removeidx) {
        // Pop from mid-list.
        node->next.store(
            removenode.next.load(std::memory_order_relaxed),
            std::memory_order_relaxed);
        result = true;
        break;
      }
      node = &idxToNode(idx);
      idx = node->next.load(std::memory_order_relaxed);
    }
    // Unlock and return result
    head_->store(head.withoutLock(head.idx()), std::memory_order_release);
    return result;
  }

 private:
  cacheline_aligned<folly::AtomicStruct<LifoSemHead, Atom>> head_;

  static LifoSemNode<Handoff, Atom>& idxToNode(uint32_t idx) {
    auto raw = &LifoSemRawNode<Atom>::pool()[idx];
    return *static_cast<LifoSemNode<Handoff, Atom>*>(raw);
  }

  static uint32_t nodeToIdx(const LifoSemNode<Handoff, Atom>& node) {
    return LifoSemRawNode<Atom>::pool().locateElem(&node);
  }

  /// Either increments by n and returns 0, or pops a node and returns it.
  /// If n + the stripe's value overflows, then the stripe's value
  /// saturates silently at 2^32-1
  uint32_t incrOrPop(uint32_t n) {
    while (true) {
      assert(n > 0);

      auto head = head_->load(std::memory_order_acquire);
      if (head.isLocked()) {
        std::this_thread::yield();
        continue;
      }
      if (head.isNodeIdx()) {
        auto& node = idxToNode(head.idx());
        if (head_->compare_exchange_strong(
                head,
                head.withPop(node.next.load(std::memory_order_relaxed)))) {
          // successful pop
          return head.idx();
        }
      } else {
        auto after = head.withValueIncr(n);
        if (head_->compare_exchange_strong(head, after)) {
          // successful incr
          return 0;
        }
      }
      // retry
    }
  }

  /// Returns DECR if some amount was decremented, with that amount
  /// subtracted from n. If n is 1 and this function returns DECR then n
  /// must be 0 afterward. Returns PUSH if no value could be decremented
  /// and idx was pushed, or if idx was zero and no push was performed but
  /// a push would have been performed with a valid node. Returns SHUTDOWN
  /// if the caller should have blocked but isShutdown(). If idx == 0,
  /// may return PUSH even after isShutdown() or may return SHUTDOWN
  WaitResult decrOrPush(uint32_t& n, uint32_t idx) {
    assert(n > 0);

    while (true) {
      auto head = head_->load(std::memory_order_acquire);

      if (head.isLocked()) {
        std::this_thread::yield();
        continue;
      }

      if (!head.isNodeIdx() && head.value() > 0) {
        // decr
        auto delta = std::min(n, head.value());
        if (head_->compare_exchange_strong(head, head.withValueDecr(delta))) {
          n -= delta;
          return WaitResult::DECR;
        }
      } else {
        // push
        if (idx == 0) {
          return WaitResult::PUSH;
        }

        if (UNLIKELY(head.isShutdown())) {
          return WaitResult::SHUTDOWN;
        }

        auto& node = idxToNode(idx);
        node.next.store(
            head.isNodeIdx() ? head.idx() : 0, std::memory_order_relaxed);
        if (head_->compare_exchange_strong(head, head.withPush(idx))) {
          // push succeeded
          return WaitResult::PUSH;
        }
      }
      // retry
    }
  }
};

} // namespace detail

template <template <typename> class Atom, class BatonType>
struct LifoSemImpl : public detail::LifoSemBase<BatonType, Atom> {
  constexpr explicit LifoSemImpl(uint32_t v = 0)
      : detail::LifoSemBase<BatonType, Atom>(v) {}
};

} // namespace folly