/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <assert.h>
#include <errno.h>
#include <stdint.h>

#include <atomic>
#include <thread>

#include <folly/Likely.h>
#include <folly/detail/AsyncTrace.h>
#include <folly/detail/Futex.h>
#include <folly/detail/MemoryIdler.h>
#include <folly/portability/Asm.h>
#include <folly/synchronization/AtomicUtil.h>
#include <folly/synchronization/WaitOptions.h>
#include <folly/synchronization/detail/Spin.h>

namespace folly {

/// A Baton allows a thread to block once and be awoken. Captures a
/// single handoff, and during its lifecycle (from construction/reset
/// to destruction/reset) a baton must either be post()ed and wait()ed
/// exactly once each, or not at all.
///
/// Baton includes no internal padding, and is only 4 bytes in size.
/// Any alignment or padding to avoid false sharing is up to the user.
///
/// This is basically a stripped-down semaphore that supports only a
/// single call to sem_post and a single call to sem_wait.
///
/// The non-blocking version (MayBlock == false) provides more speed
/// by using only load acquire and store release operations in the
/// critical path, at the cost of disallowing blocking.
///
/// The current posix semaphore sem_t isn't too bad, but this provides
/// a bit more speed, inlining, smaller size, a guarantee that
/// the implementation won't change, and compatibility with
/// DeterministicSchedule. By having a much more restrictive
/// lifecycle we can also add a bunch of assertions that can help to
/// catch race conditions ahead of time.
template <bool MayBlock = true, template <typename> class Atom = std::atomic>
class Baton {
 public:
  FOLLY_ALWAYS_INLINE static constexpr WaitOptions wait_options() {
    return {};
  }

  constexpr Baton() noexcept : state_(INIT) {}

  Baton(Baton const&) = delete;
  Baton& operator=(Baton const&) = delete;

  /// It is an error to destroy a Baton on which a thread is currently
  /// wait()ing. In practice this means that the waiter usually takes
  /// responsibility for destroying the Baton.
  ~Baton() noexcept {
    // The docblock for this function says that it can't be called when
    // there is a concurrent waiter. We assume a strong version of this
    // requirement in which the caller must _know_ that this is true, they
    // are not allowed to be merely lucky. If two threads are involved,
    // the destroying thread must actually have synchronized with the
    // waiting thread after wait() returned. To convey causality the
    // waiting thread must have used release semantics and the destroying
    // thread must have used acquire semantics for that communication,
    // so we are guaranteed to see the post-wait() value of state_,
    // which cannot be WAITING.
    //
    // Note that since we only care about a single memory location,
    // the only two plausible memory orders here are relaxed and seq_cst.
    assert(state_.load(std::memory_order_relaxed) != WAITING);
  }

  FOLLY_ALWAYS_INLINE bool ready() const noexcept {
    auto s = state_.load(std::memory_order_acquire);
    assert(s == INIT || s == EARLY_DELIVERY);
    return LIKELY(s == EARLY_DELIVERY);
  }

  /// Equivalent to destroying the Baton and creating a new one. It is
  /// a bug to call this while there is a waiting thread, so in practice
  /// the waiter will be the one that resets the baton.
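  ///
  /// A minimal illustration of the lifecycle rules described above (the
  /// variable name is made up for the example; post/wait/reset are the
  /// real API):
  ///
  ///   folly::Baton<> b;
  ///   b.post();   // lifetime 1: at most one post()...
  ///   b.wait();   // ...and at most one wait()
  ///   b.reset();  // starts lifetime 2; the (former) waiter should do this
  ///   b.post();   // lifetime 2 gets its own single post()/wait() pair
  ///   b.wait();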
  void reset() noexcept {
    // See ~Baton for a discussion about why relaxed is okay here
    assert(state_.load(std::memory_order_relaxed) != WAITING);

    // We use a similar argument to justify the use of a relaxed store
    // here. Since both wait() and post() are required to be called
    // only once per lifetime, no thread can actually call those methods
    // correctly after a reset() unless it synchronizes with the thread
    // that performed the reset(). If a post() or wait() on another thread
    // didn't synchronize, then regardless of what operation we performed
    // here there would be a race on proper use of the Baton's spec
    // (although not on any particular load and store). Put another way,
    // we don't need to synchronize here because anybody that might rely
    // on such synchronization is required by the baton rules to perform
    // an additional synchronization that has the desired effect anyway.
    //
    // There is actually a similar argument to be made about the
    // constructor, in which the fenceless constructor initialization
    // of state_ is piggybacked on whatever synchronization mechanism
    // distributes knowledge of the Baton's existence
    state_.store(INIT, std::memory_order_relaxed);
  }

  /// Causes wait() to wake up. For each lifetime of a Baton (where a
  /// lifetime starts at construction or reset() and ends at
  /// destruction or reset()) there can be at most one call to post(),
  /// in the single poster version. Any thread may call post().
  void post() noexcept {
    if (!MayBlock) {
      /// Spin-only version
      ///
      assert(
          ((1 << state_.load(std::memory_order_relaxed)) &
           ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0);
      state_.store(EARLY_DELIVERY, std::memory_order_release);
      return;
    }

    /// May-block versions
    ///
    uint32_t before = state_.load(std::memory_order_acquire);

    assert(before == INIT || before == WAITING || before == TIMED_OUT);

    if (before == INIT &&
        state_.compare_exchange_strong(
            before,
            EARLY_DELIVERY,
            std::memory_order_release,
            std::memory_order_relaxed)) {
      return;
    }

    assert(before == WAITING || before == TIMED_OUT);

    if (before == TIMED_OUT) {
      return;
    }

    assert(before == WAITING);
    state_.store(LATE_DELIVERY, std::memory_order_release);
    detail::futexWake(&state_, 1);
  }

  /// Waits until post() has been called in the current Baton lifetime.
  /// May be called at most once during a Baton lifetime (construction
  /// |reset until destruction|reset). If post is called before wait in
  /// the current lifetime then this method returns immediately.
  ///
  /// The restriction that there can be at most one wait() per lifetime
  /// could be relaxed somewhat without any perf or size regressions,
  /// but by making this condition very restrictive we can provide better
  /// checking in debug builds.
  FOLLY_ALWAYS_INLINE
  void wait(const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return;
    }

    auto const deadline = std::chrono::steady_clock::time_point::max();
    tryWaitSlow(deadline, opt);
  }

  /// Similar to wait, but doesn't block the thread if it hasn't been posted.
  ///
  /// try_wait has the following semantics:
  /// - It is ok to call try_wait any number of times on the same baton until
  ///   try_wait reports that the baton has been posted.
  /// - It is ok to call timed_wait or wait on the same baton if try_wait
  ///   reports that the baton hasn't been posted.
  /// - If try_wait indicates that the baton has been posted, it is invalid to
  ///   call wait, try_wait or timed_wait on the same baton without resetting.
  ///
  /// @return true if the baton has been posted, false otherwise
  FOLLY_ALWAYS_INLINE bool try_wait() const noexcept { return ready(); }

  /// Similar to wait, but with a timeout. The thread is unblocked if the
  /// timeout expires.
  /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
  /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
  /// words, after try_wait_for the caller can't invoke
  /// wait/try_wait/try_wait_for/try_wait_until
  /// again on the same baton without resetting it.
  ///
  /// @param  timeout       Duration for which the thread can block
  /// @return               true if the baton was posted to before timeout,
  ///                       false otherwise
  template <typename Rep, typename Period>
  FOLLY_ALWAYS_INLINE bool try_wait_for(
      const std::chrono::duration<Rep, Period>& timeout,
      const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return true;
    }

    auto const deadline = std::chrono::steady_clock::now() + timeout;
    return tryWaitSlow(deadline, opt);
  }

  /// Similar to wait, but with a deadline. The thread is unblocked if the
  /// deadline expires.
  /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
  /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
  /// words, after try_wait_until the caller can't invoke
  /// wait/try_wait/try_wait_for/try_wait_until
  /// again on the same baton without resetting it.
  ///
  /// @param  deadline      Time until which the thread can block
  /// @return               true if the baton was posted to before deadline,
  ///                       false otherwise
  template <typename Clock, typename Duration>
  FOLLY_ALWAYS_INLINE bool try_wait_until(
      const std::chrono::time_point<Clock, Duration>& deadline,
      const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return true;
    }

    return tryWaitSlow(deadline, opt);
  }

  /// Alias to try_wait_for. Deprecated.
  template <typename Rep, typename Period>
  FOLLY_ALWAYS_INLINE bool timed_wait(
      const std::chrono::duration<Rep, Period>& timeout) noexcept {
    return try_wait_for(timeout);
  }

  /// Alias to try_wait_until. Deprecated.
  template <typename Clock, typename Duration>
  FOLLY_ALWAYS_INLINE bool timed_wait(
      const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
    return try_wait_until(deadline);
  }

 private:
  enum State : uint32_t {
    INIT = 0,
    EARLY_DELIVERY = 1,
    WAITING = 2,
    LATE_DELIVERY = 3,
    TIMED_OUT = 4,
  };

  template <typename Clock, typename Duration>
  FOLLY_NOINLINE bool tryWaitSlow(
      const std::chrono::time_point<Clock, Duration>& deadline,
      const WaitOptions& opt) noexcept {
    if (opt.logging_enabled()) {
      folly::async_tracing::logBlockingOperation(
          std::chrono::duration_cast<std::chrono::milliseconds>(
              deadline - Clock::now()));
    }

    switch (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) {
      case detail::spin_result::success:
        return true;
      case detail::spin_result::timeout:
        return false;
      case detail::spin_result::advance:
        break;
    }

    if (!MayBlock) {
      switch (detail::spin_yield_until(deadline, [=] { return ready(); })) {
        case detail::spin_result::success:
          return true;
        case detail::spin_result::timeout:
          return false;
        case detail::spin_result::advance:
          break;
      }
    }

    // guess we have to block :(
    uint32_t expected = INIT;
    if (!state_.compare_exchange_strong(
            expected,
            WAITING,
            std::memory_order_relaxed,
            std::memory_order_relaxed)) {
      // CAS failed, last minute reprieve
      assert(expected == EARLY_DELIVERY);

      // TODO: move the acquire to the compare_exchange failure load after
      // C++17
      std::atomic_thread_fence(std::memory_order_acquire);
      return true;
    }

    while (true) {
      auto rv = detail::MemoryIdler::futexWaitUntil(state_, WAITING, deadline);

      // Awoken by the deadline passing.
      if (rv == detail::FutexResult::TIMEDOUT) {
        assert(deadline != (std::chrono::time_point<Clock, Duration>::max()));
        state_.store(TIMED_OUT, std::memory_order_release);
        return false;
      }

      // Probably awoken by a matching wake event, but could also be awoken
      // by an asynchronous signal or by a spurious wakeup.
      //
      // state_ is the truth even if FUTEX_WAIT reported a matching
      // FUTEX_WAKE, since we aren't using type-stable storage and we
      // don't guarantee reuse. The scenario goes like this: thread
      // A's last touch of a Baton is a call to post(), which stores
      // LATE_DELIVERY and gets an unlucky context switch before delivering
      // the corresponding futexWake. Thread B sees LATE_DELIVERY
      // without consuming a futex event, because it calls futexWait
      // with an expected value of WAITING and hence doesn't go to sleep.
      // B returns, so the Baton's memory is reused and becomes another
      // Baton (or a reuse of this one). B calls futexWait on the new
      // Baton lifetime, then A wakes up and delivers a spurious futexWake
      // to the same memory location. B's futexWait will then report a
      // consumed wake event even though state_ is still WAITING.
      //
      // It would be possible to add an extra state_ dance to communicate
      // that the futexWake has been sent so that we can be sure to consume
      // it before returning, but that would be a perf and complexity hit.
      uint32_t s = state_.load(std::memory_order_acquire);
      assert(s == WAITING || s == LATE_DELIVERY);

      if (s == LATE_DELIVERY) {
        return true;
      }
    }
  }

  detail::Futex<Atom> state_;
};

} // namespace folly
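
// Illustrative usage (a minimal sketch, not part of this header's API): one
// thread publishes a result and post()s, another wait()s for the handoff,
// and a second baton shows the timed variant. The function and variable
// names below are made up for the example.
//
//   #include <folly/synchronization/Baton.h>
//
//   #include <cassert>
//   #include <chrono>
//   #include <thread>
//
//   void handoff_example() {
//     folly::Baton<> done;
//     int result = 0;
//     std::thread producer([&] {
//       result = 42;  // produce the value...
//       done.post();  // ...then hand off: exactly one post() this lifetime
//     });
//     done.wait();    // exactly one wait(); returns once post() has happened
//     assert(result == 42);
//     producer.join();
//
//     // Timed variant: try_wait_for() returns false if nothing was posted
//     // within the timeout; no further waits are allowed until reset().
//     folly::Baton<> never_posted;
//     bool posted = never_posted.try_wait_for(std::chrono::milliseconds(1));
//     assert(!posted);
//   }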