/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace folly { namespace detail { namespace distributed_mutex { // kUnlocked is used to show unlocked state // // When locking threads encounter kUnlocked in the underlying storage, they // can just acquire the lock without any further effort constexpr auto kUnlocked = std::uintptr_t{0b0}; // kLocked is used to show that the mutex is currently locked, and future // attempts to lock the mutex should enqueue on the central storage // // Locking threads find this on central storage only when there is a // contention chain that is undergoing wakeups, in every other case, a locker // will either find kUnlocked or an arbitrary address with the kLocked bit set constexpr auto kLocked = std::uintptr_t{0b1}; // kTimedWaiter is set when there is at least one timed waiter on the mutex // // Timed waiters do not follow the sleeping strategy employed by regular, // non-timed threads. 
// They sleep on the central mutex atomic through an extended futex()
// interface that allows sleeping with the same semantics for non-standard
// integer widths
//
// When a regular non-timed thread unlocks or enqueues on the mutex, and sees
// a timed waiter, it takes ownership of all the timed waiters.  The thread
// that has taken ownership of the timed waiters releases them when it gets a
// chance at the critical section.  At that point it issues a wakeup to a
// single timed waiter; timed waiters always issue wake() calls to other timed
// waiters
constexpr std::uintptr_t kTimedWaiter{0b10};

// kUninitialized means that the thread has just enqueued, and has not yet
// gotten to initializing itself with the address of its successor
//
// This becomes significant for threads that are trying to wake up the
// uninitialized thread: if they see that the thread is not yet initialized,
// they can do nothing but spin, and wait for the thread to get initialized
//
// This also plays a role in the functioning of flat combining as implemented
// in DistributedMutex.  When a thread owning the lock goes through the
// contention chain to either unlock the mutex or combine critical sections
// from the other end, the presence of kUninitialized means that the
// combining thread is not able to make progress after this point.  So we
// transfer the lock.
constexpr std::uint32_t kUninitialized{0b0};

// kWaiting is stored in a waiter's futex struct while that waiter is spinning
// and waiting for the mutex
constexpr std::uint32_t kWaiting{0b1};

// kWake is written by threads that are waking up waiters that have enqueued
constexpr std::uint32_t kWake{0b10};

// kSkipped is written by a waker when it sees that a waiter has been
// preempted away by the kernel; in this case the thread that got skipped has
// to wake up and put itself back on the queue
constexpr std::uint32_t kSkipped{0b11};

// kAboutToWait is written by a waiter that enqueues itself with the purpose
// of waiting on a futex
constexpr std::uint32_t kAboutToWait{0b100};

// kSleeping will be set by a waiter right before enqueueing on a futex.  When
// a thread wants to wake up a waiter that has enqueued on a futex, it should
// set the futex to contain kWake
//
// A thread that is unlocking and wants to skip over a sleeping thread also
// calls futex_.exchange(kSleeping) on the sleeping thread's futex word.  It
// does this to 1. detect whether the sleeping thread had actually gone to
// sleep on the futex word so it can skip it, and 2. synchronize with other
// non-atomic writes in the sleeping thread's context (such as the write to
// track the next waiting thread).
//
// We reuse kSleeping instead of say using another constant kEarlyDelivery to
// avoid situations where a thread has to enter kernel mode due to calling
// futexWait() twice because of the presence of a waking thread.  This
// situation can arise when an unlocking thread goes to skip over a sleeping
// thread, sees that the thread has slept and moves on, but the sleeping
// thread had not yet entered futex().
// This interleaving causes the thread calling futex() to return spuriously,
// as the futex word is not what it should be
constexpr std::uint32_t kSleeping{0b101};

// kCombined is written by the lock holder to tell the waiter thread that its
// combine request was successfully completed by the lock holder.  A
// successful combine means that the thread requesting the combine operation
// does not need to unlock the mutex; in fact, doing so would be an error.
constexpr std::uint32_t kCombined{0b111};

// kCombineUninitialized is like kUninitialized, but is set by a thread when
// it enqueues in hopes of getting its critical section combined with the lock
// holder
constexpr std::uint32_t kCombineUninitialized{0b1000};

// kCombineWaiting is set by a thread when it is ready to have its combine
// record fulfilled by the lock holder.  In particular, this signals to the
// lock holder that the thread has set its next_ pointer in the contention
// chain
constexpr std::uint32_t kCombineWaiting{0b1001};

// kExceptionOccurred is set on the waiter futex when the remote task throws
// an exception.  It is the caller's responsibility to retrieve the exception
// and rethrow it in their own context.  Note that when the caller uses a
// noexcept function as their critical section, they can avoid checking for
// this value
//
// This allows us to avoid all cost of exceptions in the memory layout of the
// fast path (no errors) as exceptions are stored as an std::exception_ptr in
// the same union that stores the return value of the critical section.
We // also avoid all CPU overhead because the combiner uses a try-catch block // without any additional branching to handle exceptions constexpr auto kExceptionOccurred = std::uint32_t{0b1010}; // The number of spins that we are allowed to do before we resort to marking a // thread as having slept // // This is just a magic number from benchmarks constexpr auto kScheduledAwaySpinThreshold = std::chrono::nanoseconds{200}; // The maximum number of spins before a thread starts yielding its processor // in hopes of getting skipped constexpr auto kMaxSpins = 4000; // The maximum number of contention chains we can resolve with flat combining. // After this number of contention chains, the mutex falls back to regular // two-phased mutual exclusion to ensure that we don't starve the combiner // thread constexpr auto kMaxCombineIterations = 2; /** * Write only data that is available to the thread that is waking up another. * Only the waking thread is allowed to write to this, the thread to be woken * is allowed to read from this after a wakeup has been issued */ template