/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <folly/Function.h>
#include <folly/ThreadLocal.h>
#include <folly/detail/Futex.h> // for detail::Futex, futexWait, futexWake
#include <folly/synchronization/AsymmetricMemoryBarrier.h>

// This is unlike folly::ThreadCachedInt in that the full value is never
// rounded up globally and cached; it only supports readFull().
//
// folly/experimental/TLRefCount is similar, but it does not support
// waitForZero() and is not resettable.
//
// Note that the RCU implementation is completely abstracted from the
// counter implementation; an rseq-based implementation can be dropped
// in if the kernel supports it.

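// A minimal usage sketch (illustrative only; the `counts` object, the epoch
// values, and the reader/writer split are assumptions about the caller, not
// something this header mandates):
//
//   folly::detail::ThreadCachedInts<void> counts;
//
//   // Reader: bracket a read-side critical section with the current epoch.
//   counts.increment(0);
//   // ... read-side critical section ...
//   counts.decrement(0); // may happen on a different thread than increment()
//
//   // Writer: after flipping new readers over to epoch 1, wait for all
//   // epoch-0 readers to drain before reclaiming.
//   counts.waitForZero(0);
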
namespace folly {

namespace detail {

template <typename Tag>
class ThreadCachedInts {
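  // Per-epoch counters, indexed by epoch (0 or 1). Counts from reader
  // threads that have already exited are folded into the orphan_ slots by
  // ~Integer(); waiting_ is the futex that waitForZero() sleeps on.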
  std::atomic<int64_t> orphan_inc_[2] = {};
  std::atomic<int64_t> orphan_dec_[2] = {};
  folly::detail::Futex<> waiting_{0};

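  // Per-thread counter record, owned via cs_ below. The destructor runs at
  // thread exit and folds this thread's counts into the orphan_ slots so
  // they are not lost.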
  class Integer {
   public:
    ThreadCachedInts* ints_;
    constexpr Integer(ThreadCachedInts* ints) noexcept
        : ints_(ints), inc_{}, dec_{}, cache_(ints->int_cache_) {}
    std::atomic<int64_t> inc_[2];
    std::atomic<int64_t> dec_[2];
    Integer*& cache_; // reference to the cached ptr
    ~Integer() noexcept {
      // Increment counts must be set before decrement counts
      ints_->orphan_inc_[0].fetch_add(
          inc_[0].load(std::memory_order_relaxed), std::memory_order_relaxed);
      ints_->orphan_inc_[1].fetch_add(
          inc_[1].load(std::memory_order_relaxed), std::memory_order_relaxed);
      folly::asymmetricLightBarrier(); // B
      ints_->orphan_dec_[0].fetch_add(
          dec_[0].load(std::memory_order_relaxed), std::memory_order_relaxed);
      ints_->orphan_dec_[1].fetch_add(
          dec_[1].load(std::memory_order_relaxed), std::memory_order_relaxed);
      ints_->waiting_.store(0, std::memory_order_release);
      detail::futexWake(&ints_->waiting_);
      // Reset cache_ in the destructor so we can handle the delete/recreate
      // of this thread's Integer.
      cache_ = nullptr;
    }
  };
  folly::ThreadLocalPtr<Integer, Tag> cs_;

  // Cache the Integer pointer in a thread_local so the fast paths can skip
  // the ThreadLocalPtr lookup.
  static thread_local Integer* int_cache_;

  void init() {
    auto ret = new Integer(this);
    cs_.reset(ret);
    int_cache_ = ret;
  }

 public:
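  // Called by readers entering an epoch. The relaxed load/store pair below
  // is deliberately not a fetch_add: only the owning thread ever writes its
  // own counter, so a plain read-modify-write is sufficient and cheaper.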
  FOLLY_ALWAYS_INLINE void increment(uint8_t epoch) {
    if (!int_cache_) {
      init();
    }

    auto& c = int_cache_->inc_[epoch];
    auto val = c.load(std::memory_order_relaxed);
    c.store(val + 1, std::memory_order_relaxed);

    folly::asymmetricLightBarrier(); // A
  }

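  // Called by readers leaving an epoch, possibly from a different thread
  // than the matching increment(). Barrier B orders the critical section's
  // memory accesses before the decrement becomes visible.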
  FOLLY_ALWAYS_INLINE void decrement(uint8_t epoch) {
    folly::asymmetricLightBarrier(); // B
    if (!int_cache_) {
      init();
    }

    auto& c = int_cache_->dec_[epoch];
    auto val = c.load(std::memory_order_relaxed);
    c.store(val + 1, std::memory_order_relaxed);

    folly::asymmetricLightBarrier(); // C
    if (waiting_.load(std::memory_order_acquire)) {
      waiting_.store(0, std::memory_order_release);
      detail::futexWake(&waiting_);
    }
  }

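  // Sums the counters for `epoch` across all threads, including orphan
  // counts. Decrements are deliberately summed before increments: if a
  // decrement is counted, barrier B guarantees its matching increment is
  // visible to the second pass, so a concurrent reader can only make the
  // result larger, never produce a false zero.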
  int64_t readFull(uint8_t epoch) {
    int64_t full = -orphan_dec_[epoch].load(std::memory_order_relaxed);

    // Matches A - ensure all threads have seen the new value of the
    // version, *and* that we see current values of the counters in
    // readFull().
    //
    // Note that in lock_shared, if a reader is currently between the
    // version load and the counter increment, it may update the wrong
    // epoch. However, this is ok - it started concurrently *after* any
    // callbacks that will run, and therefore it is safe to run the
    // callbacks.
    folly::asymmetricHeavyBarrier();
    for (auto& i : cs_.accessAllThreads()) {
      full -= i.dec_[epoch].load(std::memory_order_relaxed);
    }

    // Matches B - ensure that all increments are seen if decrements are
    // seen. This is necessary because increment and decrement are allowed
    // to happen on different threads.
    folly::asymmetricHeavyBarrier();

    auto accessor = cs_.accessAllThreads();
    for (auto& i : accessor) {
      full += i.inc_[epoch].load(std::memory_order_relaxed);
    }

    // orphan_inc_ is read behind the accessAllThreads() lock.
    return full + orphan_inc_[epoch].load(std::memory_order_relaxed);
  }

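  // Blocks until readFull(phase) reaches zero, using the waiting_ futex:
  // set waiting_, re-check the count, then sleep. A concurrent decrement
  // that observes waiting_ clears it and wakes us (see barrier C above);
  // spurious wakeups are handled by re-checking in the loop.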
  void waitForZero(uint8_t phase) {
    // Try reading before futex sleeping.
    if (readFull(phase) == 0) {
      return;
    }

    while (true) {
      waiting_.store(1, std::memory_order_release);
      // Matches C. Ensure either the decrement sees waiting_, or we see
      // their decrement and can safely sleep.
      folly::asymmetricHeavyBarrier();
      if (readFull(phase) == 0) {
        break;
      }
      detail::futexWait(&waiting_, 1);
    }
    waiting_.store(0, std::memory_order_relaxed);
  }

  // We are guaranteed to be called while the StaticMeta lock is still held,
  // because of the ordering in AtForkList. We can therefore safely touch
  // orphan_ and clear out all counts.
  void resetAfterFork() {
    if (int_cache_) {
      int_cache_->dec_[0].store(0, std::memory_order_relaxed);
      int_cache_->dec_[1].store(0, std::memory_order_relaxed);
      int_cache_->inc_[0].store(0, std::memory_order_relaxed);
      int_cache_->inc_[1].store(0, std::memory_order_relaxed);
    }
    orphan_inc_[0].store(0, std::memory_order_relaxed);
    orphan_inc_[1].store(0, std::memory_order_relaxed);
    orphan_dec_[0].store(0, std::memory_order_relaxed);
    orphan_dec_[1].store(0, std::memory_order_relaxed);
  }
};

template <typename Tag>
thread_local typename detail::ThreadCachedInts<Tag>::Integer*
    detail::ThreadCachedInts<Tag>::int_cache_ = nullptr;

} // namespace detail
} // namespace folly