/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/detail/ThreadLocalDetail.h>
#include <folly/synchronization/CallOnce.h>

#include <list>
#include <mutex>

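// Capacity growth factors for a ThreadEntry's `elements` array; see
// StaticMetaBase::reallocate() below for how the small vs. big factor is
// chosen.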
constexpr auto kSmallGrowthFactor = 1.1;
constexpr auto kBigGrowthFactor = 1.7;

namespace folly {
namespace threadlocal_detail {

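// Lazily link this node into the per-id list of thread entries the first time
// the element is used; `locked` selects between pushBackLocked() and
// pushBackUnlocked() on the owning meta.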
void ThreadEntryNode::initIfZero(bool locked) {
  if (UNLIKELY(!next)) {
    if (LIKELY(locked)) {
      parent->meta->pushBackLocked(parent, id);
    } else {
      parent->meta->pushBackUnlocked(parent, id);
    }
  }
}

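// Insert this node just before `head` in the circular, doubly linked list
// that threads together every ThreadEntry using slot `id` (head is the
// meta's sentinel head_ entry, so this is effectively an append). A rough
// sketch of the resulting links:
//
//   head_ <-> e1 <-> e2 <-> ... <-> parent <-> head_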
void ThreadEntryNode::push_back(ThreadEntry* head) {
  // the list node stored in the head entry for this id
  ThreadEntryNode* hnode = &head->elements[id].node;

  // link the current node in just before head (i.e. at the tail)
  next = head;
  prev = hnode->prev;

  // splice it in: the old tail now points forward to us, and head back to us
  ThreadEntryNode* hprev = &hnode->prev->elements[id].node;
  hprev->next = parent;
  hnode->prev = parent;
}

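// Unlink this node from the per-id list and zero its links; a node whose
// prev is nullptr is already unlinked, so calling this repeatedly is safe.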
void ThreadEntryNode::eraseZero() {
  if (LIKELY(prev != nullptr)) {
    // get the prev and next nodes
    ThreadEntryNode* nprev = &prev->elements[id].node;
    ThreadEntryNode* nnext = &next->elements[id].node;

    // update the prev and next
    nnext->prev = prev;
    nprev->next = next;

    // set the prev and next to nullptr
    next = prev = nullptr;
  }
}

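// Create the pthread key whose destructor (onThreadExit) performs per-thread
// cleanup when a thread using this meta exits, and record the key with
// PthreadKeyUnregister.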
StaticMetaBase::StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict)
    : nextId_(1), threadEntry_(threadEntry), strict_(strict) {
  int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
  checkPosixError(ret, "pthread_key_create failed");
  PthreadKeyUnregister::registerKey(pthreadKey_);
}

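// Returns the calling thread's ThreadEntryList. With FOLLY_TLD_USE_FOLLY_TLS
// this is a plain thread-local singleton; otherwise it falls back to a lazily
// created pthread key holding a heap-allocated list.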
ThreadEntryList* StaticMetaBase::getThreadEntryList() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
  static FOLLY_TLS ThreadEntryList threadEntryListSingleton;
  return &threadEntryListSingleton;
#else
  class PthreadKey {
   public:
    PthreadKey() {
      int ret = pthread_key_create(&pthreadKey_, nullptr);
      checkPosixError(ret, "pthread_key_create failed");
      PthreadKeyUnregister::registerKey(pthreadKey_);
    }

    FOLLY_ALWAYS_INLINE pthread_key_t get() const {
      return pthreadKey_;
    }

   private:
    pthread_key_t pthreadKey_;
  };

  auto& instance = detail::createGlobal<PthreadKey, void>();

  ThreadEntryList* threadEntryList =
      static_cast<ThreadEntryList*>(pthread_getspecific(instance.get()));

  if (UNLIKELY(!threadEntryList)) {
    threadEntryList = new ThreadEntryList();
    int ret = pthread_setspecific(instance.get(), threadEntryList);
    checkPosixError(ret, "pthread_setspecific failed");
  }

  return threadEntryList;
#endif
}

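// A thread counts as dying once any of its ThreadEntry objects has been
// marked removed_ by onThreadExit().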
bool StaticMetaBase::dying() {
  for (auto te = getThreadEntryList()->head; te; te = te->listNext) {
    if (te->removed_) {
      return true;
    }
  }
  return false;
}

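// pthread-key destructor: first dispose this ThreadEntry's elements (possibly
// over several rounds, since element destructors may touch other ThreadLocals),
// then, once the last ThreadEntry of the thread is gone, tear down the whole
// ThreadEntryList.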
void StaticMetaBase::onThreadExit(void* ptr) {
  auto threadEntry = static_cast<ThreadEntry*>(ptr);

  {
    auto& meta = *threadEntry->meta;

    // Make sure this ThreadEntry is available if ThreadLocal A is accessed in
    // ThreadLocal B destructor.
    pthread_setspecific(meta.pthreadKey_, threadEntry);
    SharedMutex::ReadHolder rlock(nullptr);
    if (meta.strict_) {
      rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
    }
    {
      std::lock_guard<std::mutex> g(meta.lock_);
      // mark it as removed
      threadEntry->removed_ = true;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      for (size_t i = 0u; i < elementsCapacity; ++i) {
        threadEntry->elements[i].node.eraseZero();
      }
      // No need to hold the lock any longer; the ThreadEntry is private to
      // this thread now that it's been removed from meta.
    }
    // NOTE: User-provided deleter / object dtor itself may be using
    // ThreadLocal with the same Tag, so dispose() calls below may (re)create
    // some of the elements or even increase elementsCapacity, thus multiple
    // cleanup rounds may be required.
    for (bool shouldRun = true; shouldRun;) {
      shouldRun = false;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      FOR_EACH_RANGE (i, 0, elementsCapacity) {
        if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
          threadEntry->elements[i].cleanup();
          shouldRun = true;
        }
      }
    }
    pthread_setspecific(meta.pthreadKey_, nullptr);
  }

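  // One ThreadEntry fewer for this thread; only when the last one goes away
  // do we tear down the whole list below.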
  auto threadEntryList = threadEntry->list;
  DCHECK_GT(threadEntryList->count, 0u);

  --threadEntryList->count;

  if (threadEntryList->count) {
    return;
  }

  // This was the last ThreadEntry of the thread: dispose the elements of
  // every entry still on the list.
  for (bool shouldRunOuter = true; shouldRunOuter;) {
    shouldRunOuter = false;
    auto tmp = threadEntryList->head;
    while (tmp) {
      auto& meta = *tmp->meta;
      pthread_setspecific(meta.pthreadKey_, tmp);
      SharedMutex::ReadHolder rlock(nullptr);
      if (meta.strict_) {
        rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
      }
      for (bool shouldRunInner = true; shouldRunInner;) {
        shouldRunInner = false;
        auto elementsCapacity = tmp->getElementsCapacity();
        FOR_EACH_RANGE (i, 0, elementsCapacity) {
          if (tmp->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
            tmp->elements[i].cleanup();
            shouldRunInner = true;
            shouldRunOuter = true;
          }
        }
      }
      pthread_setspecific(meta.pthreadKey_, nullptr);
      tmp = tmp->listNext;
    }
  }

  // free the entry list
  auto head = threadEntryList->head;
  threadEntryList->head = nullptr;
  while (head) {
    auto tmp = head;
    head = head->listNext;
    if (tmp->elements) {
      free(tmp->elements);
      tmp->elements = nullptr;
      tmp->setElementsCapacity(0);
    }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
    delete tmp;
#endif
  }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
  delete threadEntryList;
#endif
}

uint32_t StaticMetaBase::elementsCapacity() const {
  ThreadEntry* threadEntry = (*threadEntry_)();

  return FOLLY_LIKELY(!!threadEntry) ? threadEntry->getElementsCapacity() : 0;
}

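// Assign a slot id to a ThreadLocal instance: reuse a previously freed id if
// one exists, otherwise hand out the next fresh one, and make sure head_ has
// room for it.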
uint32_t StaticMetaBase::allocate(EntryID* ent) {
  uint32_t id;
  auto& meta = *this;
  std::lock_guard<std::mutex> g(meta.lock_);

  id = ent->value.load();
  if (id != kEntryIDInvalid) {
    return id;
  }

  if (!meta.freeIds_.empty()) {
    id = meta.freeIds_.back();
    meta.freeIds_.pop_back();
  } else {
    id = meta.nextId_++;
  }

  uint32_t old_id = ent->value.exchange(id);
  DCHECK_EQ(old_id, kEntryIDInvalid);

  reserveHeadUnlocked(id);

  return id;
}

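// Retire a slot id when a ThreadLocal instance is destroyed: under the locks,
// unhook and collect that id's elements from every thread and return the id
// to the free list; the collected elements are then disposed outside the
// locks.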
void StaticMetaBase::destroy(EntryID* ent) {
  try {
    auto& meta = *this;

    // Elements in other threads that use this id.
    std::vector<ElementWrapper> elements;

    {
      SharedMutex::WriteHolder wlock(nullptr);
      if (meta.strict_) {
        /*
         * In strict mode, the logic guarantees per-thread instances are
         * destroyed by the moment ThreadLocal<> dtor returns.
         * In order to achieve that, we should wait until concurrent
         * onThreadExit() calls (that might acquire ownership over per-thread
         * instances in order to destroy them) are finished.
         */
        wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_);
      }

      {
        std::lock_guard<std::mutex> g(meta.lock_);
        uint32_t id = ent->value.exchange(kEntryIDInvalid);
        if (id == kEntryIDInvalid) {
          return;
        }

        auto& node = meta.head_.elements[id].node;
        while (!node.empty()) {
          auto* next = node.getNext();
          next->eraseZero();

          ThreadEntry* e = next->parent;
          auto elementsCapacity = e->getElementsCapacity();
          if (id < elementsCapacity && e->elements[id].ptr) {
            elements.push_back(e->elements[id]);

            /*
             * Writing another thread's ThreadEntry from here is fine;
             * the only other potential reader is the owning thread --
             * from onThreadExit (which grabs the lock, so is properly
             * synchronized with us) or from get(), which also grabs
             * the lock if it needs to resize the elements vector.
             *
             * We can't conflict with reads for a get(id), because
             * it's illegal to call get on a thread local that's
             * destructing.
             */
            e->elements[id].ptr = nullptr;
            e->elements[id].deleter1 = nullptr;
            e->elements[id].ownsDeleter = false;
          }
        }
        meta.freeIds_.push_back(id);
      }
    }
    // Delete elements outside the locks.
    for (ElementWrapper& elem : elements) {
      if (elem.dispose(TLPDestructionMode::ALL_THREADS)) {
        elem.cleanup();
      }
    }
  } catch (...) { // Just in case we get a lock error or something anyway...
    LOG(WARNING) << "Destructor discarding an exception that was thrown.";
  }
}

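// Grow the elements array of `threadEntry` so that slot `idval` fits. Returns
// a freshly allocated, zero-initialized array for the caller to copy into, or
// nullptr if jemalloc managed to expand the existing allocation in place; in
// both cases `newCapacity` is set to the resulting capacity.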
ElementWrapper* StaticMetaBase::reallocate(
    ThreadEntry* threadEntry,
    uint32_t idval,
    size_t& newCapacity) {
  size_t prevCapacity = threadEntry->getElementsCapacity();

  // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent
  // very slow start.
  auto smallCapacity = static_cast<size_t>((idval + 5) * kSmallGrowthFactor);
  auto bigCapacity = static_cast<size_t>((idval + 5) * kBigGrowthFactor);

  newCapacity =
      (threadEntry->meta &&
       (bigCapacity <= threadEntry->meta->head_.getElementsCapacity()))
      ? bigCapacity
      : smallCapacity;

  assert(newCapacity > prevCapacity);
  ElementWrapper* reallocated = nullptr;

  // Need to grow. Note that we can't call realloc, as elements is
  // still linked in meta, so another thread might access invalid memory
  // after realloc succeeds. We'll copy by hand and update our ThreadEntry
  // under the lock.
  if (usingJEMalloc()) {
    bool success = false;
    size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0);

    // Try to grow in place.
    //
    // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory,
    // even if a previous allocation allocated more than we requested.
    // This is fine; we always use MALLOCX_ZERO with jemalloc and we
    // always expand our allocation to the real size.
    if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) {
      success =
          (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) ==
           newByteSize);
    }

    // In-place growth failed.
    if (!success) {
      success =
          ((reallocated = static_cast<ElementWrapper*>(
                mallocx(newByteSize, MALLOCX_ZERO))) != nullptr);
    }

    if (success) {
      // Expand to real size
      assert(newByteSize / sizeof(ElementWrapper) >= newCapacity);
      newCapacity = newByteSize / sizeof(ElementWrapper);
    } else {
      throw std::bad_alloc();
    }
  } else { // no jemalloc
    // calloc() is simpler than malloc() followed by memset(), and
    // potentially faster when dealing with a lot of memory, as it can get
    // already-zeroed pages from the kernel.
    reallocated = static_cast<ElementWrapper*>(
        calloc(newCapacity, sizeof(ElementWrapper)));
    if (!reallocated) {
      throw std::bad_alloc();
    }
  }

  return reallocated;
}

/**
 * Reserve enough space in ThreadEntry::elements for the slot identified by
 * `id` to fit.
 */
void StaticMetaBase::reserve(EntryID* id) {
  auto& meta = *this;
  ThreadEntry* threadEntry = (*threadEntry_)();
  size_t prevCapacity = threadEntry->getElementsCapacity();

  uint32_t idval = id->getOrAllocate(meta);
  if (prevCapacity > idval) {
    return;
  }

  size_t newCapacity;
  ElementWrapper* reallocated = reallocate(threadEntry, idval, newCapacity);

  // Success, update the entry
  {
    std::lock_guard<std::mutex> g(meta.lock_);

    if (reallocated) {
      /*
       * Note: we need to hold the meta lock when copying data out of
       * the old vector, because some other thread might be
       * destructing a ThreadLocal and writing to the elements vector
       * of this thread.
       */
      if (prevCapacity != 0) {
        memcpy(
            reallocated,
            threadEntry->elements,
            sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, threadEntry->elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      threadEntry->elements[i].node.initZero(threadEntry, i);
    }

    threadEntry->setElementsCapacity(newCapacity);
  }

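  // After the swap above, `reallocated` points at the old array (or is
  // nullptr if the allocation was grown in place), so this releases the old
  // storage.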
  free(reallocated);
}

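// Same growth path as reserve(), but for the global head_ entry; the caller
// is expected to hold lock_ already (allocate() does), hence "Unlocked".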
void StaticMetaBase::reserveHeadUnlocked(uint32_t id) {
  if (head_.getElementsCapacity() <= id) {
    size_t prevCapacity = head_.getElementsCapacity();
    size_t newCapacity;
    ElementWrapper* reallocated = reallocate(&head_, id, newCapacity);

    if (reallocated) {
      if (prevCapacity != 0) {
        memcpy(
            reallocated, head_.elements, sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, head_.elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      head_.elements[i].node.init(&head_, i);
    }

    head_.setElementsCapacity(newCapacity);
    free(reallocated);
  }
}

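// Both variants link t's node for slot `id` into the list rooted at head_;
// pushBackLocked() acquires lock_ itself, while pushBackUnlocked() assumes
// the caller has already arranged the necessary synchronization.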
void StaticMetaBase::pushBackLocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    std::lock_guard<std::mutex> g(lock_);
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

void StaticMetaBase::pushBackUnlocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

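// Construct the unregister singleton with maximum static-constructor priority
// so it exists before any other static initializer that might register a key.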
FOLLY_STATIC_CTOR_PRIORITY_MAX
PthreadKeyUnregister PthreadKeyUnregister::instance_;
} // namespace threadlocal_detail
} // namespace folly