/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/detail/ThreadLocalDetail.h>

#include <list>
#include <mutex>

#include <folly/memory/Malloc.h>

constexpr auto kSmallGrowthFactor = 1.1;
constexpr auto kBigGrowthFactor = 1.7;

namespace folly {
namespace threadlocal_detail {

void ThreadEntryNode::initIfZero(bool locked) {
  if (UNLIKELY(!next)) {
    if (LIKELY(locked)) {
      parent->meta->pushBackLocked(parent, id);
    } else {
      parent->meta->pushBackUnlocked(parent, id);
    }
  }
}

void ThreadEntryNode::push_back(ThreadEntry* head) {
  // get the head prev and next nodes
  ThreadEntryNode* hnode = &head->elements[id].node;

  // update current
  next = head;
  prev = hnode->prev;

  // fix up the node that used to precede the head
  ThreadEntryNode* hprev = &hnode->prev->elements[id].node;
  hprev->next = parent;
  hnode->prev = parent;
}

void ThreadEntryNode::eraseZero() {
  if (LIKELY(prev != nullptr)) {
    // get the prev and next nodes
    ThreadEntryNode* nprev = &prev->elements[id].node;
    ThreadEntryNode* nnext = &next->elements[id].node;

    // update the prev and next
    nnext->prev = prev;
    nprev->next = next;

    // set the prev and next to nullptr
    next = prev = nullptr;
  }
}

StaticMetaBase::StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict)
    : nextId_(1), threadEntry_(threadEntry), strict_(strict) {
  int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
  checkPosixError(ret, "pthread_key_create failed");
  PthreadKeyUnregister::registerKey(pthreadKey_);
}

ThreadEntryList* StaticMetaBase::getThreadEntryList() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
  static FOLLY_TLS ThreadEntryList threadEntryListSingleton;
  return &threadEntryListSingleton;
#else
  class PthreadKey {
   public:
    PthreadKey() {
      int ret = pthread_key_create(&pthreadKey_, nullptr);
      checkPosixError(ret, "pthread_key_create failed");
      PthreadKeyUnregister::registerKey(pthreadKey_);
    }

    FOLLY_ALWAYS_INLINE pthread_key_t get() const {
      return pthreadKey_;
    }

   private:
    pthread_key_t pthreadKey_;
  };
  auto& instance = detail::createGlobal<PthreadKey, void>();

  ThreadEntryList* threadEntryList =
      static_cast<ThreadEntryList*>(pthread_getspecific(instance.get()));

  if (UNLIKELY(!threadEntryList)) {
    threadEntryList = new ThreadEntryList();
    int ret = pthread_setspecific(instance.get(), threadEntryList);
    checkPosixError(ret, "pthread_setspecific failed");
  }

  return threadEntryList;
#endif
}

bool StaticMetaBase::dying() {
  for (auto te = getThreadEntryList()->head; te; te = te->listNext) {
    if (te->removed_) {
      return true;
    }
  }
  return false;
}
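
// A rough sketch (hypothetical user code) of why onThreadExit() below runs
// its dispose() loops until a full pass makes no progress: a user-provided
// destructor may itself touch a ThreadLocal and (re)create an element that
// was already cleaned up, so a single pass is not enough.
//
//   struct Logger {
//     ~Logger() { otherThreadLocal->flush(); } // may re-create an element
//   };
//   folly::ThreadLocal<Logger> perThreadLogger; // needs >1 cleanup round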

void StaticMetaBase::onThreadExit(void* ptr) {
  auto threadEntry = static_cast<ThreadEntry*>(ptr);

  {
    auto& meta = *threadEntry->meta;

    // Make sure this ThreadEntry is available if ThreadLocal A is accessed in
    // ThreadLocal B destructor.
    pthread_setspecific(meta.pthreadKey_, threadEntry);
    SharedMutex::ReadHolder rlock(nullptr);
    if (meta.strict_) {
      rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
    }
    {
      std::lock_guard<std::mutex> g(meta.lock_);
      // mark it as removed
      threadEntry->removed_ = true;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      for (size_t i = 0u; i < elementsCapacity; ++i) {
        threadEntry->elements[i].node.eraseZero();
      }
      // No need to hold the lock any longer; the ThreadEntry is private to
      // this thread now that it's been removed from meta.
    }
    // NOTE: User-provided deleter / object dtor itself may be using
    // ThreadLocal with the same Tag, so dispose() calls below may (re)create
    // some of the elements or even increase elementsCapacity, thus multiple
    // cleanup rounds may be required.
    for (bool shouldRun = true; shouldRun;) {
      shouldRun = false;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      FOR_EACH_RANGE (i, 0, elementsCapacity) {
        if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
          threadEntry->elements[i].cleanup();
          shouldRun = true;
        }
      }
    }
    pthread_setspecific(meta.pthreadKey_, nullptr);
  }

  auto threadEntryList = threadEntry->list;
  DCHECK_GT(threadEntryList->count, 0u);

  --threadEntryList->count;

  if (threadEntryList->count) {
    return;
  }

  // This was the last ThreadEntry for this thread; dispose all the elements
  // of every entry in the list.
  for (bool shouldRunOuter = true; shouldRunOuter;) {
    shouldRunOuter = false;
    auto tmp = threadEntryList->head;
    while (tmp) {
      auto& meta = *tmp->meta;
      pthread_setspecific(meta.pthreadKey_, tmp);
      SharedMutex::ReadHolder rlock(nullptr);
      if (meta.strict_) {
        rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
      }
      for (bool shouldRunInner = true; shouldRunInner;) {
        shouldRunInner = false;
        auto elementsCapacity = tmp->getElementsCapacity();
        FOR_EACH_RANGE (i, 0, elementsCapacity) {
          if (tmp->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
            tmp->elements[i].cleanup();
            shouldRunInner = true;
            shouldRunOuter = true;
          }
        }
      }
      pthread_setspecific(meta.pthreadKey_, nullptr);
      tmp = tmp->listNext;
    }
  }

  // free the entry list
  auto head = threadEntryList->head;
  threadEntryList->head = nullptr;
  while (head) {
    auto tmp = head;
    head = head->listNext;
    if (tmp->elements) {
      free(tmp->elements);
      tmp->elements = nullptr;
      tmp->setElementsCapacity(0);
    }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
    delete tmp;
#endif
  }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
  delete threadEntryList;
#endif
}

uint32_t StaticMetaBase::elementsCapacity() const {
  ThreadEntry* threadEntry = (*threadEntry_)();

  return FOLLY_LIKELY(!!threadEntry) ? threadEntry->getElementsCapacity() : 0;
}

uint32_t StaticMetaBase::allocate(EntryID* ent) {
  uint32_t id;
  auto& meta = *this;
  std::lock_guard<std::mutex> g(meta.lock_);

  id = ent->value.load();
  if (id != kEntryIDInvalid) {
    return id;
  }

  if (!meta.freeIds_.empty()) {
    id = meta.freeIds_.back();
    meta.freeIds_.pop_back();
  } else {
    id = meta.nextId_++;
  }

  uint32_t old_id = ent->value.exchange(id);
  DCHECK_EQ(old_id, kEntryIDInvalid);

  reserveHeadUnlocked(id);

  return id;
}
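
// A hedged sketch of how allocate()/destroy() pair up over the lifetime of a
// ThreadLocal instance (the real call sites live in folly/ThreadLocal.h;
// Widget is a placeholder type):
//
//   folly::ThreadLocalPtr<Widget> ptr; // EntryID starts as kEntryIDInvalid
//   ptr.reset(new Widget());           // first use ends up in allocate(),
//                                      // reusing an id from freeIds_ if one
//                                      // is available, else taking nextId_++
//   // ~ThreadLocalPtr()               // ends up in destroy(), which clears
//                                      // the slot in every thread and pushes
//                                      // the id back onto freeIds_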

void StaticMetaBase::destroy(EntryID* ent) {
  try {
    auto& meta = *this;

    // Elements in other threads that use this id.
    std::vector<ElementWrapper> elements;

    {
      SharedMutex::WriteHolder wlock(nullptr);
      if (meta.strict_) {
        /*
         * In strict mode, the logic guarantees per-thread instances are
         * destroyed by the time the ThreadLocal<> dtor returns.
         * In order to achieve that, we have to wait until concurrent
         * onThreadExit() calls (that might acquire ownership over per-thread
         * instances in order to destroy them) are finished.
         */
        wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_);
      }

      {
        std::lock_guard<std::mutex> g(meta.lock_);
        uint32_t id = ent->value.exchange(kEntryIDInvalid);
        if (id == kEntryIDInvalid) {
          return;
        }

        auto& node = meta.head_.elements[id].node;
        while (!node.empty()) {
          auto* next = node.getNext();
          next->eraseZero();

          ThreadEntry* e = next->parent;
          auto elementsCapacity = e->getElementsCapacity();
          if (id < elementsCapacity && e->elements[id].ptr) {
            elements.push_back(e->elements[id]);

            /*
             * Writing another thread's ThreadEntry from here is fine;
             * the only other potential reader is the owning thread --
             * from onThreadExit (which grabs the lock, so is properly
             * synchronized with us) or from get(), which also grabs
             * the lock if it needs to resize the elements vector.
             *
             * We can't conflict with reads for a get(id), because
             * it's illegal to call get on a thread local that's
             * destructing.
             */
            e->elements[id].ptr = nullptr;
            e->elements[id].deleter1 = nullptr;
            e->elements[id].ownsDeleter = false;
          }
        }
        meta.freeIds_.push_back(id);
      }
    }
    // Delete elements outside the locks.
    for (ElementWrapper& elem : elements) {
      if (elem.dispose(TLPDestructionMode::ALL_THREADS)) {
        elem.cleanup();
      }
    }
  } catch (...) { // Just in case we get a lock error or something anyway...
    LOG(WARNING) << "Destructor discarding an exception that was thrown.";
  }
}

ElementWrapper* StaticMetaBase::reallocate(
    ThreadEntry* threadEntry, uint32_t idval, size_t& newCapacity) {
  size_t prevCapacity = threadEntry->getElementsCapacity();

  // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent
  // very slow start.
  auto smallCapacity = static_cast<size_t>((idval + 5) * kSmallGrowthFactor);
  auto bigCapacity = static_cast<size_t>((idval + 5) * kBigGrowthFactor);

  newCapacity =
      (threadEntry->meta &&
       (bigCapacity <= threadEntry->meta->head_.getElementsCapacity()))
      ? bigCapacity
      : smallCapacity;

  assert(newCapacity > prevCapacity);
  ElementWrapper* reallocated = nullptr;

  // Need to grow. Note that we can't call realloc, as elements is
  // still linked in meta, so another thread might access invalid memory
  // after realloc succeeds. We'll copy by hand and update our ThreadEntry
  // under the lock.
  if (usingJEMalloc()) {
    bool success = false;
    size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0);

    // Try to grow in place.
    //
    // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory,
    // even if a previous allocation allocated more than we requested.
    // This is fine; we always use MALLOCX_ZERO with jemalloc and we
    // always expand our allocation to the real size.
    if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) {
      success =
          (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) ==
           newByteSize);
    }

    // In-place growth failed.
    if (!success) {
      success =
          ((reallocated = static_cast<ElementWrapper*>(
                mallocx(newByteSize, MALLOCX_ZERO))) != nullptr);
    }

    if (success) {
      // Expand to real size
      assert(newByteSize / sizeof(ElementWrapper) >= newCapacity);
      newCapacity = newByteSize / sizeof(ElementWrapper);
    } else {
      throw std::bad_alloc();
    }
  } else { // no jemalloc
    // calloc() is simpler than malloc() followed by memset(), and
    // potentially faster when dealing with a lot of memory, as it can get
    // already-zeroed pages from the kernel.
    reallocated = static_cast<ElementWrapper*>(
        calloc(newCapacity, sizeof(ElementWrapper)));
    if (!reallocated) {
      throw std::bad_alloc();
    }
  }

  return reallocated;
}
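
// Worked example of the capacity computation in reallocate() (the double ->
// size_t conversion truncates): for idval == 20, smallCapacity is
// (20 + 5) * 1.1 = 27 and bigCapacity is (20 + 5) * 1.7 = 42. The 1.7x step
// is taken only when it does not exceed the capacity of the global head_
// array; otherwise the conservative 1.1x step is used.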

/**
 * Reserve enough space in the ThreadEntry::elements for the item
 * @id to fit in.
 */
void StaticMetaBase::reserve(EntryID* id) {
  auto& meta = *this;
  ThreadEntry* threadEntry = (*threadEntry_)();
  size_t prevCapacity = threadEntry->getElementsCapacity();

  uint32_t idval = id->getOrAllocate(meta);
  if (prevCapacity > idval) {
    return;
  }

  size_t newCapacity;
  ElementWrapper* reallocated = reallocate(threadEntry, idval, newCapacity);

  // Success, update the entry
  {
    std::lock_guard<std::mutex> g(meta.lock_);

    if (reallocated) {
      /*
       * Note: we need to hold the meta lock when copying data out of
       * the old vector, because some other thread might be
       * destructing a ThreadLocal and writing to the elements vector
       * of this thread.
       */
      if (prevCapacity != 0) {
        memcpy(
            reallocated,
            threadEntry->elements,
            sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, threadEntry->elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      threadEntry->elements[i].node.initZero(threadEntry, i);
    }

    threadEntry->setElementsCapacity(newCapacity);
  }

  free(reallocated);
}

void StaticMetaBase::reserveHeadUnlocked(uint32_t id) {
  if (head_.getElementsCapacity() <= id) {
    size_t prevCapacity = head_.getElementsCapacity();
    size_t newCapacity;
    ElementWrapper* reallocated = reallocate(&head_, id, newCapacity);

    if (reallocated) {
      if (prevCapacity != 0) {
        memcpy(
            reallocated, head_.elements, sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, head_.elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      head_.elements[i].node.init(&head_, i);
    }

    head_.setElementsCapacity(newCapacity);
    free(reallocated);
  }
}

void StaticMetaBase::pushBackLocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    std::lock_guard<std::mutex> g(lock_);
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

void StaticMetaBase::pushBackUnlocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

FOLLY_STATIC_CTOR_PRIORITY_MAX
PthreadKeyUnregister PthreadKeyUnregister::instance_;

} // namespace threadlocal_detail
} // namespace folly
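
// A hedged end-to-end sketch of how user code exercises the machinery above
// (the public API is folly::ThreadLocal / folly::ThreadLocalPtr in
// folly/ThreadLocal.h; this trace is illustrative, not exhaustive):
//
//   folly::ThreadLocal<int> counter;
//   ++*counter; // first access on a thread: reserve() grows that thread's
//               // elements array if needed and the element's node is linked
//               // into the per-id list rooted at meta.head_ via
//               // pushBackLocked()/pushBackUnlocked(); on thread exit,
//               // onThreadExit() unlinks the node and disposes the element.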