/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/detail/MemoryIdler.h>

#include <folly/GLog.h>
#include <folly/Portability.h>
#include <folly/ScopeGuard.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/memory/MallctlHelper.h>
#include <folly/memory/Malloc.h>
#include <folly/portability/PThread.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>
#include <folly/synchronization/CallOnce.h>

#include <algorithm>
#include <cassert>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstring>
#include <utility>

namespace folly {
namespace detail {
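
// Default period a thread may sit idle before MemoryIdler's waiting helpers
// (declared in MemoryIdler.h) start releasing its memory; wrapped in
// AtomicStruct so callers can read and tune it atomically at runtime.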
AtomicStruct<std::chrono::steady_clock::duration>
    MemoryIdler::defaultIdleTimeout(std::chrono::seconds(5));
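
// Flushes this thread's jemalloc tcache and, when arenas are plentiful enough
// that purging won't hurt a tuned configuration, purges dirty pages from the
// arena this thread is assigned to. A no-op unless running under jemalloc
// with the mallctl* symbols available.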
void MemoryIdler::flushLocalMallocCaches() {
  if (!usingJEMalloc()) {
    return;
  }
  if (!mallctl || !mallctlnametomib || !mallctlbymib) {
    FB_LOG_EVERY_MS(ERROR, 10000) << "mallctl* weak link failed";
    return;
  }

  try {
    // Not using mallctlCall as this will fail if tcache is disabled.
    mallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0);

    // By default jemalloc has 4 arenas per cpu, and then assigns each
    // thread to one of those arenas. This means that in any service
    // that doesn't perform a lot of context switching, the chances that
    // another thread will be using the current thread's arena (and hence
    // doing the appropriate dirty-page purging) are low. Some well-tuned
    // configurations (such as that used by hhvm) use fewer arenas and
    // then pin threads to avoid contended access. In that case, purging
    // the arenas is counter-productive. We use the heuristic that if
    // narenas <= 2 * num_cpus then we shouldn't do anything here, which
    // detects when narenas has been reduced from the default.
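    // For example: with the jemalloc default of narenas = 4 * ncpus the
    // check below passes and we purge this thread's arena; an hhvm-style
    // config with narenas <= 2 * ncpus skips the purge entirely.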
    unsigned narenas;
    unsigned arenaForCurrent;
    size_t mib[3];
    size_t miblen = 3;

    mallctlRead("opt.narenas", &narenas);
    mallctlRead("thread.arena", &arenaForCurrent);
    if (narenas > 2 * CacheLocality::system().numCpus &&
        mallctlnametomib("arena.0.purge", mib, &miblen) == 0) {
      // Rewrite the "0" component of "arena.0.purge" so the purge targets
      // the arena this thread is currently assigned to.
      mib[1] = static_cast<size_t>(arenaForCurrent);
      mallctlbymib(mib, miblen, nullptr, nullptr, nullptr, 0);
    }
  } catch (const std::runtime_error& ex) {
    FB_LOG_EVERY_MS(WARNING, 10000) << ex.what();
  }
}

// Stack madvise isn't Linux or glibc specific, but the system calls
// and arithmetic (and bug compatibility) are not portable. The set of
// platforms could be increased if it was useful.
#if (FOLLY_X64 || FOLLY_PPC64) && defined(_GNU_SOURCE) && \
    defined(__linux__) && !FOLLY_MOBILE && !FOLLY_SANITIZE_ADDRESS
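
// Cached bounds of the current thread's usable stack: tls_stackLimit is the
// lowest usable address (just above any guard page) and tls_stackSize its
// length. 0 means "not yet fetched"; the sentinel value 1 means "fetch
// failed, do nothing".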
static FOLLY_TLS uintptr_t tls_stackLimit;
static FOLLY_TLS size_t tls_stackSize;

static size_t pageSize() {
  static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
  return s_pageSize;
}
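
// Fills tls_stackLimit / tls_stackSize from the thread's pthread attributes,
// trimming off the guard page. Any failure stores the sentinel
// tls_stackSize = 1 so that unmapUnusedStack becomes a no-op.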
static void fetchStackLimits() {
  int err;
  pthread_attr_t attr;
  if ((err = pthread_getattr_np(pthread_self(), &attr))) {
    // some restricted environments can't access /proc
    static folly::once_flag flag;
    folly::call_once(flag, [err]() {
      LOG(WARNING) << "pthread_getattr_np failed errno=" << err;
    });

    tls_stackSize = 1;
    return;
  }
  SCOPE_EXIT {
    pthread_attr_destroy(&attr);
  };

  void* addr;
  size_t rawSize;
  if ((err = pthread_attr_getstack(&attr, &addr, &rawSize))) {
    // unexpected, but it is better to continue in prod than do nothing
    FB_LOG_EVERY_MS(ERROR, 10000) << "pthread_attr_getstack error " << err;
    assert(false);
    tls_stackSize = 1;
    return;
  }
  if (rawSize >= (1ULL << 32)) {
    // Avoid unmapping huge swaths of memory if there is an insane
    // stack size. The boundary of sanity is somewhat arbitrary: 4GB.
    //
    // If we went into /proc to find the actual contiguous mapped pages
    // before unmapping we wouldn't care about the stack size at all,
    // but our current strategy is to unmap the entire range that might
    // be used for the stack even if it hasn't been fully faulted-in.
    //
    // Very large stack size is a bug (hence the assert), but we can
    // carry on if we are in prod.
    FB_LOG_EVERY_MS(ERROR, 10000)
        << "pthread_attr_getstack returned insane stack size " << rawSize;
    assert(false);
    tls_stackSize = 1;
    return;
  }
  assert(addr != nullptr);
  assert(rawSize >= PTHREAD_STACK_MIN);

  // glibc subtracts the guard page from the stack size, even though pthread
  // docs seem to imply the opposite
  size_t guardSize;
  if (pthread_attr_getguardsize(&attr, &guardSize) != 0) {
    guardSize = 0;
  }
  assert(rawSize > guardSize);

  // the stack grows downward, so the guard page adds to the base addr
  tls_stackLimit = reinterpret_cast<uintptr_t>(addr) + guardSize;
  tls_stackSize = rawSize - guardSize;

  assert((tls_stackLimit & (pageSize() - 1)) == 0);
}
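
// Takes the address of a local as an approximation of the current stack
// pointer; FOLLY_NOINLINE prevents this frame from being inlined into the
// caller, which could otherwise place the marker in an unexpected frame.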
FOLLY_NOINLINE static uintptr_t getStackPtr() {
  char marker;
  auto rv = reinterpret_cast<uintptr_t>(&marker);
  return rv;
}
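
// Releases the cold portion of the current thread's stack: every page below
// the page-aligned (stack pointer - retain) is madvised away, and will fault
// back in zero-filled if touched again. A minimal usage sketch (the retain
// value here is only illustrative):
//
//   MemoryIdler::unmapUnusedStack(64 * 1024); // keep the hottest 64 KiB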
void MemoryIdler::unmapUnusedStack(size_t retain) {
  if (tls_stackSize == 0) {
    fetchStackLimits();
  }
  if (tls_stackSize <= std::max(static_cast<size_t>(1), retain)) {
    // covers both missing stack info, and impossibly large retain
    return;
  }

  auto sp = getStackPtr();
  assert(sp >= tls_stackLimit);
  assert(sp - tls_stackLimit < tls_stackSize);
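
  // e.g. with 4 KiB pages, sp = 0x7f0000012345 and retain = 1024 gives
  // end = 0x7f0000011000; pages below that boundary are considered cold.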
  auto end = (sp - retain) & ~(pageSize() - 1);
  if (end <= tls_stackLimit) {
    // no pages are eligible for unmapping
    return;
  }

  size_t len = end - tls_stackLimit;
  assert((len & (pageSize() - 1)) == 0);
  if (madvise((void*)tls_stackLimit, len, MADV_DONTNEED) != 0) {
    // It is likely that the stack vma hasn't been fully grown. In this
    // case madvise will apply dontneed to the present vmas, then return
    // errno of ENOMEM.
    //
    // If thread stack pages are backed by locked or huge pages, madvise
    // will fail with EINVAL. (EINVAL may also be returned if the address
    // or length are bad.) Warn in debug mode, since MemoryIdler may not
    // function as expected.
    //
    // We can also get an EAGAIN, theoretically.
    PLOG_IF(WARNING, kIsDebug && errno == EINVAL) << "madvise failed";
    assert(errno == EAGAIN || errno == ENOMEM || errno == EINVAL);
  }
}

#else

void MemoryIdler::unmapUnusedStack(size_t /* retain */) {}

#endif

} // namespace detail
} // namespace folly