vn-verdnaturachat/ios/Pods/Flipper-Folly/folly/system/MemoryMapping.cpp

/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/system/MemoryMapping.h>
#include <algorithm>
#include <cerrno>
#include <utility>
#include <glog/logging.h>
#include <folly/Format.h>
#include <folly/Portability.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/SysSyscall.h>
#ifdef __linux__
#include <folly/experimental/io/HugePages.h> // @manual
#endif
#include <fcntl.h>
#include <sys/types.h>
#include <system_error>

static constexpr ssize_t kDefaultMlockChunkSize = !folly::kMscVer
    // Linux implementations of unmap/mlock/munlock take a kernel
    // semaphore and block other threads from doing other memory
    // operations. Split the operations in chunks.
    ? (1 << 20) // 1MB
    // MSVC doesn't have this problem, and calling munmap many times
    // with the same address is a bad idea with the windows implementation.
    : (-1);

DEFINE_int64(
    mlock_chunk_size,
    kDefaultMlockChunkSize,
    "Maximum bytes to mlock/munlock/munmap at once "
    "(will be rounded up to PAGESIZE). Ignored if negative.");

namespace folly {

namespace {

enum mmap_flags : int {
#ifdef MAP_POPULATE
  populate = MAP_POPULATE,
#else
  populate = 0,
#endif
};

} // namespace

MemoryMapping::MemoryMapping(MemoryMapping&& other) noexcept {
  swap(other);
}

MemoryMapping::MemoryMapping(
    File file,
    off_t offset,
    off_t length,
    Options options)
    : file_(std::move(file)), options_(options) {
  CHECK(file_);
  init(offset, length);
}

MemoryMapping::MemoryMapping(
    const char* name,
    off_t offset,
    off_t length,
    Options options)
    : MemoryMapping(
          File(name, options.writable ? O_RDWR : O_RDONLY),
          offset,
          length,
          options) {}

MemoryMapping::MemoryMapping(
    int fd,
    off_t offset,
    off_t length,
    Options options)
    : MemoryMapping(File(fd), offset, length, options) {}

MemoryMapping::MemoryMapping(AnonymousType, off_t length, Options options)
    : options_(options) {
  init(0, length);
}
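
// Construction sketch (illustrative only; "/tmp/data.bin" is a hypothetical
// path, and the range()/writableRange() accessors are assumed from
// MemoryMapping.h). The first mapping is read-only; the second reuses the
// writable() options helper that mmapFileCopy() below also uses:
//
//   folly::MemoryMapping ro("/tmp/data.bin");
//   auto bytes = ro.range();
//
//   folly::MemoryMapping rw(
//       folly::File("/tmp/data.bin", O_RDWR),
//       0,
//       off_t(bytes.size()),
//       folly::MemoryMapping::writable());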

namespace {

#ifdef __linux__

void getDeviceOptions(dev_t device, off_t& pageSize, bool& autoExtend) {
  auto ps = getHugePageSizeForDevice(device);
  if (ps) {
    pageSize = ps->size;
    autoExtend = true;
  }
}

#else

inline void getDeviceOptions(dev_t, off_t&, bool&) {}

#endif

} // namespace

void MemoryMapping::init(off_t offset, off_t length) {
  const bool grow = options_.grow;
  const bool anon = !file_;
  CHECK(!(grow && anon));

  off_t& pageSize = options_.pageSize;

  struct stat st;

  // On Linux, hugetlbfs file systems don't require ftruncate() to grow the
  // file, and (on kernels before 2.6.24) don't even allow it. Also, the file
  // size is always a multiple of the page size.
  bool autoExtend = false;

  if (!anon) {
    // Stat the file
    CHECK_ERR(fstat(file_.fd(), &st));

    if (pageSize == 0) {
      getDeviceOptions(st.st_dev, pageSize, autoExtend);
    }
  } else {
    DCHECK(!file_);
    DCHECK_EQ(offset, 0);
    CHECK_EQ(pageSize, 0);
    CHECK_GE(length, 0);
  }

  if (pageSize == 0) {
    pageSize = off_t(sysconf(_SC_PAGESIZE));
  }

  CHECK_GT(pageSize, 0);
  CHECK_EQ(pageSize & (pageSize - 1), 0); // power of two
  CHECK_GE(offset, 0);

  // Round down the start of the mapped region
  off_t skipStart = offset % pageSize;
  offset -= skipStart;

  mapLength_ = length;
  if (mapLength_ != -1) {
    mapLength_ += skipStart;

    // Round up the end of the mapped region
    mapLength_ = (mapLength_ + pageSize - 1) / pageSize * pageSize;
  }

  off_t remaining = anon ? length : st.st_size - offset;

  if (mapLength_ == -1) {
    length = mapLength_ = remaining;
  } else {
    if (length > remaining) {
      if (grow) {
        if (!autoExtend) {
          PCHECK(0 == ftruncate(file_.fd(), offset + length))
              << "ftruncate() failed, couldn't grow file to "
              << offset + length;
          remaining = length;
        } else {
          // Extend mapping to multiple of page size, don't use ftruncate
          remaining = mapLength_;
        }
      } else {
        length = remaining;
      }
    }
    if (mapLength_ > remaining) {
      mapLength_ = remaining;
    }
  }

  if (length == 0) {
    mapLength_ = 0;
    mapStart_ = nullptr;
  } else {
    int flags = options_.shared ? MAP_SHARED : MAP_PRIVATE;
    if (anon) {
      flags |= MAP_ANONYMOUS;
    }
    if (options_.prefault) {
      flags |= mmap_flags::populate;
    }

    // The standard doesn't actually require PROT_NONE to be zero...
    int prot = PROT_NONE;
    if (options_.readable || options_.writable) {
      prot =
          ((options_.readable ? PROT_READ : 0) |
           (options_.writable ? PROT_WRITE : 0));
    }

    auto start = static_cast<unsigned char*>(mmap(
        options_.address, size_t(mapLength_), prot, flags, file_.fd(), offset));
    PCHECK(start != MAP_FAILED)
        << " offset=" << offset << " length=" << mapLength_;
    mapStart_ = start;
    data_.reset(start + skipStart, size_t(length));
  }
}
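
// Worked example of the rounding above (not in the original source; assumes a
// 4096-byte page size and a file that already holds offset + length bytes):
// for offset = 5000 and length = 10000, skipStart = 5000 % 4096 = 904, so the
// mmap() starts at file offset 4096; mapLength_ becomes 10000 + 904 = 10904,
// rounded up to 12288 (three pages), and data_ points 904 bytes past
// mapStart_ with the caller-visible length of 10000.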

namespace {

off_t memOpChunkSize(off_t length, off_t pageSize) {
  off_t chunkSize = length;
  if (FLAGS_mlock_chunk_size <= 0) {
    return chunkSize;
  }

  chunkSize = off_t(FLAGS_mlock_chunk_size);
  off_t r = chunkSize % pageSize;
  if (r) {
    chunkSize += (pageSize - r);
  }
  return chunkSize;
}
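
// Worked example (not in the original source; assumes a 4096-byte page size):
// the default 1 MiB flag is already page aligned and is returned unchanged,
// while a hypothetical --mlock_chunk_size=1000000 gives r = 1000000 % 4096 =
// 576 and is rounded up to 1000000 + (4096 - 576) = 1003520 bytes.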

/**
 * Run @op in chunks over the buffer @mem of @bufSize length.
 *
 * Return:
 * - success: true + amountSucceeded == bufSize (op success on whole buffer)
 * - failure: false + amountSucceeded == nr bytes on which op succeeded.
 */
template <typename Op>
bool memOpInChunks(
    Op op,
    void* mem,
    size_t bufSize,
    off_t pageSize,
    size_t& amountSucceeded) {
  // Linux' unmap/mlock/munlock take a kernel semaphore and block other threads
  // from doing other memory operations. If the size of the buffer is big the
  // semaphore can be down for seconds (for benchmarks see
  // http://kostja-osipov.livejournal.com/42963.html). Doing the operations in
  // chunks breaks the locking into intervals and lets other threads do memory
  // operations of their own.

  auto chunkSize = size_t(memOpChunkSize(off_t(bufSize), pageSize));

  auto addr = static_cast<char*>(mem);
  amountSucceeded = 0;

  while (amountSucceeded < bufSize) {
    size_t size = std::min(chunkSize, bufSize - amountSucceeded);
    if (op(addr + amountSucceeded, size) != 0) {
      return false;
    }
    amountSucceeded += size;
  }

  return true;
}

} // namespace

int mlock2wrapper(
    const void* addr,
    size_t len,
    MemoryMapping::LockFlags flags) {
  int intFlags = 0;
  if (flags.lockOnFault) {
    // MLOCK_ONFAULT, only available in non-portable headers.
    intFlags |= 0x01;
  }

#if defined(__GLIBC__) && !defined(__APPLE__)
#if __GLIBC_PREREQ(2, 27)
  return mlock2(addr, len, intFlags);
#elif defined(SYS_mlock2)
  // SYS_mlock2 is defined in Linux headers since 4.4
  return syscall(SYS_mlock2, addr, len, intFlags);
#else // !__GLIBC_PREREQ(2, 27) && !defined(SYS_mlock2)
  errno = ENOSYS;
  return -1;
#endif
#else // !defined(__GLIBC__) || defined(__APPLE__)
  errno = ENOSYS;
  return -1;
#endif
}
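
// Hedged note (not in the original source): the hard-coded 0x01 above matches
// MLOCK_ONFAULT from the Linux UAPI headers. On platforms where neither
// glibc's mlock2() nor the raw syscall is available the wrapper fails with
// ENOSYS, so a caller probing for lock-on-fault support could do something
// like the following (addr, len and flags are hypothetical locals):
//
//   if (mlock2wrapper(addr, len, flags) == -1 && errno == ENOSYS) {
//     PCHECK(::mlock(addr, len) == 0); // fall back to eager locking
//   }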

bool MemoryMapping::mlock(LockMode mode, LockFlags flags) {
  size_t amountSucceeded = 0;
  locked_ = memOpInChunks(
      [flags](void* addr, size_t len) -> int {
        // If no flags are set, mlock2() behaves exactly the same as
        // mlock(). Prefer the portable variant.
        return flags == LockFlags{} ? ::mlock(addr, len)
                                    : mlock2wrapper(addr, len, flags);
      },
      mapStart_,
      size_t(mapLength_),
      options_.pageSize,
      amountSucceeded);
  if (locked_) {
    return true;
  }

  auto msg =
      folly::format("mlock({}) failed at {}", mapLength_, amountSucceeded);
  if (mode == LockMode::TRY_LOCK && errno == EPERM) {
    PLOG(WARNING) << msg;
  } else if (mode == LockMode::TRY_LOCK && errno == ENOMEM) {
    VLOG(1) << msg;
  } else {
    PLOG(FATAL) << msg;
  }

  // only part of the buffer was mlocked, unlock it back
  if (!memOpInChunks(
          ::munlock,
          mapStart_,
          amountSucceeded,
          options_.pageSize,
          amountSucceeded)) {
    PLOG(WARNING) << "munlock()";
  }

  return false;
}
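
// Usage sketch (illustrative; assumes the LockMode enumerators declared in
// MemoryMapping.h, and the path is hypothetical). TRY_LOCK downgrades
// EPERM/ENOMEM failures to log messages and returns false after unlocking the
// partially locked prefix; any other failure is fatal, as implemented above:
//
//   folly::MemoryMapping map("/tmp/data.bin");
//   folly::MemoryMapping::LockFlags flags;
//   flags.lockOnFault = true; // lock pages as they fault in
//   if (!map.mlock(folly::MemoryMapping::LockMode::TRY_LOCK, flags)) {
//     // not enough lockable memory; proceed without residency guarantees
//   }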

void MemoryMapping::munlock(bool dontneed) {
  if (!locked_) {
    return;
  }

  size_t amountSucceeded = 0;
  if (!memOpInChunks(
          ::munlock,
          mapStart_,
          size_t(mapLength_),
          options_.pageSize,
          amountSucceeded)) {
    PLOG(WARNING) << "munlock()";
  }
  if (mapLength_ && dontneed &&
      ::madvise(mapStart_, size_t(mapLength_), MADV_DONTNEED)) {
    PLOG(WARNING) << "madvise()";
  }
  locked_ = false;
}
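
// Hedged usage note (map is a hypothetical MemoryMapping instance): passing
// dontneed = true additionally issues madvise(MADV_DONTNEED), hinting that the
// pages' contents are no longer needed once they are unlocked, e.g.
//
//   map.munlock(/* dontneed = */ true);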

void MemoryMapping::hintLinearScan() {
  advise(MADV_SEQUENTIAL);
}

MemoryMapping::~MemoryMapping() {
  if (mapLength_) {
    size_t amountSucceeded = 0;
    if (!memOpInChunks(
            ::munmap,
            mapStart_,
            size_t(mapLength_),
            options_.pageSize,
            amountSucceeded)) {
      PLOG(FATAL) << folly::format(
          "munmap({}) failed at {}", mapLength_, amountSucceeded);
    }
  }
}

void MemoryMapping::advise(int advice) const {
  advise(advice, 0, size_t(mapLength_));
}

void MemoryMapping::advise(int advice, size_t offset, size_t length) const {
  CHECK_LE(offset + length, size_t(mapLength_))
      << " offset: " << offset << " length: " << length
      << " mapLength_: " << mapLength_;

  // Include the entire start page: round down to page boundary.
  const auto offMisalign = offset % options_.pageSize;
  offset -= offMisalign;
  length += offMisalign;

  // Round the last page down to page boundary.
  if (offset + length != size_t(mapLength_)) {
    length -= length % options_.pageSize;
  }

  if (length == 0) {
    return;
  }

  char* mapStart = static_cast<char*>(mapStart_) + offset;
  PLOG_IF(WARNING, ::madvise(mapStart, length, advice)) << "madvise";
}
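
// Usage sketch (illustrative; map is a hypothetical MemoryMapping instance):
// hintLinearScan() above is shorthand for advise(MADV_SEQUENTIAL) over the
// whole mapping, and other standard madvise hints can be passed directly, e.g.
//
//   map.advise(MADV_WILLNEED);             // ask the kernel to prefetch
//   map.advise(MADV_DONTNEED, 0, 1 << 20); // hint away the first 1 MiB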

MemoryMapping& MemoryMapping::operator=(MemoryMapping&& other) {
  swap(other);
  return *this;
}

void MemoryMapping::swap(MemoryMapping& other) noexcept {
  using std::swap;
  swap(this->file_, other.file_);
  swap(this->mapStart_, other.mapStart_);
  swap(this->mapLength_, other.mapLength_);
  swap(this->options_, other.options_);
  swap(this->locked_, other.locked_);
  swap(this->data_, other.data_);
}

void swap(MemoryMapping& a, MemoryMapping& b) noexcept {
  a.swap(b);
}

void alignedForwardMemcpy(void* dst, const void* src, size_t size) {
  assert(reinterpret_cast<uintptr_t>(src) % alignof(unsigned long) == 0);
  assert(reinterpret_cast<uintptr_t>(dst) % alignof(unsigned long) == 0);

  auto srcl = static_cast<const unsigned long*>(src);
  auto dstl = static_cast<unsigned long*>(dst);

  while (size >= sizeof(unsigned long)) {
    *dstl++ = *srcl++;
    size -= sizeof(unsigned long);
  }

  auto srcc = reinterpret_cast<const unsigned char*>(srcl);
  auto dstc = reinterpret_cast<unsigned char*>(dstl);

  while (size != 0) {
    *dstc++ = *srcc++;
    --size;
  }
}
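
// Hedged note (not in the original source): the copy above requires both
// pointers to be aligned to alignof(unsigned long), as asserted, and the
// ranges must not overlap (plain memcpy semantics). Page-aligned mmap()
// buffers such as the ones in mmapFileCopy() below satisfy the alignment
// requirement; a stand-alone sketch would look like:
//
//   alignas(alignof(unsigned long)) unsigned char in[64] = {}, out[64];
//   alignedForwardMemcpy(out, in, sizeof(in));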

void mmapFileCopy(const char* src, const char* dest, mode_t mode) {
  MemoryMapping srcMap(src);
  srcMap.hintLinearScan();

  MemoryMapping destMap(
      File(dest, O_RDWR | O_CREAT | O_TRUNC, mode),
      0,
      off_t(srcMap.range().size()),
      MemoryMapping::writable());

  alignedForwardMemcpy(
      destMap.writableRange().data(),
      srcMap.range().data(),
      srcMap.range().size());
}
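
// Usage sketch (illustrative; the paths are hypothetical): copies a file by
// mapping both source and destination and running alignedForwardMemcpy above.
// The destination is created (or truncated) with the supplied mode:
//
//   folly::mmapFileCopy("/tmp/in.dat", "/tmp/out.dat", 0644);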

bool MemoryMapping::LockFlags::operator==(const LockFlags& other) const {
  return lockOnFault == other.lockOnFault;
}

} // namespace folly