/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include #include namespace folly { /** * StampedPtr packs both a pointer to T and a uint16_t into a 64-bit value, * exploiting the fact that current addresses are limited to 48 bits on * all current x86-64 and ARM64 processors. * * For both x86-64 and ARM64, 64-bit pointers have a canonical * form in which the upper 16 bits are equal to bit 47. Intel has * announced a 57-bit addressing mode (see https://software.intel.com/ * sites/default/files/managed/2b/80/5-level_paging_white_paper.pdf), * but it is not yet available. The first problematic platform will * probably be ARMv8.2, which supports 52-bit virtual addresses. * * This code works on all of the platforms I have available for test, * and probably on all currently-shipping platforms that have a hope of * compiling folly. Rather than enumerating the supported platforms via * ifdef, this code dynamically validates its packing assumption in debug * builds on each call to a mutating function. Presumably by the time we * are running this process in an operating system image that can address * more than 256TB of memory, RAM cost and the latency of 128-bit CAS * will have improved enough that this optimization is no longer impactful. * * A common approach to this kind of packing seems to be to just assume * the top 16 bits are zero, but https://github.com/LuaJIT/LuaJIT/issues/49 * indicates that ARM64 platforms in the wild are actually setting bit 47 * in their stack addresses. That means that we need to extend bit 47 to * do the right thing (it's not expensive, it compiles to one instruction * on x86-64 and arm64). * * Compare to PackedSyncPtr and DiscriminatedPtr, which perform similar * packing but add additional functionality. The name is taken from * Java's AtomicStampedReference. Unlike PackedSyncPtr, which tries to * act pointer-like, this class acts more like a pair whose elements are * named ptr and stamp. It also allows direct access to the internal * raw field: since we're already at the metal you might want to play * additional games. It is guaranteed that a zero raw value gets decoded * as a (ptr,stamp) of (nullptr,0). */ template struct StampedPtr { /** * The packing is not guaranteed, except that it is guaranteed that * raw == 0 iff ptr() == nullptr && stamp() == 0. */ uint64_t raw; /* IMPORTANT: default initialization doesn't result in a sane state */ T* ptr() const { return unpackPtr(raw); } uint16_t stamp() const { return unpackStamp(raw); } void set(T* ptr, uint16_t stamp) { raw = pack(ptr, stamp); } void setPtr(T* ptr) { raw = pack(ptr, unpackStamp(raw)); } void setStamp(uint16_t stamp) { raw = pack(unpackPtr(raw), stamp); } static T* unpackPtr(uint64_t raw) { // Canonical form means we need to extend bit 47 of the pointer to // bits 48..63 (unless the operating system never hands those pointers // to us, which is difficult to prove). Signed right-shift of a // negative number is implementation-defined in C++ (not undefined!), // but actually does the right thing on all the platforms I can find. auto extended = static_cast(raw) >> kInternalStampBits; return reinterpret_cast(static_cast(extended)); } static uint16_t unpackStamp(uint64_t raw) { return static_cast(raw); } static uint64_t pack(T* ptr, uint16_t stamp) { auto shifted = static_cast(reinterpret_cast(ptr)) << kInternalStampBits; uint64_t raw = shifted | stamp; FOLLY_SAFE_DCHECK(unpackPtr(raw) == ptr, "ptr mismatch."); FOLLY_SAFE_DCHECK(unpackStamp(raw) == stamp, "stamp mismatch."); return raw; } private: // On 32-bit platforms it works okay to store a ptr in the top 48 // bits of a 64-bit value, but it will result in unnecessary work. // If we align the pointer part at word granularity when we have the // space then no shifting will ever be needed. static constexpr unsigned kInternalStampBits = sizeof(void*) == 4 ? 32 : 16; }; template StampedPtr makeStampedPtr(T* ptr, uint16_t stamp) { return StampedPtr{StampedPtr::pack(ptr, stamp)}; } } // namespace folly