2431 lines
74 KiB
C
2431 lines
74 KiB
C
|
/*
|
||
|
* Copyright 2017-present Facebook, Inc.
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
#pragma once
|
||
|
|
||
|
#include <cstddef>
|
||
|
#include <cstdint>
|
||
|
#include <cstring>
|
||
|
|
||
|
#include <array>
|
||
|
#include <iterator>
|
||
|
#include <limits>
|
||
|
#include <memory>
|
||
|
#include <new>
|
||
|
#include <type_traits>
|
||
|
#include <utility>
|
||
|
#include <vector>
|
||
|
|
||
|
#include <folly/Bits.h>
|
||
|
#include <folly/ConstexprMath.h>
|
||
|
#include <folly/Likely.h>
|
||
|
#include <folly/Portability.h>
|
||
|
#include <folly/ScopeGuard.h>
|
||
|
#include <folly/Traits.h>
|
||
|
#include <folly/functional/ApplyTuple.h>
|
||
|
#include <folly/functional/Invoke.h>
|
||
|
#include <folly/lang/Align.h>
|
||
|
#include <folly/lang/Assume.h>
|
||
|
#include <folly/lang/Exception.h>
|
||
|
#include <folly/lang/Launder.h>
|
||
|
#include <folly/lang/SafeAssert.h>
|
||
|
#include <folly/portability/Builtins.h>
|
||
|
|
||
|
#include <folly/container/detail/F14Defaults.h>
|
||
|
#include <folly/container/detail/F14IntrinsicsAvailability.h>
|
||
|
|
||
|
#if FOLLY_ASAN_ENABLED && defined(FOLLY_TLS)
|
||
|
#define FOLLY_F14_TLS_IF_ASAN FOLLY_TLS
|
||
|
#else
|
||
|
#define FOLLY_F14_TLS_IF_ASAN
|
||
|
#endif
|
||
|
|
||
|
#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
|
||
|
|
||
|
#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
|
||
|
#if FOLLY_NEON
|
||
|
#include <arm_acle.h> // __crc32cd
|
||
|
#else
|
||
|
#include <nmmintrin.h> // _mm_crc32_u64
|
||
|
#endif
|
||
|
#else
|
||
|
#ifdef _WIN32
|
||
|
#include <intrin.h> // _mul128 in fallback bit mixer
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#if FOLLY_NEON
|
||
|
#include <arm_neon.h> // uint8x16t intrinsics
|
||
|
#else // SSE2
|
||
|
#include <immintrin.h> // __m128i intrinsics
|
||
|
#include <xmmintrin.h> // _mm_prefetch
|
||
|
#endif
|
||
|
|
||
|
#endif
|
||
|
|
||
|
namespace folly {
|
||
|
|
||
|
/// Plain record of counters and histograms describing one table instance.
///
/// Instances are normally obtained through F14TableStats::compute(), which
/// forwards to the container's own computeStats() when it has one.
struct F14TableStats {
  // Policy label reported by the container's computeStats(). Null when
  // nothing has filled it in, which now includes default-constructed
  // instances (previously the pointer was left indeterminate in that case).
  char const* policy{nullptr};
  std::size_t size{0};
  std::size_t valueSize{0};
  std::size_t bucketCount{0};
  std::size_t chunkCount{0};
  std::vector<std::size_t> chunkOccupancyHisto;
  std::vector<std::size_t> chunkOutboundOverflowHisto;
  std::vector<std::size_t> chunkHostedOverflowHisto;
  std::vector<std::size_t> keyProbeLengthHisto;
  std::vector<std::size_t> missProbeLengthHisto;
  std::size_t totalBytes{0};
  std::size_t overheadBytes{0};

 private:
  // Preferred overload: only viable when `m->computeStats()` is a valid
  // expression for T const*; the return type is whatever that call yields.
  template <typename T>
  static auto computeHelper(T const* m) -> decltype(m->computeStats()) {
    return m->computeStats();
  }

  // Fallback for every other type: an empty, zeroed record.
  static F14TableStats computeHelper(...) {
    return {};
  }

 public:
  /// Returns m.computeStats() if T provides a const computeStats(), and an
  /// empty F14TableStats (null policy, zero counters, empty histograms)
  /// otherwise.
  template <typename T>
  static F14TableStats compute(T const& m) {
    return computeHelper(&m);
  }
};
|
||
|
|
||
|
namespace f14 {
|
||
|
namespace detail {
|
||
|
|
||
|
template <F14IntrinsicsMode>
|
||
|
struct F14LinkCheck {};
|
||
|
|
||
|
template <>
|
||
|
struct F14LinkCheck<getF14IntrinsicsMode()> {
|
||
|
// The purpose of this method is to trigger a link failure if
|
||
|
// compilation flags vary across compilation units. The definition
|
||
|
// is in F14Table.cpp, so only one of F14LinkCheck<None>::check,
|
||
|
// F14LinkCheck<Simd>::check, or F14LinkCheck<SimdAndCrc>::check will
|
||
|
// be available at link time.
|
||
|
//
|
||
|
// To cause a link failure the function must be invoked in code that
|
||
|
// is not optimized away, so we call it on a couple of cold paths
|
||
|
// (exception handling paths in copy construction and rehash). LTO may
|
||
|
// remove it entirely, but that's fine.
|
||
|
static void check() noexcept;
|
||
|
};
|
||
|
|
||
|
#if defined(_LIBCPP_VERSION)
|
||
|
|
||
|
// Size-estimation stand-in for a standard unordered container node, used
// when building against libc++ (this branch is selected by
// _LIBCPP_VERSION). The node always carries a stored hash, placed between
// the link pointer and the value. K and H are unused here; they keep the
// template signature parallel with the non-libc++ variant.
template <typename K, typename V, typename H>
struct StdNodeReplica {
  void* next; // link pointer
  std::size_t hash; // stored hash code
  V value; // the element itself
};
|
||
|
|
||
|
#else
|
||
|
|
||
|
// Trait marking a hasher as "fast". Every hasher is treated as fast unless
// one of the specializations below says otherwise.
template <typename Hasher>
struct StdIsFastHash : std::true_type {};

// std::hash<long double> is classified as not fast.
template <>
struct StdIsFastHash<std::hash<long double>> : std::false_type {};

// std::hash of any std::basic_string instantiation is classified as not
// fast.
template <typename... StringArgs>
struct StdIsFastHash<std::hash<std::basic_string<StringArgs...>>>
    : std::false_type {};
|
||
|
|
||
|
// TODO: add specialization for std::basic_string_view
|
||
|
|
||
|
// mimic internal node of unordered containers in STL to estimate the size
|
||
|
template <typename K, typename V, typename H, typename Enable = void>
|
||
|
struct StdNodeReplica {
|
||
|
void* next;
|
||
|
V value;
|
||
|
};
|
||
|
template <typename K, typename V, typename H>
|
||
|
struct StdNodeReplica<
|
||
|
K,
|
||
|
V,
|
||
|
H,
|
||
|
std::enable_if_t<
|
||
|
!StdIsFastHash<H>::value || !is_nothrow_invocable<H, K>::value>> {
|
||
|
void* next;
|
||
|
V value;
|
||
|
std::size_t hash;
|
||
|
};
|
||
|
|
||
|
#endif
|
||
|
|
||
|
} // namespace detail
|
||
|
} // namespace f14
|
||
|
|
||
|
#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
|
||
|
namespace f14 {
|
||
|
namespace detail {
|
||
|
template <typename Policy>
|
||
|
class F14Table;
|
||
|
} // namespace detail
|
||
|
} // namespace f14
|
||
|
|
||
|
class F14HashToken final {
|
||
|
public:
|
||
|
F14HashToken() = default;
|
||
|
|
||
|
private:
|
||
|
using HashPair = std::pair<std::size_t, std::size_t>;
|
||
|
|
||
|
explicit F14HashToken(HashPair hp) : hp_(hp) {}
|
||
|
explicit operator HashPair() const {
|
||
|
return hp_;
|
||
|
}
|
||
|
|
||
|
HashPair hp_;
|
||
|
|
||
|
template <typename Policy>
|
||
|
friend class f14::detail::F14Table;
|
||
|
};
|
||
|
|
||
|
namespace f14 {
|
||
|
namespace detail {
|
||
|
//// Defaults should be selected using void
|
||
|
|
||
|
// Maps Arg to void when it is exactly Default, and leaves it unchanged
// otherwise.
template <typename Arg, typename Default>
using VoidDefault = typename std::
    conditional<std::is_same<Arg, Default>::value, void, Arg>::type;

// Inverse of VoidDefault: maps void to Default and leaves any other Arg
// unchanged.
template <typename Arg, typename Default>
using Defaulted = typename std::
    conditional<std::is_same<Arg, void>::value, Default, Arg>::type;
|
||
|
|
||
|
template <
|
||
|
typename TableKey,
|
||
|
typename Hasher,
|
||
|
typename KeyEqual,
|
||
|
typename ArgKey,
|
||
|
typename Void = void>
|
||
|
struct EligibleForHeterogeneousFind : std::false_type {};
|
||
|
|
||
|
template <
|
||
|
typename TableKey,
|
||
|
typename Hasher,
|
||
|
typename KeyEqual,
|
||
|
typename ArgKey>
|
||
|
struct EligibleForHeterogeneousFind<
|
||
|
TableKey,
|
||
|
Hasher,
|
||
|
KeyEqual,
|
||
|
ArgKey,
|
||
|
void_t<typename Hasher::is_transparent, typename KeyEqual::is_transparent>>
|
||
|
: std::true_type {};
|
||
|
|
||
|
template <
|
||
|
typename TableKey,
|
||
|
typename Hasher,
|
||
|
typename KeyEqual,
|
||
|
typename ArgKey>
|
||
|
using EligibleForHeterogeneousInsert = Conjunction<
|
||
|
EligibleForHeterogeneousFind<TableKey, Hasher, KeyEqual, ArgKey>,
|
||
|
std::is_constructible<TableKey, ArgKey>>;
|
||
|
|
||
|
template <
|
||
|
typename TableKey,
|
||
|
typename Hasher,
|
||
|
typename KeyEqual,
|
||
|
typename KeyArg0OrBool,
|
||
|
typename... KeyArgs>
|
||
|
using KeyTypeForEmplaceHelper = std::conditional_t<
|
||
|
sizeof...(KeyArgs) == 1 &&
|
||
|
(std::is_same<remove_cvref_t<KeyArg0OrBool>, TableKey>::value ||
|
||
|
EligibleForHeterogeneousFind<
|
||
|
TableKey,
|
||
|
Hasher,
|
||
|
KeyEqual,
|
||
|
KeyArg0OrBool>::value),
|
||
|
KeyArg0OrBool&&,
|
||
|
TableKey>;
|
||
|
|
||
|
template <
|
||
|
typename TableKey,
|
||
|
typename Hasher,
|
||
|
typename KeyEqual,
|
||
|
typename... KeyArgs>
|
||
|
using KeyTypeForEmplace = KeyTypeForEmplaceHelper<
|
||
|
TableKey,
|
||
|
Hasher,
|
||
|
KeyEqual,
|
||
|
std::tuple_element_t<0, std::tuple<KeyArgs..., bool>>,
|
||
|
KeyArgs...>;
|
||
|
|
||
|
////////////////
|
||
|
|
||
|
template <typename T>
|
||
|
FOLLY_ALWAYS_INLINE static void prefetchAddr(T const* ptr) {
|
||
|
#ifndef _WIN32
|
||
|
__builtin_prefetch(static_cast<void const*>(ptr));
|
||
|
#elif FOLLY_NEON
|
||
|
__prefetch(static_cast<void const*>(ptr));
|
||
|
#else
|
||
|
_mm_prefetch(
|
||
|
static_cast<char const*>(static_cast<void const*>(ptr)), _MM_HINT_T0);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
template <typename T>
|
||
|
FOLLY_ALWAYS_INLINE static unsigned findFirstSetNonZero(T mask) {
|
||
|
assume(mask != 0);
|
||
|
if (sizeof(mask) == sizeof(unsigned)) {
|
||
|
return __builtin_ctz(static_cast<unsigned>(mask));
|
||
|
} else {
|
||
|
return __builtin_ctzll(mask);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if FOLLY_NEON
|
||
|
using TagVector = uint8x16_t;
|
||
|
|
||
|
using MaskType = uint64_t;
|
||
|
|
||
|
constexpr unsigned kMaskSpacing = 4;
|
||
|
#else // SSE2
|
||
|
using TagVector = __m128i;
|
||
|
|
||
|
using MaskType = uint32_t;
|
||
|
|
||
|
constexpr unsigned kMaskSpacing = 1;
|
||
|
#endif
|
||
|
|
||
|
// We could use unaligned loads to relax this requirement, but that
|
||
|
// would be both a performance penalty and require a bulkier packed
|
||
|
// ItemIter format
|
||
|
constexpr std::size_t kRequiredVectorAlignment =
|
||
|
constexpr_max(std::size_t{16}, alignof(max_align_t));
|
||
|
|
||
|
using EmptyTagVectorType = std::aligned_storage_t<
|
||
|
sizeof(TagVector) + kRequiredVectorAlignment,
|
||
|
alignof(max_align_t)>;
|
||
|
|
||
|
extern EmptyTagVectorType kEmptyTagVector;
|
||
|
|
||
|
extern FOLLY_F14_TLS_IF_ASAN std::size_t asanPendingSafeInserts;
|
||
|
extern FOLLY_F14_TLS_IF_ASAN std::size_t asanRehashState;
|
||
|
|
||
|
template <unsigned BitCount>
|
||
|
struct FullMask {
|
||
|
static constexpr MaskType value =
|
||
|
(FullMask<BitCount - 1>::value << kMaskSpacing) + 1;
|
||
|
};
|
||
|
|
||
|
template <>
|
||
|
struct FullMask<1> : std::integral_constant<MaskType, 1> {};
|
||
|
|
||
|
#if FOLLY_ARM
|
||
|
// Mask iteration is different for ARM because that is the only platform
|
||
|
// for which the mask is bigger than a register.
|
||
|
|
||
|
// Iterates a mask, optimized for the case that only a few bits are set
|
||
|
class SparseMaskIter {
|
||
|
static_assert(kMaskSpacing == 4, "");
|
||
|
|
||
|
uint32_t interleavedMask_;
|
||
|
|
||
|
public:
|
||
|
explicit SparseMaskIter(MaskType mask)
|
||
|
: interleavedMask_{static_cast<uint32_t>(((mask >> 32) << 2) | mask)} {}
|
||
|
|
||
|
bool hasNext() {
|
||
|
return interleavedMask_ != 0;
|
||
|
}
|
||
|
|
||
|
unsigned next() {
|
||
|
FOLLY_SAFE_DCHECK(hasNext(), "");
|
||
|
unsigned i = findFirstSetNonZero(interleavedMask_);
|
||
|
interleavedMask_ &= (interleavedMask_ - 1);
|
||
|
return ((i >> 2) | (i << 2)) & 0xf;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Iterates a mask, optimized for the case that most bits are set
|
||
|
class DenseMaskIter {
|
||
|
static_assert(kMaskSpacing == 4, "");
|
||
|
|
||
|
std::size_t count_;
|
||
|
unsigned index_;
|
||
|
uint8_t const* tags_;
|
||
|
|
||
|
public:
|
||
|
explicit DenseMaskIter(uint8_t const* tags, MaskType mask) {
|
||
|
if (mask == 0) {
|
||
|
count_ = 0;
|
||
|
} else {
|
||
|
count_ = popcount(static_cast<uint32_t>(((mask >> 32) << 2) | mask));
|
||
|
if (LIKELY((mask & 1) != 0)) {
|
||
|
index_ = 0;
|
||
|
} else {
|
||
|
index_ = findFirstSetNonZero(mask) / kMaskSpacing;
|
||
|
}
|
||
|
tags_ = tags;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool hasNext() {
|
||
|
return count_ > 0;
|
||
|
}
|
||
|
|
||
|
unsigned next() {
|
||
|
auto rv = index_;
|
||
|
--count_;
|
||
|
if (count_ > 0) {
|
||
|
do {
|
||
|
++index_;
|
||
|
} while ((tags_[index_] & 0x80) == 0);
|
||
|
}
|
||
|
FOLLY_SAFE_DCHECK(index_ < 16, "");
|
||
|
return rv;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
#else
|
||
|
// Iterates a mask, optimized for the case that only a few bits are set
|
||
|
class SparseMaskIter {
|
||
|
MaskType mask_;
|
||
|
|
||
|
public:
|
||
|
explicit SparseMaskIter(MaskType mask) : mask_{mask} {}
|
||
|
|
||
|
bool hasNext() {
|
||
|
return mask_ != 0;
|
||
|
}
|
||
|
|
||
|
unsigned next() {
|
||
|
FOLLY_SAFE_DCHECK(hasNext(), "");
|
||
|
unsigned i = findFirstSetNonZero(mask_);
|
||
|
mask_ &= (mask_ - 1);
|
||
|
return i / kMaskSpacing;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Iterates a mask, optimized for the case that most bits are set
|
||
|
class DenseMaskIter {
|
||
|
MaskType mask_;
|
||
|
unsigned index_{0};
|
||
|
|
||
|
public:
|
||
|
explicit DenseMaskIter(uint8_t const*, MaskType mask) : mask_{mask} {}
|
||
|
|
||
|
bool hasNext() {
|
||
|
return mask_ != 0;
|
||
|
}
|
||
|
|
||
|
unsigned next() {
|
||
|
FOLLY_SAFE_DCHECK(hasNext(), "");
|
||
|
if (LIKELY((mask_ & 1) != 0)) {
|
||
|
mask_ >>= kMaskSpacing;
|
||
|
return index_++;
|
||
|
} else {
|
||
|
unsigned s = findFirstSetNonZero(mask_);
|
||
|
unsigned rv = index_ + (s / kMaskSpacing);
|
||
|
mask_ >>= (s + kMaskSpacing);
|
||
|
index_ = rv + 1;
|
||
|
return rv;
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
#endif
|
||
|
|
||
|
// Iterates a mask, returning pairs of [begin,end) index covering blocks
|
||
|
// of set bits
|
||
|
class MaskRangeIter {
|
||
|
MaskType mask_;
|
||
|
unsigned shift_{0};
|
||
|
|
||
|
public:
|
||
|
explicit MaskRangeIter(MaskType mask) {
|
||
|
// If kMaskSpacing is > 1 then there will be empty bits even for
|
||
|
// contiguous ranges. Fill them in.
|
||
|
mask_ = mask * ((1 << kMaskSpacing) - 1);
|
||
|
}
|
||
|
|
||
|
bool hasNext() {
|
||
|
return mask_ != 0;
|
||
|
}
|
||
|
|
||
|
std::pair<unsigned, unsigned> next() {
|
||
|
FOLLY_SAFE_DCHECK(hasNext(), "");
|
||
|
auto s = shift_;
|
||
|
unsigned b = findFirstSetNonZero(mask_);
|
||
|
unsigned e = findFirstSetNonZero(~(mask_ | (mask_ - 1)));
|
||
|
mask_ >>= e;
|
||
|
shift_ = s + e;
|
||
|
return std::make_pair((s + b) / kMaskSpacing, (s + e) / kMaskSpacing);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Holds the result of an index query that has an optional result,
|
||
|
// interpreting a mask of 0 to be the empty answer and the index of the
|
||
|
// last set bit to be the non-empty answer
|
||
|
class LastOccupiedInMask {
|
||
|
MaskType mask_;
|
||
|
|
||
|
public:
|
||
|
explicit LastOccupiedInMask(MaskType mask) : mask_{mask} {}
|
||
|
|
||
|
bool hasIndex() const {
|
||
|
return mask_ != 0;
|
||
|
}
|
||
|
|
||
|
unsigned index() const {
|
||
|
assume(mask_ != 0);
|
||
|
return (findLastSet(mask_) - 1) / kMaskSpacing;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Holds the result of an index query that has an optional result,
|
||
|
// interpreting a mask of 0 to be the empty answer and the index of the
|
||
|
// first set bit to be the non-empty answer
|
||
|
class FirstEmptyInMask {
|
||
|
MaskType mask_;
|
||
|
|
||
|
public:
|
||
|
explicit FirstEmptyInMask(MaskType mask) : mask_{mask} {}
|
||
|
|
||
|
bool hasIndex() const {
|
||
|
return mask_ != 0;
|
||
|
}
|
||
|
|
||
|
unsigned index() const {
|
||
|
FOLLY_SAFE_DCHECK(mask_ != 0, "");
|
||
|
return findFirstSetNonZero(mask_) / kMaskSpacing;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
template <typename ItemType>
|
||
|
struct alignas(kRequiredVectorAlignment) F14Chunk {
|
||
|
using Item = ItemType;
|
||
|
|
||
|
// For our 16 byte vector alignment (and assuming alignof(Item) >=
|
||
|
// 4) kCapacity of 14 is the most space efficient. Slightly smaller
|
||
|
// or larger capacities can help with cache alignment in a couple of
|
||
|
// cases without wasting too much space, but once the items are larger
|
||
|
// then we're unlikely to get much benefit anyway. The only case we
|
||
|
// optimize is using kCapacity of 12 for 4 byte items, which makes the
|
||
|
// chunk take exactly 1 cache line, and adding 16 bytes of padding for
|
||
|
// 16 byte items so that a chunk takes exactly 4 cache lines.
|
||
|
static constexpr unsigned kCapacity = sizeof(Item) == 4 ? 12 : 14;
|
||
|
|
||
|
static constexpr unsigned kDesiredCapacity = kCapacity - 2;
|
||
|
|
||
|
static constexpr unsigned kAllocatedCapacity =
|
||
|
kCapacity + (sizeof(Item) == 16 ? 1 : 0);
|
||
|
|
||
|
static constexpr MaskType kFullMask = FullMask<kCapacity>::value;
|
||
|
|
||
|
// Non-empty tags have their top bit set. tags_ array might be bigger
|
||
|
// than kCapacity to keep alignment of first item.
|
||
|
std::array<uint8_t, 14> tags_;
|
||
|
|
||
|
// Bits 0..3 record the actual capacity of the chunk if this is chunk
|
||
|
// zero, or hold 0000 for other chunks. Bits 4-7 are a 4-bit counter
|
||
|
// of the number of values in this chunk that were placed because they
|
||
|
// overflowed their desired chunk (hostedOverflowCount).
|
||
|
uint8_t control_;
|
||
|
|
||
|
// The number of values that would have been placed into this chunk if
|
||
|
// there had been space, including values that also overflowed previous
|
||
|
// full chunks. This value saturates; once it becomes 255 it no longer
|
||
|
// increases nor decreases.
|
||
|
uint8_t outboundOverflowCount_;
|
||
|
|
||
|
std::array<
|
||
|
std::aligned_storage_t<sizeof(Item), alignof(Item)>,
|
||
|
kAllocatedCapacity>
|
||
|
rawItems_;
|
||
|
|
||
|
static F14Chunk* emptyInstance() {
|
||
|
auto raw = reinterpret_cast<char*>(&kEmptyTagVector);
|
||
|
if (kRequiredVectorAlignment > alignof(max_align_t)) {
|
||
|
auto delta = kRequiredVectorAlignment -
|
||
|
(reinterpret_cast<uintptr_t>(raw) % kRequiredVectorAlignment);
|
||
|
raw += delta;
|
||
|
}
|
||
|
auto rv = reinterpret_cast<F14Chunk*>(raw);
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
(reinterpret_cast<uintptr_t>(rv) % kRequiredVectorAlignment) == 0, "");
|
||
|
return rv;
|
||
|
}
|
||
|
|
||
|
void clear() {
|
||
|
// tags_ = {}; control_ = 0; outboundOverflowCount_ = 0;
|
||
|
|
||
|
// gcc < 6 doesn't exploit chunk alignment to generate the optimal
|
||
|
// SSE clear from memset. This is very hot code, so it is worth
|
||
|
// handling that case specially.
|
||
|
#if FOLLY_SSE >= 2 && __GNUC__ <= 5 && !__clang__
|
||
|
// this doesn't violate strict aliasing rules because __m128i is
|
||
|
// tagged as __may_alias__
|
||
|
auto* v = static_cast<__m128i*>(static_cast<void*>(&tags_[0]));
|
||
|
_mm_store_si128(v, _mm_setzero_si128());
|
||
|
#else
|
||
|
std::memset(&tags_[0], '\0', 16);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
void copyOverflowInfoFrom(F14Chunk const& rhs) {
|
||
|
FOLLY_SAFE_DCHECK(hostedOverflowCount() == 0, "");
|
||
|
control_ += static_cast<uint8_t>(rhs.control_ & 0xf0);
|
||
|
outboundOverflowCount_ = rhs.outboundOverflowCount_;
|
||
|
}
|
||
|
|
||
|
unsigned hostedOverflowCount() const {
|
||
|
return control_ >> 4;
|
||
|
}
|
||
|
|
||
|
static constexpr uint8_t kIncrHostedOverflowCount = 0x10;
|
||
|
static constexpr uint8_t kDecrHostedOverflowCount =
|
||
|
static_cast<uint8_t>(-0x10);
|
||
|
|
||
|
void adjustHostedOverflowCount(uint8_t op) {
|
||
|
control_ += op;
|
||
|
}
|
||
|
|
||
|
bool eof() const {
|
||
|
return (control_ & 0xf) != 0;
|
||
|
}
|
||
|
|
||
|
std::size_t chunk0Capacity() const {
|
||
|
return control_ & 0xf;
|
||
|
}
|
||
|
|
||
|
void markEof(std::size_t c0c) {
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
this != emptyInstance() && control_ == 0 && c0c > 0 && c0c <= 0xf &&
|
||
|
c0c <= kCapacity,
|
||
|
"");
|
||
|
control_ = static_cast<uint8_t>(c0c);
|
||
|
}
|
||
|
|
||
|
unsigned outboundOverflowCount() const {
|
||
|
return outboundOverflowCount_;
|
||
|
}
|
||
|
|
||
|
void incrOutboundOverflowCount() {
|
||
|
if (outboundOverflowCount_ != 255) {
|
||
|
++outboundOverflowCount_;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void decrOutboundOverflowCount() {
|
||
|
if (outboundOverflowCount_ != 255) {
|
||
|
--outboundOverflowCount_;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::size_t tag(std::size_t index) const {
|
||
|
return tags_[index];
|
||
|
}
|
||
|
|
||
|
void setTag(std::size_t index, std::size_t tag) {
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
this != emptyInstance() && tag >= 0x80 && tag <= 0xff, "");
|
||
|
tags_[index] = static_cast<uint8_t>(tag);
|
||
|
}
|
||
|
|
||
|
void clearTag(std::size_t index) {
|
||
|
tags_[index] = 0;
|
||
|
}
|
||
|
|
||
|
#if FOLLY_NEON
|
||
|
////////
|
||
|
// Tag filtering using NEON intrinsics
|
||
|
|
||
|
SparseMaskIter tagMatchIter(std::size_t needle) const {
|
||
|
FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
|
||
|
uint8x16_t tagV = vld1q_u8(&tags_[0]);
|
||
|
auto needleV = vdupq_n_u8(static_cast<uint8_t>(needle));
|
||
|
auto eqV = vceqq_u8(tagV, needleV);
|
||
|
// get info from every byte into the bottom half of every uint16_t
|
||
|
// by shifting right 4, then round to get it into a 64-bit vector
|
||
|
uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(eqV), 4);
|
||
|
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
|
||
|
return SparseMaskIter(mask);
|
||
|
}
|
||
|
|
||
|
MaskType occupiedMask() const {
|
||
|
uint8x16_t tagV = vld1q_u8(&tags_[0]);
|
||
|
// signed shift extends top bit to all bits
|
||
|
auto occupiedV =
|
||
|
vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(tagV), 7));
|
||
|
uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(occupiedV), 4);
|
||
|
return vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
|
||
|
}
|
||
|
#else
|
||
|
////////
|
||
|
// Tag filtering using SSE2 intrinsics
|
||
|
|
||
|
TagVector const* tagVector() const {
|
||
|
return static_cast<TagVector const*>(static_cast<void const*>(&tags_[0]));
|
||
|
}
|
||
|
|
||
|
SparseMaskIter tagMatchIter(std::size_t needle) const {
|
||
|
FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
|
||
|
auto tagV = _mm_load_si128(tagVector());
|
||
|
|
||
|
// TRICKY! It may seem strange to have a std::size_t needle and narrow
|
||
|
// it at the last moment, rather than making HashPair::second be a
|
||
|
// uint8_t, but the latter choice sometimes leads to a performance
|
||
|
// problem.
|
||
|
//
|
||
|
// On architectures with SSE2 but not AVX2, _mm_set1_epi8 expands
|
||
|
// to multiple instructions. One of those is a MOVD of either 4 or
|
||
|
// 8 byte width. Only the bottom byte of that move actually affects
|
||
|
// the result, but if a 1-byte needle has been spilled then this will
|
||
|
// be a 4 byte load. GCC 5.5 has been observed to reload needle
|
||
|
// (or perhaps fuse a reload and part of a previous static_cast)
|
||
|
// needle using a MOVZX with a 1 byte load in parallel with the MOVD.
|
||
|
// This combination causes a failure of store-to-load forwarding,
|
||
|
// which has a big performance penalty (60 nanoseconds per find on
|
||
|
// a microbenchmark). Keeping needle >= 4 bytes avoids the problem
|
||
|
// and also happens to result in slightly more compact assembly.
|
||
|
auto needleV = _mm_set1_epi8(static_cast<uint8_t>(needle));
|
||
|
auto eqV = _mm_cmpeq_epi8(tagV, needleV);
|
||
|
auto mask = _mm_movemask_epi8(eqV) & kFullMask;
|
||
|
return SparseMaskIter{mask};
|
||
|
}
|
||
|
|
||
|
MaskType occupiedMask() const {
|
||
|
auto tagV = _mm_load_si128(tagVector());
|
||
|
return _mm_movemask_epi8(tagV) & kFullMask;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
DenseMaskIter occupiedIter() const {
|
||
|
return DenseMaskIter{&tags_[0], occupiedMask()};
|
||
|
}
|
||
|
|
||
|
MaskRangeIter occupiedRangeIter() const {
|
||
|
return MaskRangeIter{occupiedMask()};
|
||
|
}
|
||
|
|
||
|
LastOccupiedInMask lastOccupied() const {
|
||
|
return LastOccupiedInMask{occupiedMask()};
|
||
|
}
|
||
|
|
||
|
FirstEmptyInMask firstEmpty() const {
|
||
|
return FirstEmptyInMask{occupiedMask() ^ kFullMask};
|
||
|
}
|
||
|
|
||
|
bool occupied(std::size_t index) const {
|
||
|
FOLLY_SAFE_DCHECK(tags_[index] == 0 || (tags_[index] & 0x80) != 0, "");
|
||
|
return tags_[index] != 0;
|
||
|
}
|
||
|
|
||
|
Item* itemAddr(std::size_t i) const {
|
||
|
return static_cast<Item*>(
|
||
|
const_cast<void*>(static_cast<void const*>(&rawItems_[i])));
|
||
|
}
|
||
|
|
||
|
Item& item(std::size_t i) {
|
||
|
FOLLY_SAFE_DCHECK(this->occupied(i), "");
|
||
|
return *launder(itemAddr(i));
|
||
|
}
|
||
|
|
||
|
Item const& citem(std::size_t i) const {
|
||
|
FOLLY_SAFE_DCHECK(this->occupied(i), "");
|
||
|
return *launder(itemAddr(i));
|
||
|
}
|
||
|
|
||
|
static F14Chunk& owner(Item& item, std::size_t index) {
|
||
|
auto rawAddr =
|
||
|
static_cast<uint8_t*>(static_cast<void*>(std::addressof(item))) -
|
||
|
offsetof(F14Chunk, rawItems_) - index * sizeof(Item);
|
||
|
auto chunkAddr = static_cast<F14Chunk*>(static_cast<void*>(rawAddr));
|
||
|
FOLLY_SAFE_DCHECK(std::addressof(item) == chunkAddr->itemAddr(index), "");
|
||
|
return *chunkAddr;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
////////////////
|
||
|
|
||
|
// PackedChunkItemPtr points to an Item in an F14Chunk, allowing both the
|
||
|
// Item& and its index to be recovered. It sorts by the address of the
|
||
|
// item, and it only works for items that are in a properly-aligned chunk.
|
||
|
|
||
|
// Generic form for arbitrary pointer-like types. Nothing is packed here:
// the pointer and the index are simply stored side by side. Ordering and
// equality look at the pointer only.
template <typename Ptr>
class PackedChunkItemPtr {
 public:
  // A null pointer must be paired with index 0.
  PackedChunkItemPtr(Ptr p, std::size_t i) noexcept : ptr_{p}, index_{i} {
    FOLLY_SAFE_DCHECK(ptr_ != nullptr || index_ == 0, "");
  }

  Ptr ptr() const {
    return ptr_;
  }

  std::size_t index() const {
    return index_;
  }

  // Orders by item address. Equal pointers are expected to come with equal
  // indexes (checked in debug builds only).
  bool operator<(PackedChunkItemPtr const& rhs) const {
    FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
    return ptr_ < rhs.ptr_;
  }

  // Compares item addresses, with the same debug-only consistency check as
  // operator<.
  bool operator==(PackedChunkItemPtr const& rhs) const {
    FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
    return ptr_ == rhs.ptr_;
  }

  bool operator!=(PackedChunkItemPtr const& rhs) const {
    return !(*this == rhs);
  }

 private:
  Ptr ptr_;
  std::size_t index_;
};
|
||
|
|
||
|
// Bare pointer form, packed into a uintptr_t. Uses only bits wasted by
|
||
|
// alignment, so it works on 32-bit and 64-bit platforms
|
||
|
template <typename T>
|
||
|
class PackedChunkItemPtr<T*> {
|
||
|
static_assert((alignof(F14Chunk<T>) % 16) == 0, "");
|
||
|
|
||
|
// Chunks are 16-byte aligned, so we can maintain a packed pointer to a
|
||
|
// chunk item by packing the 4-bit item index into the least significant
|
||
|
// bits of a pointer to the chunk itself. This makes ItemIter::pack
|
||
|
// more expensive, however, since it has to compute the chunk address.
|
||
|
//
|
||
|
// Chunk items have varying alignment constraints, so it would seem
|
||
|
// to be that we can't do a similar trick while using only bit masking
|
||
|
// operations on the Item* itself. It happens to be, however, that if
|
||
|
// sizeof(Item) is not a multiple of 16 then we can recover a portion
|
||
|
// of the index bits from the knowledge that the Item-s are stored in
|
||
|
// an array that is itself 16-byte aligned.
|
||
|
//
|
||
|
// If kAlignBits is the number of trailing zero bits in sizeof(Item)
|
||
|
// (up to 4), then we can borrow those bits to store kAlignBits of the
|
||
|
// index directly. We can recover (4 - kAlignBits) bits of the index
|
||
|
// from the item pointer itself, by defining/observing that
|
||
|
//
|
||
|
// A = kAlignBits (A <= 4)
|
||
|
//
|
||
|
// S = (sizeof(Item) % 16) >> A (shifted-away bits are all zero)
|
||
|
//
|
||
|
// R = (itemPtr % 16) >> A (shifted-away bits are all zero)
|
||
|
//
|
||
|
// M = 16 >> A
|
||
|
//
|
||
|
// itemPtr % 16 = (index * sizeof(Item)) % 16
|
||
|
//
|
||
|
// (R * 2^A) % 16 = (index * (sizeof(Item) % 16)) % 16
|
||
|
//
|
||
|
// (R * 2^A) % 16 = (index * 2^A * S) % 16
|
||
|
//
|
||
|
// R % M = (index * S) % M
|
||
|
//
|
||
|
// S is relatively prime with M, so a multiplicative inverse is easy
|
||
|
// to compute
|
||
|
//
|
||
|
// Sinv = S^(M - 1) % M
|
||
|
//
|
||
|
// (R * Sinv) % M = index % M
|
||
|
//
|
||
|
// This lets us recover the bottom bits of the index. When sizeof(T)
|
||
|
// is 8-byte aligned kSizeInverse will always be 1. When sizeof(T)
|
||
|
// is 4-byte aligned kSizeInverse will be either 1 or 3.
|
||
|
|
||
|
// returns pow(x, y) % m
|
||
|
static constexpr uintptr_t powerMod(uintptr_t x, uintptr_t y, uintptr_t m) {
|
||
|
return y == 0 ? 1 : (x * powerMod(x, y - 1, m)) % m;
|
||
|
}
|
||
|
|
||
|
static constexpr uintptr_t kIndexBits = 4;
|
||
|
static constexpr uintptr_t kIndexMask = (uintptr_t{1} << kIndexBits) - 1;
|
||
|
|
||
|
static constexpr uintptr_t kAlignBits = constexpr_min(
|
||
|
uintptr_t{4},
|
||
|
constexpr_find_first_set(uintptr_t{sizeof(T)}) - 1);
|
||
|
|
||
|
static constexpr uintptr_t kAlignMask = (uintptr_t{1} << kAlignBits) - 1;
|
||
|
|
||
|
static constexpr uintptr_t kModulus = uintptr_t{1}
|
||
|
<< (kIndexBits - kAlignBits);
|
||
|
static constexpr uintptr_t kSizeInverse =
|
||
|
powerMod(sizeof(T) >> kAlignBits, kModulus - 1, kModulus);
|
||
|
|
||
|
public:
|
||
|
PackedChunkItemPtr(T* p, std::size_t i) noexcept {
|
||
|
uintptr_t encoded = i >> (kIndexBits - kAlignBits);
|
||
|
assume((encoded & ~kAlignMask) == 0);
|
||
|
raw_ = reinterpret_cast<uintptr_t>(p) | encoded;
|
||
|
FOLLY_SAFE_DCHECK(p == ptr(), "");
|
||
|
FOLLY_SAFE_DCHECK(i == index(), "");
|
||
|
}
|
||
|
|
||
|
T* ptr() const {
|
||
|
return reinterpret_cast<T*>(raw_ & ~kAlignMask);
|
||
|
}
|
||
|
|
||
|
std::size_t index() const {
|
||
|
auto encoded = (raw_ & kAlignMask) << (kIndexBits - kAlignBits);
|
||
|
auto deduced =
|
||
|
((raw_ >> kAlignBits) * kSizeInverse) & (kIndexMask >> kAlignBits);
|
||
|
return encoded | deduced;
|
||
|
}
|
||
|
|
||
|
bool operator<(PackedChunkItemPtr const& rhs) const {
|
||
|
return raw_ < rhs.raw_;
|
||
|
}
|
||
|
bool operator==(PackedChunkItemPtr const& rhs) const {
|
||
|
return raw_ == rhs.raw_;
|
||
|
}
|
||
|
bool operator!=(PackedChunkItemPtr const& rhs) const {
|
||
|
return !(*this == rhs);
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
uintptr_t raw_;
|
||
|
};
|
||
|
|
||
|
template <typename ChunkPtr>
|
||
|
class F14ItemIter {
|
||
|
private:
|
||
|
using Chunk = typename std::pointer_traits<ChunkPtr>::element_type;
|
||
|
|
||
|
public:
|
||
|
using Item = typename Chunk::Item;
|
||
|
using ItemPtr = typename std::pointer_traits<ChunkPtr>::template rebind<Item>;
|
||
|
using ItemConstPtr =
|
||
|
typename std::pointer_traits<ChunkPtr>::template rebind<Item const>;
|
||
|
|
||
|
using Packed = PackedChunkItemPtr<ItemPtr>;
|
||
|
|
||
|
//// PUBLIC
|
||
|
|
||
|
F14ItemIter() noexcept : itemPtr_{nullptr}, index_{0} {}
|
||
|
|
||
|
// default copy and move constructors and assignment operators are correct
|
||
|
|
||
|
explicit F14ItemIter(Packed const& packed)
|
||
|
: itemPtr_{packed.ptr()}, index_{packed.index()} {}
|
||
|
|
||
|
F14ItemIter(ChunkPtr chunk, std::size_t index)
|
||
|
: itemPtr_{std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index))},
|
||
|
index_{index} {
|
||
|
FOLLY_SAFE_DCHECK(index < Chunk::kCapacity, "");
|
||
|
assume(
|
||
|
std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index)) !=
|
||
|
nullptr);
|
||
|
assume(itemPtr_ != nullptr);
|
||
|
}
|
||
|
|
||
|
FOLLY_ALWAYS_INLINE void advanceImpl(bool checkEof, bool likelyDead) {
|
||
|
auto c = chunk();
|
||
|
|
||
|
// common case is packed entries
|
||
|
while (index_ > 0) {
|
||
|
--index_;
|
||
|
--itemPtr_;
|
||
|
if (LIKELY(c->occupied(index_))) {
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// It's fairly common for an iterator to be advanced and then become
|
||
|
// dead, for example in the return value from erase(iter) or in
|
||
|
// the last step of a loop. We'd like to make sure that the entire
|
||
|
// advance() method can be eliminated by the compiler's dead code
|
||
|
// elimination pass. To do that it must eliminate the loops, which
|
||
|
// requires it to prove that they have no side effects. It's easy
|
||
|
// to show that there are no escaping stores, but at the moment
|
||
|
// compilers also consider an infinite loop to be a side effect.
|
||
|
// (There are parts of the standard that would allow them to treat
|
||
|
// this as undefined behavior, but at the moment they don't exploit
|
||
|
// those clauses.)
|
||
|
//
|
||
|
// The following loop should really be a while loop, which would
|
||
|
// save a register, some instructions, and a conditional branch,
|
||
|
// but by writing it as a for loop the compiler can prove to itself
|
||
|
// that it will eventually terminate. (No matter that even if the
|
||
|
// loop executed in a single cycle it would take about 200 years to
|
||
|
// run all 2^64 iterations.)
|
||
|
//
|
||
|
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82776 has the bug we
|
||
|
// filed about the issue. while (true) {
|
||
|
for (std::size_t i = 1; !likelyDead || i != 0; ++i) {
|
||
|
if (checkEof) {
|
||
|
// exhausted the current chunk
|
||
|
if (UNLIKELY(c->eof())) {
|
||
|
FOLLY_SAFE_DCHECK(index_ == 0, "");
|
||
|
itemPtr_ = nullptr;
|
||
|
return;
|
||
|
}
|
||
|
} else {
|
||
|
FOLLY_SAFE_DCHECK(!c->eof(), "");
|
||
|
}
|
||
|
--c;
|
||
|
auto last = c->lastOccupied();
|
||
|
if (checkEof && !likelyDead) {
|
||
|
prefetchAddr(&*c - 1);
|
||
|
}
|
||
|
if (LIKELY(last.hasIndex())) {
|
||
|
index_ = last.index();
|
||
|
itemPtr_ = std::pointer_traits<ItemPtr>::pointer_to(c->item(index_));
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void precheckedAdvance() {
|
||
|
advanceImpl(false, false);
|
||
|
}
|
||
|
|
||
|
FOLLY_ALWAYS_INLINE void advance() {
|
||
|
advanceImpl(true, false);
|
||
|
}
|
||
|
|
||
|
FOLLY_ALWAYS_INLINE void advanceLikelyDead() {
|
||
|
advanceImpl(true, true);
|
||
|
}
|
||
|
|
||
|
ChunkPtr chunk() const {
|
||
|
return std::pointer_traits<ChunkPtr>::pointer_to(
|
||
|
Chunk::owner(*itemPtr_, index_));
|
||
|
}
|
||
|
|
||
|
std::size_t index() const {
|
||
|
return index_;
|
||
|
}
|
||
|
|
||
|
Item* itemAddr() const {
|
||
|
return std::addressof(*itemPtr_);
|
||
|
}
|
||
|
Item& item() const {
|
||
|
return *itemPtr_;
|
||
|
}
|
||
|
Item const& citem() const {
|
||
|
return *itemPtr_;
|
||
|
}
|
||
|
|
||
|
bool atEnd() const {
|
||
|
return itemPtr_ == nullptr;
|
||
|
}
|
||
|
|
||
|
Packed pack() const {
|
||
|
return Packed{itemPtr_, static_cast<uint8_t>(index_)};
|
||
|
}
|
||
|
|
||
|
bool operator==(F14ItemIter const& rhs) const {
|
||
|
// this form makes iter == end() into a single null check after inlining
|
||
|
// and constant propagation
|
||
|
return itemPtr_ == rhs.itemPtr_;
|
||
|
}
|
||
|
|
||
|
bool operator!=(F14ItemIter const& rhs) const {
|
||
|
return !(*this == rhs);
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
ItemPtr itemPtr_;
|
||
|
std::size_t index_;
|
||
|
};
|
||
|
|
||
|
////////////////
|
||
|
|
||
|
// Holds the element count and, when EnablePackedItemIter is true, the
// packed form of the table's begin iterator.  packedBegin_ starts out as
// the packed form of a default-constructed ItemIter.
template <typename SizeType, typename ItemIter, bool EnablePackedItemIter>
struct SizeAndPackedBegin {
  SizeType size_{0};

 private:
  typename ItemIter::Packed packedBegin_{ItemIter{}.pack()};

 public:
  // Mutable access to the stored packed begin iterator.
  typename ItemIter::Packed& packedBegin() {
    return packedBegin_;
  }

  // Read-only access to the stored packed begin iterator.
  typename ItemIter::Packed const& packedBegin() const {
    return packedBegin_;
  }
};
|
||
|
|
||
|
template <typename SizeType, typename ItemIter>
|
||
|
struct SizeAndPackedBegin<SizeType, ItemIter, false> {
|
||
|
SizeType size_{0};
|
||
|
|
||
|
[[noreturn]] typename ItemIter::Packed& packedBegin() {
|
||
|
assume_unreachable();
|
||
|
}
|
||
|
|
||
|
[[noreturn]] typename ItemIter::Packed const& packedBegin() const {
|
||
|
assume_unreachable();
|
||
|
}
|
||
|
};
|
||
|
|
||
|
template <typename Policy>
|
||
|
class F14Table : public Policy {
|
||
|
public:
|
||
|
using Item = typename Policy::Item;
|
||
|
|
||
|
using value_type = typename Policy::Value;
|
||
|
using allocator_type = typename Policy::Alloc;
|
||
|
|
||
|
private:
|
||
|
using Alloc = typename Policy::Alloc;
|
||
|
using AllocTraits = typename Policy::AllocTraits;
|
||
|
using Hasher = typename Policy::Hasher;
|
||
|
using InternalSizeType = typename Policy::InternalSizeType;
|
||
|
using KeyEqual = typename Policy::KeyEqual;
|
||
|
|
||
|
using Policy::kAllocIsAlwaysEqual;
|
||
|
using Policy::kDefaultConstructIsNoexcept;
|
||
|
using Policy::kEnableItemIteration;
|
||
|
using Policy::kSwapIsNoexcept;
|
||
|
|
||
|
using Policy::destroyItemOnClear;
|
||
|
using Policy::isAvalanchingHasher;
|
||
|
using Policy::prefetchBeforeCopy;
|
||
|
using Policy::prefetchBeforeDestroy;
|
||
|
using Policy::prefetchBeforeRehash;
|
||
|
|
||
|
using ByteAlloc = typename AllocTraits::template rebind_alloc<uint8_t>;
|
||
|
using BytePtr = typename std::allocator_traits<ByteAlloc>::pointer;
|
||
|
|
||
|
using Chunk = F14Chunk<Item>;
|
||
|
using ChunkPtr =
|
||
|
typename std::pointer_traits<BytePtr>::template rebind<Chunk>;
|
||
|
|
||
|
using HashPair = typename F14HashToken::HashPair;
|
||
|
|
||
|
public:
|
||
|
using ItemIter = F14ItemIter<ChunkPtr>;
|
||
|
|
||
|
private:
|
||
|
//////// begin fields
|
||
|
|
||
|
ChunkPtr chunks_{Chunk::emptyInstance()};
|
||
|
InternalSizeType chunkMask_{0};
|
||
|
SizeAndPackedBegin<InternalSizeType, ItemIter, kEnableItemIteration>
|
||
|
sizeAndPackedBegin_;
|
||
|
|
||
|
//////// end fields
|
||
|
|
||
|
// Exchanges the table fields (chunks_, chunkMask_, size_ and, when item
// iteration is enabled, the packed begin iterator) with rhs.  Policy
// state is not touched here.
void swapContents(F14Table& rhs) noexcept {
  using std::swap;
  swap(chunks_, rhs.chunks_);
  swap(chunkMask_, rhs.chunkMask_);
  swap(sizeAndPackedBegin_.size_, rhs.sizeAndPackedBegin_.size_);
  if (kEnableItemIteration) {
    swap(
        sizeAndPackedBegin_.packedBegin(),
        rhs.sizeAndPackedBegin_.packedBegin());
  }
}
|
||
|
|
||
|
public:
|
||
|
F14Table(
|
||
|
std::size_t initialCapacity,
|
||
|
Hasher const& hasher,
|
||
|
KeyEqual const& keyEqual,
|
||
|
Alloc const& alloc)
|
||
|
: Policy{hasher, keyEqual, alloc} {
|
||
|
if (initialCapacity > 0) {
|
||
|
reserve(initialCapacity);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Copy constructor: copies the policy, then builds the contents from rhs.
F14Table(F14Table const& rhs) : Policy{rhs} {
  buildFromF14Table(rhs);
}
|
||
|
|
||
|
// Allocator-extended copy constructor.
F14Table(F14Table const& rhs, Alloc const& alloc) : Policy{rhs, alloc} {
  buildFromF14Table(rhs);
}
|
||
|
|
||
|
// Move constructor: move-constructs the policy and then takes rhs's
// storage by swapping fields with this (freshly default-initialized)
// table, which leaves rhs with the default field values.
F14Table(F14Table&& rhs) noexcept(
    std::is_nothrow_move_constructible<Hasher>::value &&
    std::is_nothrow_move_constructible<KeyEqual>::value &&
    std::is_nothrow_move_constructible<Alloc>::value)
    : Policy{std::move(rhs)} {
  swapContents(rhs);
}
|
||
|
|
||
|
// Allocator-extended move constructor.  Storage can only be stolen when
// the new allocator compares equal to rhs's allocator; otherwise new
// storage is built and the values are moved one by one.
F14Table(F14Table&& rhs, Alloc const& alloc) noexcept(kAllocIsAlwaysEqual)
    : Policy{std::move(rhs), alloc} {
  if (kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
    // move storage (common case)
    swapContents(rhs);
  } else {
    // new storage because allocators unequal, move values (rare case)
    buildFromF14Table(std::move(rhs));
  }
}
|
||
|
|
||
|
// Copy assignment.  Self-assignment is a no-op; otherwise the current
// contents are released, the policy is copy-assigned, and the contents
// are rebuilt from rhs.
F14Table& operator=(F14Table const& rhs) {
  if (this != &rhs) {
    reset();
    static_cast<Policy&>(*this) = rhs;
    buildFromF14Table(rhs);
  }
  return *this;
}
|
||
|
|
||
|
// Move assignment.  Self-assignment is a no-op.  After releasing the
// current contents and move-assigning the policy, storage is stolen from
// rhs when the allocator propagates on move assignment or compares equal;
// otherwise new storage is built and values are moved individually.
F14Table& operator=(F14Table&& rhs) noexcept(
    std::is_nothrow_move_assignable<Hasher>::value &&
    std::is_nothrow_move_assignable<KeyEqual>::value &&
    (kAllocIsAlwaysEqual ||
     (AllocTraits::propagate_on_container_move_assignment::value &&
      std::is_nothrow_move_assignable<Alloc>::value))) {
  if (this != &rhs) {
    reset();
    static_cast<Policy&>(*this) = std::move(rhs);
    if (AllocTraits::propagate_on_container_move_assignment::value ||
        kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
      // move storage (common case)
      swapContents(rhs);
    } else {
      // new storage because allocators unequal, move values (rare case)
      buildFromF14Table(std::move(rhs));
    }
  }
  return *this;
}
|
||
|
|
||
|
// Releases all contents via reset().
~F14Table() {
  reset();
}
|
||
|
|
||
|
// Swaps policy state and table contents with rhs.
void swap(F14Table& rhs) noexcept(kSwapIsNoexcept) {
  // If propagate_on_container_swap is false and allocators are
  // not equal, the only way to accomplish a swap would be to do
  // dynamic allocation and then move (or swap) each contained value.
  // AllocatorAwareContainer-s are not supposed to attempt this, but
  // rather are supposed to have undefined behavior in that case.
  FOLLY_SAFE_CHECK(
      AllocTraits::propagate_on_container_swap::value ||
          kAllocIsAlwaysEqual || this->alloc() == rhs.alloc(),
      "swap is undefined for unequal non-propagating allocators");
  this->swapPolicy(rhs);
  swapContents(rhs);
}
|
||
|
|
||
|
private:
|
||
|
//////// hash helpers
|
||
|
|
||
|
// Hash values are used to compute the desired position, which is the
|
||
|
// chunk index at which we would like to place a value (if there is no
|
||
|
// overflow), and the tag, which is an additional 8 bits of entropy.
|
||
|
//
|
||
|
// The standard's definition of hash function quality only refers to
|
||
|
// the probability of collisions of the entire hash value, not to the
|
||
|
// probability of collisions of the results of shifting or masking the
|
||
|
// hash value. Some hash functions, however, provide this stronger
|
||
|
// guarantee (not quite the same as the definition of avalanching,
|
||
|
// but similar).
|
||
|
//
|
||
|
// If the user-supplied hasher is an avalanching one (each bit of the
|
||
|
// hash value has a 50% chance of being the same for differing hash
|
||
|
// inputs), then we can just take 1 byte of the hash value for the tag
|
||
|
// and the rest for the desired position. Avalanching hashers also
|
||
|
// let us map hash value to array index position with just a bitmask
|
||
|
// without risking clumping. (Many hash tables just accept the risk
|
||
|
// and do it regardless.)
|
||
|
//
|
||
|
// std::hash<std::string> avalanches in all implementations we've
|
||
|
// examined: libstdc++-v3 uses MurmurHash2, and libc++ uses CityHash
|
||
|
// or MurmurHash2. The other std::hash specializations, however, do not
|
||
|
// have this property. std::hash for integral and pointer values is the
|
||
|
// identity function on libstdc++-v3 and libc++, in particular. In our
|
||
|
// experience it is also fairly common for user-defined specializations
|
||
|
// of std::hash to combine fields in an ad-hoc way that does not evenly
|
||
|
// distribute entropy among the bits of the result (a + 37 * b, for
|
||
|
// example, where a and b are integer fields).
|
||
|
//
|
||
|
// For hash functions we don't trust to avalanche, we repair things by
|
||
|
// applying a bit mixer to the user-supplied hash.
|
||
|
|
||
|
#if FOLLY_X64 || FOLLY_AARCH64
|
||
|
// 64-bit
|
||
|
static HashPair splitHash(std::size_t hash) {
|
||
|
static_assert(sizeof(std::size_t) == sizeof(uint64_t), "");
|
||
|
std::size_t tag;
|
||
|
if (!isAvalanchingHasher()) {
|
||
|
#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
|
||
|
#if FOLLY_SSE
|
||
|
// SSE4.2 CRC
|
||
|
std::size_t c = _mm_crc32_u64(0, hash);
|
||
|
tag = (c >> 24) | 0x80;
|
||
|
hash += c;
|
||
|
#else
|
||
|
// CRC is optional on armv8 (-march=armv8-a+crc), standard on armv8.1
|
||
|
std::size_t c = __crc32cd(0, hash);
|
||
|
tag = (c >> 24) | 0x80;
|
||
|
hash += c;
|
||
|
#endif
|
||
|
#else
|
||
|
// The mixer below is not fully avalanching for all 64 bits of
|
||
|
// output, but looks quite good for bits 18..63 and puts plenty
|
||
|
// of entropy even lower when considering multiple bits together
|
||
|
// (like the tag). Importantly, when under register pressure it
|
||
|
// uses fewer registers, instructions, and immediate constants
|
||
|
// than the alternatives, resulting in compact code that is more
|
||
|
// easily inlinable. In one instantiation a modified Murmur mixer
|
||
|
// was 48 bytes of assembly (even after using the same multiplicand
|
||
|
// for both steps) and this one was 27 bytes, for example.
|
||
|
auto const kMul = 0xc4ceb9fe1a85ec53ULL;
|
||
|
#ifdef _WIN32
|
||
|
__int64 signedHi;
|
||
|
__int64 signedLo = _mul128(
|
||
|
static_cast<__int64>(hash), static_cast<__int64>(kMul), &signedHi);
|
||
|
auto hi = static_cast<uint64_t>(signedHi);
|
||
|
auto lo = static_cast<uint64_t>(signedLo);
|
||
|
#else
|
||
|
auto hi = static_cast<uint64_t>(
|
||
|
(static_cast<unsigned __int128>(hash) * kMul) >> 64);
|
||
|
auto lo = hash * kMul;
|
||
|
#endif
|
||
|
hash = hi ^ lo;
|
||
|
hash *= kMul;
|
||
|
tag = ((hash >> 15) & 0x7f) | 0x80;
|
||
|
hash >>= 22;
|
||
|
#endif
|
||
|
} else {
|
||
|
// we don't trust the top bit
|
||
|
tag = (hash >> 56) | 0x80;
|
||
|
}
|
||
|
return std::make_pair(hash, tag);
|
||
|
}
|
||
|
#else
|
||
|
// 32-bit
|
||
|
static HashPair splitHash(std::size_t hash) {
|
||
|
static_assert(sizeof(std::size_t) == sizeof(uint32_t), "");
|
||
|
uint8_t tag;
|
||
|
if (!isAvalanchingHasher()) {
|
||
|
#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
|
||
|
#if FOLLY_SSE
|
||
|
// SSE4.2 CRC
|
||
|
auto c = _mm_crc32_u32(0, hash);
|
||
|
tag = static_cast<uint8_t>(~(c >> 25));
|
||
|
hash += c;
|
||
|
#else
|
||
|
auto c = __crc32cw(0, hash);
|
||
|
tag = static_cast<uint8_t>(~(c >> 25));
|
||
|
hash += c;
|
||
|
#endif
|
||
|
#else
|
||
|
// finalizer for 32-bit murmur2
|
||
|
hash ^= hash >> 13;
|
||
|
hash *= 0x5bd1e995;
|
||
|
hash ^= hash >> 15;
|
||
|
tag = static_cast<uint8_t>(~(hash >> 25));
|
||
|
#endif
|
||
|
} else {
|
||
|
// we don't trust the top bit
|
||
|
tag = (hash >> 24) | 0x80;
|
||
|
}
|
||
|
return std::make_pair(hash, tag);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
//////// memory management helpers
|
||
|
|
||
|
static std::size_t chunkAllocSize(
|
||
|
std::size_t chunkCount,
|
||
|
std::size_t maxSizeWithoutRehash) {
|
||
|
if (chunkCount == 1) {
|
||
|
FOLLY_SAFE_DCHECK((maxSizeWithoutRehash % 2) == 0, "");
|
||
|
static_assert(offsetof(Chunk, rawItems_) == 16, "");
|
||
|
return 16 + sizeof(Item) * maxSizeWithoutRehash;
|
||
|
} else {
|
||
|
return sizeof(Chunk) * chunkCount;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Treats the raw allocation as an array of chunkCount chunks, clears
// each one, and marks chunk 0 as the eof chunk.  The value passed to
// markEof is maxSizeWithoutRehash for a single-chunk table and 1
// otherwise.  Returns a ChunkPtr to the first chunk.
ChunkPtr initializeChunks(
    BytePtr raw,
    std::size_t chunkCount,
    std::size_t maxSizeWithoutRehash) {
  static_assert(std::is_trivial<Chunk>::value, "F14Chunk should be POD");
  auto chunks = static_cast<Chunk*>(static_cast<void*>(&*raw));
  for (std::size_t i = 0; i < chunkCount; ++i) {
    chunks[i].clear();
  }
  chunks[0].markEof(chunkCount == 1 ? maxSizeWithoutRehash : 1);
  return std::pointer_traits<ChunkPtr>::pointer_to(*chunks);
}
|
||
|
|
||
|
public:
|
||
|
// Returns the begin iterator unpacked from the stored packed form.
// Only valid when the policy enables item iteration.
ItemIter begin() const noexcept {
  FOLLY_SAFE_DCHECK(kEnableItemIteration, "");
  return ItemIter{sizeAndPackedBegin_.packedBegin()};
}
|
||
|
|
||
|
// Returns the end iterator (a default-constructed ItemIter).
ItemIter end() const noexcept {
  return ItemIter{};
}
|
||
|
|
||
|
bool empty() const noexcept {
|
||
|
return size() == 0;
|
||
|
}
|
||
|
|
||
|
// Number of elements currently stored.
InternalSizeType size() const noexcept {
  return sizeAndPackedBegin_.size_;
}
|
||
|
|
||
|
std::size_t max_size() const noexcept {
|
||
|
auto& a = this->alloc();
|
||
|
return std::min<std::size_t>(
|
||
|
(std::numeric_limits<InternalSizeType>::max)(),
|
||
|
AllocTraits::max_size(a));
|
||
|
}
|
||
|
|
||
|
std::size_t bucket_count() const noexcept {
|
||
|
// bucket_count is just a synthetic construct for the outside world
|
||
|
// so that size, bucket_count, load_factor, and max_load_factor are
|
||
|
// all self-consistent. The only one of those that is real is size().
|
||
|
if (chunkMask_ != 0) {
|
||
|
return (chunkMask_ + 1) * Chunk::kDesiredCapacity;
|
||
|
} else {
|
||
|
return chunks_->chunk0Capacity();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::size_t max_bucket_count() const noexcept {
|
||
|
return max_size();
|
||
|
}
|
||
|
|
||
|
float load_factor() const noexcept {
|
||
|
return empty()
|
||
|
? 0.0f
|
||
|
: static_cast<float>(size()) / static_cast<float>(bucket_count());
|
||
|
}
|
||
|
|
||
|
// The maximum load factor is fixed at 1.0f (see the setter overload).
float max_load_factor() const noexcept {
  return 1.0f;
}
|
||
|
|
||
|
// Intentionally a no-op: the requested value is ignored.
void max_load_factor(float) noexcept {
  // Probing hash tables can't run load factors >= 1 (unlike chaining
  // tables).  In addition, we have measured that there is little or
  // no performance advantage to running a smaller load factor (cache
  // locality losses outweigh the small reduction in probe lengths,
  // often making it slower).  Therefore, we've decided to just fix
  // max_load_factor at 1.0f regardless of what the user requests.
  // This has an additional advantage that we don't have to store it.
  // Taking alignment into consideration this makes every F14 table
  // 8 bytes smaller, and is part of the reason an empty F14NodeMap
  // is almost half the size of an empty std::unordered_map (32 vs
  // 56 bytes).
  //
  // I don't have a strong opinion on whether we should remove this
  // method or leave a stub, let ngbronson or xshi know if you have a
  // compelling argument either way.
}
|
||
|
|
||
|
private:
|
||
|
// Our probe strategy is to advance through additional chunks with
|
||
|
// a stride that is key-specific. This is called double hashing,
|
||
|
// and is a well known and high quality probing strategy. So long as
|
||
|
// the stride and the chunk count are relatively prime, we will visit
|
||
|
// every chunk once and then return to the original chunk, letting us
|
||
|
// detect and end the cycle. The chunk count is a power of two, so
|
||
|
// we can satisfy the relatively prime part by choosing an odd stride.
|
||
|
// We've already computed a high quality secondary hash value for the
|
||
|
// tag, so we just use it for the second probe hash as well.
|
||
|
//
|
||
|
// At the maximum load factor of 12/14, expected probe length for a
|
||
|
// find hit is 1.041, with 99% of keys found in the first three chunks.
|
||
|
// Expected probe length for a find miss (or insert) is 1.275, with a
|
||
|
// p99 probe length of 4 (fewer than 1% of failing find look at 5 or
|
||
|
// more chunks).
|
||
|
//
|
||
|
// This code is structured so you can try various ways of encoding
|
||
|
// the current probe state. For example, at the moment the probe's
|
||
|
// state is the position in the cycle and the resulting chunk index is
|
||
|
// computed from that inside probeCurrentIndex. We could also make the
|
||
|
// probe state the chunk index, and then increment it by hp.second *
|
||
|
// 2 + 1 in probeAdvance. Wrapping can be applied early or late as
|
||
|
// well. This particular code seems to be easier for the optimizer
|
||
|
// to understand.
|
||
|
//
|
||
|
// We could also implement probing strategies that resulted in the same
|
||
|
// tour for every key initially assigned to a chunk (linear probing or
|
||
|
// quadratic), but that results in longer probe lengths. In particular,
|
||
|
// the cache locality wins of linear probing are not worth the increase
|
||
|
// in probe lengths (extra work and less branch predictability) in
|
||
|
// our experiments.
|
||
|
|
||
|
std::size_t probeDelta(HashPair hp) const {
|
||
|
return 2 * hp.second + 1;
|
||
|
}
|
||
|
|
||
|
template <typename K>
|
||
|
FOLLY_ALWAYS_INLINE ItemIter findImpl(HashPair hp, K const& key) const {
|
||
|
std::size_t index = hp.first;
|
||
|
std::size_t step = probeDelta(hp);
|
||
|
for (std::size_t tries = 0; tries <= chunkMask_; ++tries) {
|
||
|
ChunkPtr chunk = chunks_ + (index & chunkMask_);
|
||
|
if (sizeof(Chunk) > 64) {
|
||
|
prefetchAddr(chunk->itemAddr(8));
|
||
|
}
|
||
|
auto hits = chunk->tagMatchIter(hp.second);
|
||
|
while (hits.hasNext()) {
|
||
|
auto i = hits.next();
|
||
|
if (LIKELY(this->keyMatchesItem(key, chunk->item(i)))) {
|
||
|
// Tag match and key match were both successful. The chance
|
||
|
// of a false tag match is 1/128 for each key in the chunk
|
||
|
// (with a proper hash function).
|
||
|
return ItemIter{chunk, i};
|
||
|
}
|
||
|
}
|
||
|
if (LIKELY(chunk->outboundOverflowCount() == 0)) {
|
||
|
// No keys that wanted to be placed in this chunk were denied
|
||
|
// entry, so our search is over. This is the common case.
|
||
|
break;
|
||
|
}
|
||
|
index += step;
|
||
|
}
|
||
|
// Loop exit because tries is exhausted is rare, but possible.
|
||
|
// That means that for every chunk there is currently a key present
|
||
|
// in the map that visited that chunk on its probe search but ended
|
||
|
// up somewhere else, and we have searched every chunk.
|
||
|
return ItemIter{};
|
||
|
}
|
||
|
|
||
|
public:
|
||
|
// Prehashing splits the work of find(key) into two calls, enabling you
|
||
|
// to manually implement loop pipelining for hot bulk lookups. prehash
|
||
|
// computes the hash and prefetches the first computed memory location,
|
||
|
// and the two-arg find(F14HashToken,K) performs the rest of the search.
|
||
|
template <typename K>
|
||
|
F14HashToken prehash(K const& key) const {
|
||
|
FOLLY_SAFE_DCHECK(chunks_ != nullptr, "");
|
||
|
auto hp = splitHash(this->computeKeyHash(key));
|
||
|
ChunkPtr firstChunk = chunks_ + (hp.first & chunkMask_);
|
||
|
prefetchAddr(firstChunk);
|
||
|
return F14HashToken(std::move(hp));
|
||
|
}
|
||
|
|
||
|
template <typename K>
|
||
|
FOLLY_ALWAYS_INLINE ItemIter find(K const& key) const {
|
||
|
auto hp = splitHash(this->computeKeyHash(key));
|
||
|
return findImpl(hp, key);
|
||
|
}
|
||
|
|
||
|
template <typename K>
|
||
|
FOLLY_ALWAYS_INLINE ItemIter
|
||
|
find(F14HashToken const& token, K const& key) const {
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
splitHash(this->computeKeyHash(key)) == static_cast<HashPair>(token),
|
||
|
"");
|
||
|
return findImpl(static_cast<HashPair>(token), key);
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
// Bookkeeping after an item has been constructed at `iter`: raises the
// stored begin position if the new item packs higher, then increments
// the size.
void adjustSizeAndBeginAfterInsert(ItemIter iter) {
  if (kEnableItemIteration) {
    // packedBegin is the max of all valid ItemIter::pack()
    auto packed = iter.pack();
    if (sizeAndPackedBegin_.packedBegin() < packed) {
      sizeAndPackedBegin_.packedBegin() = packed;
    }
  }

  ++sizeAndPackedBegin_.size_;
}
|
||
|
|
||
|
// Ignores hp if pos.chunk()->hostedOverflowCount() == 0
|
||
|
void eraseBlank(ItemIter iter, HashPair hp) {
|
||
|
iter.chunk()->clearTag(iter.index());
|
||
|
|
||
|
if (iter.chunk()->hostedOverflowCount() != 0) {
|
||
|
// clean up
|
||
|
std::size_t index = hp.first;
|
||
|
std::size_t delta = probeDelta(hp);
|
||
|
uint8_t hostedOp = 0;
|
||
|
while (true) {
|
||
|
ChunkPtr chunk = chunks_ + (index & chunkMask_);
|
||
|
if (chunk == iter.chunk()) {
|
||
|
chunk->adjustHostedOverflowCount(hostedOp);
|
||
|
break;
|
||
|
}
|
||
|
chunk->decrOutboundOverflowCount();
|
||
|
hostedOp = Chunk::kDecrHostedOverflowCount;
|
||
|
index += delta;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Bookkeeping before the item at `iter` is erased: decrements the size
// and, if `iter` is the stored begin position, moves begin to the next
// item (or to end when the table becomes empty).
void adjustSizeAndBeginBeforeErase(ItemIter iter) {
  --sizeAndPackedBegin_.size_;
  if (kEnableItemIteration) {
    if (iter.pack() == sizeAndPackedBegin_.packedBegin()) {
      if (size() == 0) {
        iter = ItemIter{};
      } else {
        // size() > 0 here, so the eof check is skipped.
        iter.precheckedAdvance();
      }
      sizeAndPackedBegin_.packedBegin() = iter.pack();
    }
  }
}
|
||
|
|
||
|
template <typename... Args>
|
||
|
void insertAtBlank(ItemIter pos, HashPair hp, Args&&... args) {
|
||
|
try {
|
||
|
auto dst = pos.itemAddr();
|
||
|
this->constructValueAtItem(size(), dst, std::forward<Args>(args)...);
|
||
|
} catch (...) {
|
||
|
eraseBlank(pos, hp);
|
||
|
throw;
|
||
|
}
|
||
|
adjustSizeAndBeginAfterInsert(pos);
|
||
|
}
|
||
|
|
||
|
// Claims a slot for hp using the per-chunk fill counters in `fullness`.
// Follows hp's probe sequence until a chunk with fewer than
// Chunk::kCapacity used slots is found, incrementing the outbound
// overflow count of every full chunk passed along the way.  Sets the
// tag for the claimed slot and returns an iterator to it.
ItemIter allocateTag(uint8_t* fullness, HashPair hp) {
  ChunkPtr chunk;
  std::size_t index = hp.first;
  std::size_t delta = probeDelta(hp);
  // Stays 0 unless at least one full chunk was skipped.
  uint8_t hostedOp = 0;
  while (true) {
    index &= chunkMask_;
    chunk = chunks_ + index;
    if (LIKELY(fullness[index] < Chunk::kCapacity)) {
      break;
    }
    chunk->incrOutboundOverflowCount();
    hostedOp = Chunk::kIncrHostedOverflowCount;
    index += delta;
  }
  // Slots are handed out in increasing index order within each chunk.
  unsigned itemIndex = fullness[index]++;
  FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
  chunk->setTag(itemIndex, hp.second);
  chunk->adjustHostedOverflowCount(hostedOp);
  return ItemIter{chunk, itemIndex};
}
|
||
|
|
||
|
// Returns the chunk from which a downward scan over all items can start:
// begin()'s chunk when item iteration is enabled, otherwise the last
// chunk of the array.  The table must not be empty.
ChunkPtr lastOccupiedChunk() const {
  FOLLY_SAFE_DCHECK(size() > 0, "");
  if (kEnableItemIteration) {
    return begin().chunk();
  } else {
    return chunks_ + chunkMask_;
  }
}
|
||
|
|
||
|
template <typename T>
|
||
|
void directBuildFrom(T&& src) {
|
||
|
FOLLY_SAFE_DCHECK(src.size() > 0 && chunkMask_ == src.chunkMask_, "");
|
||
|
|
||
|
// We use std::forward<T> to allow portions of src to be moved out by
|
||
|
// either beforeBuild or afterBuild, but we are just relying on good
|
||
|
// behavior of our Policy superclass to ensure that any particular
|
||
|
// field of this is a donor at most once.
|
||
|
|
||
|
auto undoState =
|
||
|
this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
|
||
|
bool success = false;
|
||
|
SCOPE_EXIT {
|
||
|
this->afterBuild(
|
||
|
undoState, success, src.size(), bucket_count(), std::forward<T>(src));
|
||
|
};
|
||
|
|
||
|
// Copy can fail part-way through if a Value copy constructor throws.
|
||
|
// Failing afterBuild is limited in its cleanup power in this case,
|
||
|
// because it can't enumerate the items that were actually copied.
|
||
|
// Fortunately we can divide the situation into cases where all of
|
||
|
// the state is owned by the table itself (F14Node and F14Value),
|
||
|
// for which clearImpl() can do partial cleanup, and cases where all
|
||
|
// of the values are owned by the policy (F14Vector), in which case
|
||
|
// partial failure should not occur. Sorry for the subtle invariants
|
||
|
// in the Policy API.
|
||
|
|
||
|
if (is_trivially_copyable<Item>::value && !this->destroyItemOnClear() &&
|
||
|
bucket_count() == src.bucket_count()) {
|
||
|
// most happy path
|
||
|
auto n = chunkAllocSize(chunkMask_ + 1, bucket_count());
|
||
|
std::memcpy(&chunks_[0], &src.chunks_[0], n);
|
||
|
sizeAndPackedBegin_.size_ = src.size();
|
||
|
if (kEnableItemIteration) {
|
||
|
auto srcBegin = src.begin();
|
||
|
sizeAndPackedBegin_.packedBegin() =
|
||
|
ItemIter{chunks_ + (srcBegin.chunk() - src.chunks_),
|
||
|
srcBegin.index()}
|
||
|
.pack();
|
||
|
}
|
||
|
} else {
|
||
|
std::size_t maxChunkIndex = src.lastOccupiedChunk() - src.chunks_;
|
||
|
|
||
|
// happy path, no rehash but pack items toward bottom of chunk and
|
||
|
// use copy constructor
|
||
|
auto srcChunk = &src.chunks_[maxChunkIndex];
|
||
|
Chunk* dstChunk = &chunks_[maxChunkIndex];
|
||
|
do {
|
||
|
dstChunk->copyOverflowInfoFrom(*srcChunk);
|
||
|
|
||
|
auto iter = srcChunk->occupiedIter();
|
||
|
if (prefetchBeforeCopy()) {
|
||
|
for (auto piter = iter; piter.hasNext();) {
|
||
|
this->prefetchValue(srcChunk->citem(piter.next()));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::size_t dstI = 0;
|
||
|
for (; iter.hasNext(); ++dstI) {
|
||
|
auto srcI = iter.next();
|
||
|
auto&& srcArg =
|
||
|
std::forward<T>(src).buildArgForItem(srcChunk->item(srcI));
|
||
|
auto dst = dstChunk->itemAddr(dstI);
|
||
|
this->constructValueAtItem(
|
||
|
0, dst, std::forward<decltype(srcArg)>(srcArg));
|
||
|
dstChunk->setTag(dstI, srcChunk->tag(srcI));
|
||
|
++sizeAndPackedBegin_.size_;
|
||
|
}
|
||
|
|
||
|
--srcChunk;
|
||
|
--dstChunk;
|
||
|
} while (size() != src.size());
|
||
|
|
||
|
// reset doesn't care about packedBegin, so we don't fix it until the end
|
||
|
if (kEnableItemIteration) {
|
||
|
sizeAndPackedBegin_.packedBegin() =
|
||
|
ItemIter{chunks_ + maxChunkIndex,
|
||
|
chunks_[maxChunkIndex].lastOccupied().index()}
|
||
|
.pack();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
success = true;
|
||
|
}
|
||
|
|
||
|
template <typename T>
|
||
|
void rehashBuildFrom(T&& src) {
|
||
|
FOLLY_SAFE_DCHECK(src.chunkMask_ > chunkMask_, "");
|
||
|
|
||
|
// 1 byte per chunk means < 1 bit per value temporary overhead
|
||
|
std::array<uint8_t, 256> stackBuf;
|
||
|
uint8_t* fullness;
|
||
|
auto cc = chunkMask_ + 1;
|
||
|
if (cc <= stackBuf.size()) {
|
||
|
fullness = stackBuf.data();
|
||
|
} else {
|
||
|
ByteAlloc a{this->alloc()};
|
||
|
fullness = &*std::allocator_traits<ByteAlloc>::allocate(a, cc);
|
||
|
}
|
||
|
SCOPE_EXIT {
|
||
|
if (cc > stackBuf.size()) {
|
||
|
ByteAlloc a{this->alloc()};
|
||
|
std::allocator_traits<ByteAlloc>::deallocate(
|
||
|
a,
|
||
|
std::pointer_traits<typename std::allocator_traits<
|
||
|
ByteAlloc>::pointer>::pointer_to(*fullness),
|
||
|
cc);
|
||
|
}
|
||
|
};
|
||
|
std::memset(fullness, '\0', cc);
|
||
|
|
||
|
// We use std::forward<T> to allow portions of src to be moved out by
|
||
|
// either beforeBuild or afterBuild, but we are just relying on good
|
||
|
// behavior of our Policy superclass to ensure that any particular
|
||
|
// field of this is a donor at most once.
|
||
|
|
||
|
// Exception safety requires beforeBuild to happen after all of the
|
||
|
// allocate() calls.
|
||
|
auto undoState =
|
||
|
this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
|
||
|
bool success = false;
|
||
|
SCOPE_EXIT {
|
||
|
this->afterBuild(
|
||
|
undoState, success, src.size(), bucket_count(), std::forward<T>(src));
|
||
|
};
|
||
|
|
||
|
// The current table is at a valid state at all points for policies
|
||
|
// in which non-trivial values are owned by the main table (F14Node
|
||
|
// and F14Value), so reset() will clean things up properly if we
|
||
|
// fail partway through. For the case that the policy manages value
|
||
|
// lifecycle (F14Vector) then nothing after beforeBuild can throw and
|
||
|
// we don't have to worry about partial failure.
|
||
|
|
||
|
std::size_t srcChunkIndex = src.lastOccupiedChunk() - src.chunks_;
|
||
|
while (true) {
|
||
|
auto srcChunk = &src.chunks_[srcChunkIndex];
|
||
|
auto iter = srcChunk->occupiedIter();
|
||
|
if (prefetchBeforeRehash()) {
|
||
|
for (auto piter = iter; piter.hasNext();) {
|
||
|
this->prefetchValue(srcChunk->item(piter.next()));
|
||
|
}
|
||
|
}
|
||
|
if (srcChunk->hostedOverflowCount() == 0) {
|
||
|
// all items are in their preferred chunk (no probing), so we
|
||
|
// don't need to compute any hash values
|
||
|
while (iter.hasNext()) {
|
||
|
auto i = iter.next();
|
||
|
auto& srcItem = srcChunk->item(i);
|
||
|
auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
|
||
|
HashPair hp{srcChunkIndex, srcChunk->tag(i)};
|
||
|
insertAtBlank(
|
||
|
allocateTag(fullness, hp),
|
||
|
hp,
|
||
|
std::forward<decltype(srcArg)>(srcArg));
|
||
|
}
|
||
|
} else {
|
||
|
// any chunk's items might be in here
|
||
|
while (iter.hasNext()) {
|
||
|
auto i = iter.next();
|
||
|
auto& srcItem = srcChunk->item(i);
|
||
|
auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
|
||
|
auto const& srcKey = src.keyForValue(srcArg);
|
||
|
auto hp = splitHash(this->computeKeyHash(srcKey));
|
||
|
FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(i), "");
|
||
|
insertAtBlank(
|
||
|
allocateTag(fullness, hp),
|
||
|
hp,
|
||
|
std::forward<decltype(srcArg)>(srcArg));
|
||
|
}
|
||
|
}
|
||
|
if (srcChunkIndex == 0) {
|
||
|
break;
|
||
|
}
|
||
|
--srcChunkIndex;
|
||
|
}
|
||
|
|
||
|
success = true;
|
||
|
}
|
||
|
|
||
|
template <typename T>
|
||
|
FOLLY_NOINLINE void buildFromF14Table(T&& src) {
|
||
|
FOLLY_SAFE_DCHECK(size() == 0, "");
|
||
|
if (src.size() == 0) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
reserveForInsert(src.size());
|
||
|
try {
|
||
|
if (chunkMask_ == src.chunkMask_) {
|
||
|
directBuildFrom(std::forward<T>(src));
|
||
|
} else {
|
||
|
rehashBuildFrom(std::forward<T>(src));
|
||
|
}
|
||
|
} catch (...) {
|
||
|
reset();
|
||
|
F14LinkCheck<getF14IntrinsicsMode()>::check();
|
||
|
throw;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Computes the chunk count and capacity needed to hold `capacity`
// elements and rehashes if the resulting capacity differs from
// origMaxSizeWithoutRehash.  Throws std::bad_alloc when the required
// capacity would overflow or exceed max_size().
FOLLY_NOINLINE void reserveImpl(
    std::size_t capacity,
    std::size_t origChunkCount,
    std::size_t origMaxSizeWithoutRehash) {
  FOLLY_SAFE_DCHECK(capacity >= size(), "");

  // compute new size
  std::size_t const kInitialCapacity = 2;
  // Half of the desired per-chunk capacity, rounded down to an even
  // number.
  std::size_t const kHalfChunkCapacity =
      (Chunk::kDesiredCapacity / 2) & ~std::size_t{1};
  std::size_t newMaxSizeWithoutRehash;
  std::size_t newChunkCount;
  if (capacity <= kHalfChunkCapacity) {
    // Small tables use a single, partially sized chunk.
    newChunkCount = 1;
    newMaxSizeWithoutRehash =
        (capacity < kInitialCapacity) ? kInitialCapacity : kHalfChunkCapacity;
  } else {
    newChunkCount = nextPowTwo((capacity - 1) / Chunk::kDesiredCapacity + 1);
    newMaxSizeWithoutRehash = newChunkCount * Chunk::kDesiredCapacity;

    constexpr std::size_t kMaxChunksWithoutCapacityOverflow =
        (std::numeric_limits<std::size_t>::max)() / Chunk::kDesiredCapacity;

    // The first test detects wrap-around in the multiplication above.
    if (newChunkCount > kMaxChunksWithoutCapacityOverflow ||
        newMaxSizeWithoutRehash > max_size()) {
      throw_exception<std::bad_alloc>();
    }
  }

  if (origMaxSizeWithoutRehash != newMaxSizeWithoutRehash) {
    rehashImpl(
        origChunkCount,
        origMaxSizeWithoutRehash,
        newChunkCount,
        newMaxSizeWithoutRehash);
  }
}
|
||
|
|
||
|
void rehashImpl(
|
||
|
std::size_t origChunkCount,
|
||
|
std::size_t origMaxSizeWithoutRehash,
|
||
|
std::size_t newChunkCount,
|
||
|
std::size_t newMaxSizeWithoutRehash) {
|
||
|
auto origChunks = chunks_;
|
||
|
|
||
|
BytePtr rawAllocation;
|
||
|
auto undoState = this->beforeRehash(
|
||
|
size(),
|
||
|
origMaxSizeWithoutRehash,
|
||
|
newMaxSizeWithoutRehash,
|
||
|
chunkAllocSize(newChunkCount, newMaxSizeWithoutRehash),
|
||
|
rawAllocation);
|
||
|
chunks_ =
|
||
|
initializeChunks(rawAllocation, newChunkCount, newMaxSizeWithoutRehash);
|
||
|
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
newChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
|
||
|
chunkMask_ = static_cast<InternalSizeType>(newChunkCount - 1);
|
||
|
|
||
|
bool success = false;
|
||
|
SCOPE_EXIT {
|
||
|
// this SCOPE_EXIT reverts chunks_ and chunkMask_ if necessary
|
||
|
BytePtr finishedRawAllocation = nullptr;
|
||
|
std::size_t finishedAllocSize = 0;
|
||
|
if (LIKELY(success)) {
|
||
|
if (origMaxSizeWithoutRehash > 0) {
|
||
|
finishedRawAllocation = std::pointer_traits<BytePtr>::pointer_to(
|
||
|
*static_cast<uint8_t*>(static_cast<void*>(&*origChunks)));
|
||
|
finishedAllocSize =
|
||
|
chunkAllocSize(origChunkCount, origMaxSizeWithoutRehash);
|
||
|
}
|
||
|
} else {
|
||
|
finishedRawAllocation = rawAllocation;
|
||
|
finishedAllocSize =
|
||
|
chunkAllocSize(newChunkCount, newMaxSizeWithoutRehash);
|
||
|
chunks_ = origChunks;
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
origChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
|
||
|
chunkMask_ = static_cast<InternalSizeType>(origChunkCount - 1);
|
||
|
F14LinkCheck<getF14IntrinsicsMode()>::check();
|
||
|
}
|
||
|
|
||
|
this->afterRehash(
|
||
|
std::move(undoState),
|
||
|
success,
|
||
|
size(),
|
||
|
origMaxSizeWithoutRehash,
|
||
|
newMaxSizeWithoutRehash,
|
||
|
finishedRawAllocation,
|
||
|
finishedAllocSize);
|
||
|
};
|
||
|
|
||
|
if (size() == 0) {
|
||
|
// nothing to do
|
||
|
} else if (origChunkCount == 1 && newChunkCount == 1) {
|
||
|
// no mask, no chunk scan, no hash computation, no probing
|
||
|
auto srcChunk = origChunks;
|
||
|
auto dstChunk = chunks_;
|
||
|
std::size_t srcI = 0;
|
||
|
std::size_t dstI = 0;
|
||
|
while (dstI < size()) {
|
||
|
if (LIKELY(srcChunk->occupied(srcI))) {
|
||
|
dstChunk->setTag(dstI, srcChunk->tag(srcI));
|
||
|
this->moveItemDuringRehash(
|
||
|
dstChunk->itemAddr(dstI), srcChunk->item(srcI));
|
||
|
++dstI;
|
||
|
}
|
||
|
++srcI;
|
||
|
}
|
||
|
if (kEnableItemIteration) {
|
||
|
sizeAndPackedBegin_.packedBegin() = ItemIter{dstChunk, dstI - 1}.pack();
|
||
|
}
|
||
|
} else {
|
||
|
// 1 byte per chunk means < 1 bit per value temporary overhead
|
||
|
std::array<uint8_t, 256> stackBuf;
|
||
|
uint8_t* fullness;
|
||
|
if (newChunkCount <= stackBuf.size()) {
|
||
|
fullness = stackBuf.data();
|
||
|
} else {
|
||
|
ByteAlloc a{this->alloc()};
|
||
|
// may throw
|
||
|
fullness =
|
||
|
&*std::allocator_traits<ByteAlloc>::allocate(a, newChunkCount);
|
||
|
}
|
||
|
std::memset(fullness, '\0', newChunkCount);
|
||
|
SCOPE_EXIT {
|
||
|
if (newChunkCount > stackBuf.size()) {
|
||
|
ByteAlloc a{this->alloc()};
|
||
|
std::allocator_traits<ByteAlloc>::deallocate(
|
||
|
a,
|
||
|
std::pointer_traits<typename std::allocator_traits<
|
||
|
ByteAlloc>::pointer>::pointer_to(*fullness),
|
||
|
newChunkCount);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
auto srcChunk = origChunks + origChunkCount - 1;
|
||
|
std::size_t remaining = size();
|
||
|
while (remaining > 0) {
|
||
|
auto iter = srcChunk->occupiedIter();
|
||
|
if (prefetchBeforeRehash()) {
|
||
|
for (auto piter = iter; piter.hasNext();) {
|
||
|
this->prefetchValue(srcChunk->item(piter.next()));
|
||
|
}
|
||
|
}
|
||
|
while (iter.hasNext()) {
|
||
|
--remaining;
|
||
|
auto srcI = iter.next();
|
||
|
Item& srcItem = srcChunk->item(srcI);
|
||
|
auto hp = splitHash(
|
||
|
this->computeItemHash(const_cast<Item const&>(srcItem)));
|
||
|
FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(srcI), "");
|
||
|
|
||
|
auto dstIter = allocateTag(fullness, hp);
|
||
|
this->moveItemDuringRehash(dstIter.itemAddr(), srcItem);
|
||
|
}
|
||
|
--srcChunk;
|
||
|
}
|
||
|
|
||
|
if (kEnableItemIteration) {
|
||
|
// this code replaces size invocations of adjustSizeAndBeginAfterInsert
|
||
|
std::size_t i = chunkMask_;
|
||
|
while (fullness[i] == 0) {
|
||
|
--i;
|
||
|
}
|
||
|
sizeAndPackedBegin_.packedBegin() =
|
||
|
ItemIter{chunks_ + i, std::size_t{fullness[i]} - 1}.pack();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
success = true;
|
||
|
}
|
||
|
|
||
|
void asanOnReserve(std::size_t capacity) {
|
||
|
if (kIsSanitizeAddress && capacity > size()) {
|
||
|
asanPendingSafeInserts += capacity - size();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool asanShouldAddExtraRehash() {
|
||
|
if (!kIsSanitizeAddress) {
|
||
|
return false;
|
||
|
} else if (asanPendingSafeInserts > 0) {
|
||
|
--asanPendingSafeInserts;
|
||
|
return false;
|
||
|
} else if (size() <= 1) {
|
||
|
return size() > 0;
|
||
|
} else {
|
||
|
constexpr std::size_t kBigPrime = 4294967291U;
|
||
|
auto s = (asanRehashState += kBigPrime);
|
||
|
return (s % size()) == 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void asanExtraRehash() {
|
||
|
auto cc = chunkMask_ + 1;
|
||
|
auto bc = bucket_count();
|
||
|
rehashImpl(cc, bc, cc, bc);
|
||
|
}
|
||
|
|
||
|
void asanOnInsert() {
|
||
|
// When running under ASAN, we add a spurious rehash with 1/size()
|
||
|
// probability before every insert. This means that finding reference
|
||
|
// stability problems for F14Value and F14Vector is much more likely.
|
||
|
// The most common pattern that causes this is
|
||
|
//
|
||
|
// auto& ref = map[k1]; map[k2] = foo(ref);
|
||
|
//
|
||
|
// One way to fix this is to call map.reserve(N) before such a
|
||
|
// sequence, where N is the number of keys that might be inserted
|
||
|
// within the section that retains references.
|
||
|
if (asanShouldAddExtraRehash()) {
|
||
|
asanExtraRehash();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public:
|
||
|
// user has no control over max_load_factor
|
||
|
|
||
|
// Equivalent to reserve(capacity); simply forwards to it.
void rehash(std::size_t capacity) { reserve(capacity); }
|
||
|
|
||
|
// Ensures room for at least `capacity` items (never fewer than the current
// size()) by delegating to reserveImpl with the current chunk count and
// bucket_count().
//
// asanOnReserve is notified first because we want to support the pattern
//   map.reserve(2); auto& r1 = map[k1]; auto& r2 = map[k2];
void reserve(std::size_t capacity) {
  asanOnReserve(capacity);

  std::size_t const target = std::max<std::size_t>(capacity, size());
  reserveImpl(target, chunkMask_ + 1, bucket_count());
}
|
||
|
|
||
|
// Returns true iff a rehash was performed
|
||
|
void reserveForInsert(size_t incoming = 1) {
|
||
|
auto capacity = size() + incoming;
|
||
|
auto bc = bucket_count();
|
||
|
if (capacity - 1 >= bc) {
|
||
|
reserveImpl(capacity, chunkMask_ + 1, bc);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns pos,true if construct, pos,false if found. key is only used
|
||
|
// during the search; all constructor args for an inserted value come
|
||
|
// from args... key won't be accessed after args are touched.
|
||
|
template <typename K, typename... Args>
|
||
|
std::pair<ItemIter, bool> tryEmplaceValue(K const& key, Args&&... args) {
|
||
|
const auto hp = splitHash(this->computeKeyHash(key));
|
||
|
|
||
|
if (size() > 0) {
|
||
|
auto existing = findImpl(hp, key);
|
||
|
if (!existing.atEnd()) {
|
||
|
return std::make_pair(existing, false);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
asanOnInsert();
|
||
|
|
||
|
reserveForInsert();
|
||
|
|
||
|
std::size_t index = hp.first;
|
||
|
ChunkPtr chunk = chunks_ + (index & chunkMask_);
|
||
|
auto firstEmpty = chunk->firstEmpty();
|
||
|
|
||
|
if (!firstEmpty.hasIndex()) {
|
||
|
std::size_t delta = probeDelta(hp);
|
||
|
do {
|
||
|
chunk->incrOutboundOverflowCount();
|
||
|
index += delta;
|
||
|
chunk = chunks_ + (index & chunkMask_);
|
||
|
firstEmpty = chunk->firstEmpty();
|
||
|
} while (!firstEmpty.hasIndex());
|
||
|
chunk->adjustHostedOverflowCount(Chunk::kIncrHostedOverflowCount);
|
||
|
}
|
||
|
std::size_t itemIndex = firstEmpty.index();
|
||
|
FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
|
||
|
|
||
|
chunk->setTag(itemIndex, hp.second);
|
||
|
ItemIter iter{chunk, itemIndex};
|
||
|
|
||
|
// insertAtBlank will clear the tag if the constructor throws
|
||
|
insertAtBlank(iter, hp, std::forward<Args>(args)...);
|
||
|
return std::make_pair(iter, true);
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
template <bool Reset>
|
||
|
void clearImpl() noexcept {
|
||
|
if (chunks_ == Chunk::emptyInstance()) {
|
||
|
FOLLY_SAFE_DCHECK(empty() && bucket_count() == 0, "");
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// turn clear into reset if the table is >= 16 chunks so that
|
||
|
// we don't get too low a load factor
|
||
|
bool willReset = Reset || chunkMask_ + 1 >= 16;
|
||
|
|
||
|
auto origSize = size();
|
||
|
auto origCapacity = bucket_count();
|
||
|
if (willReset) {
|
||
|
this->beforeReset(origSize, origCapacity);
|
||
|
} else {
|
||
|
this->beforeClear(origSize, origCapacity);
|
||
|
}
|
||
|
|
||
|
if (!empty()) {
|
||
|
if (destroyItemOnClear()) {
|
||
|
for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
|
||
|
ChunkPtr chunk = chunks_ + ci;
|
||
|
auto iter = chunk->occupiedIter();
|
||
|
if (prefetchBeforeDestroy()) {
|
||
|
for (auto piter = iter; piter.hasNext();) {
|
||
|
this->prefetchValue(chunk->item(piter.next()));
|
||
|
}
|
||
|
}
|
||
|
while (iter.hasNext()) {
|
||
|
this->destroyItem(chunk->item(iter.next()));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if (!willReset) {
|
||
|
// It's okay to do this in a separate loop because we only do it
|
||
|
// when the chunk count is small. That avoids a branch when we
|
||
|
// are promoting a clear to a reset for a large table.
|
||
|
auto c0c = chunks_[0].chunk0Capacity();
|
||
|
for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
|
||
|
chunks_[ci].clear();
|
||
|
}
|
||
|
chunks_[0].markEof(c0c);
|
||
|
}
|
||
|
if (kEnableItemIteration) {
|
||
|
sizeAndPackedBegin_.packedBegin() = ItemIter{}.pack();
|
||
|
}
|
||
|
sizeAndPackedBegin_.size_ = 0;
|
||
|
}
|
||
|
|
||
|
if (willReset) {
|
||
|
BytePtr rawAllocation = std::pointer_traits<BytePtr>::pointer_to(
|
||
|
*static_cast<uint8_t*>(static_cast<void*>(&*chunks_)));
|
||
|
std::size_t rawSize = chunkAllocSize(chunkMask_ + 1, bucket_count());
|
||
|
|
||
|
chunks_ = Chunk::emptyInstance();
|
||
|
chunkMask_ = 0;
|
||
|
|
||
|
this->afterReset(origSize, origCapacity, rawAllocation, rawSize);
|
||
|
} else {
|
||
|
this->afterClear(origSize, origCapacity);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Removes the item at `pos`: destroys it, updates the size/begin
// bookkeeping, then releases its slot via eraseBlank.
//
//   pos  position of the item to remove
//   hp   hash pair forwarded to eraseBlank
void eraseImpl(ItemIter pos, HashPair hp) {
  // 1. run the item's destruction
  this->destroyItem(pos.item());

  // 2. fix up size and the packed begin position while pos is still marked
  adjustSizeAndBeginBeforeErase(pos);

  // 3. free the slot itself
  eraseBlank(pos, hp);
}
|
||
|
|
||
|
public:
|
||
|
// The item needs to still be hashable during this call. If you want
|
||
|
// to intercept the value before it is destroyed (to extract it, for
|
||
|
// example), use eraseIterInto(pos, beforeDestroy).
|
||
|
void eraseIter(ItemIter pos) {
|
||
|
eraseIterInto(pos, [](value_type&&) {});
|
||
|
}
|
||
|
|
||
|
// The item needs to still be hashable during this call. If you want
|
||
|
// to intercept the value before it is destroyed (to extract it, for
|
||
|
// example), do so in the beforeDestroy callback.
|
||
|
template <typename BeforeDestroy>
|
||
|
void eraseIterInto(ItemIter pos, BeforeDestroy&& beforeDestroy) {
|
||
|
HashPair hp{};
|
||
|
if (pos.chunk()->hostedOverflowCount() != 0) {
|
||
|
hp = splitHash(this->computeItemHash(pos.citem()));
|
||
|
}
|
||
|
beforeDestroy(this->valueAtItemForExtract(pos.item()));
|
||
|
eraseImpl(pos, hp);
|
||
|
}
|
||
|
|
||
|
template <typename K>
|
||
|
std::size_t eraseKey(K const& key) {
|
||
|
return eraseKeyInto(key, [](value_type&&) {});
|
||
|
}
|
||
|
|
||
|
template <typename K, typename BeforeDestroy>
|
||
|
std::size_t eraseKeyInto(K const& key, BeforeDestroy&& beforeDestroy) {
|
||
|
if (UNLIKELY(size() == 0)) {
|
||
|
return 0;
|
||
|
}
|
||
|
auto hp = splitHash(this->computeKeyHash(key));
|
||
|
auto iter = findImpl(hp, key);
|
||
|
if (!iter.atEnd()) {
|
||
|
beforeDestroy(this->valueAtItemForExtract(iter.item()));
|
||
|
eraseImpl(iter, hp);
|
||
|
return 1;
|
||
|
} else {
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void clear() noexcept {
|
||
|
if (kIsSanitizeAddress) {
|
||
|
// force recycling of heap memory
|
||
|
auto bc = bucket_count();
|
||
|
reset();
|
||
|
try {
|
||
|
reserveImpl(bc, 0, 0);
|
||
|
} catch (std::bad_alloc const&) {
|
||
|
// ASAN mode only, keep going
|
||
|
}
|
||
|
} else {
|
||
|
clearImpl<false>();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Like clear(), but always frees all dynamic storage allocated
|
||
|
// by the table.
|
||
|
void reset() noexcept {
|
||
|
clearImpl<true>();
|
||
|
}
|
||
|
|
||
|
// Get memory footprint, not including sizeof(*this).
|
||
|
std::size_t getAllocatedMemorySize() const {
|
||
|
std::size_t sum = 0;
|
||
|
visitAllocationClasses(
|
||
|
[&sum](std::size_t bytes, std::size_t n) { sum += bytes * n; });
|
||
|
return sum;
|
||
|
}
|
||
|
|
||
|
// Enumerates classes of allocated memory blocks currently owned
|
||
|
// by this table, calling visitor(allocationSize, allocationCount).
|
||
|
// This can be used to get a more accurate indication of memory footprint
|
||
|
// than getAllocatedMemorySize() if you have some way of computing the
|
||
|
// internal fragmentation of the allocator, such as JEMalloc's nallocx.
|
||
|
// The visitor might be called twice with the same allocationSize. The
|
||
|
// visitor's computation should produce the same result for visitor(8,
|
||
|
// 2) as for two calls to visitor(8, 1), for example. The visitor may
|
||
|
// be called with a zero allocationCount.
|
||
|
template <typename V>
|
||
|
void visitAllocationClasses(V&& visitor) const {
|
||
|
auto bc = bucket_count();
|
||
|
this->visitPolicyAllocationClasses(
|
||
|
(bc == 0 ? 0 : chunkAllocSize(chunkMask_ + 1, bc)),
|
||
|
size(),
|
||
|
bc,
|
||
|
visitor);
|
||
|
}
|
||
|
|
||
|
// visitor should take an Item const&
|
||
|
template <typename V>
|
||
|
void visitItems(V&& visitor) const {
|
||
|
if (empty()) {
|
||
|
return;
|
||
|
}
|
||
|
std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
|
||
|
auto chunk = &chunks_[0];
|
||
|
for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
|
||
|
auto iter = chunk->occupiedIter();
|
||
|
if (prefetchBeforeCopy()) {
|
||
|
for (auto piter = iter; piter.hasNext();) {
|
||
|
this->prefetchValue(chunk->citem(piter.next()));
|
||
|
}
|
||
|
}
|
||
|
while (iter.hasNext()) {
|
||
|
visitor(chunk->citem(iter.next()));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// visitor should take two Item const*
|
||
|
template <typename V>
|
||
|
void visitContiguousItemRanges(V&& visitor) const {
|
||
|
if (empty()) {
|
||
|
return;
|
||
|
}
|
||
|
std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
|
||
|
auto chunk = &chunks_[0];
|
||
|
for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
|
||
|
for (auto iter = chunk->occupiedRangeIter(); iter.hasNext();) {
|
||
|
auto be = iter.next();
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
chunk->occupied(be.first) && chunk->occupied(be.second - 1), "");
|
||
|
Item const* b = chunk->itemAddr(be.first);
|
||
|
visitor(b, b + (be.second - be.first));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
// Returns a reference to histo[index], first growing the histogram so
// that `index` is a valid position.  Slots added by the growth are
// value-initialized by resize, i.e. start at zero.
//
//   histo  histogram to index into (may be resized)
//   index  bucket whose counter is wanted
static std::size_t& histoAt(
    std::vector<std::size_t>& histo,
    std::size_t index) {
  if (histo.size() <= index) {
    histo.resize(index + 1);
  }
  // at() keeps the bounds check as a backstop
  return histo.at(index);
}
|
||
|
|
||
|
public:
|
||
|
// Expensive
|
||
|
F14TableStats computeStats() const {
|
||
|
F14TableStats stats;
|
||
|
|
||
|
if (kIsDebug && kEnableItemIteration) {
|
||
|
// validate iteration
|
||
|
std::size_t n = 0;
|
||
|
ItemIter prev;
|
||
|
for (auto iter = begin(); iter != end(); iter.advance()) {
|
||
|
FOLLY_SAFE_DCHECK(n == 0 || iter.pack() < prev.pack(), "");
|
||
|
++n;
|
||
|
prev = iter;
|
||
|
}
|
||
|
FOLLY_SAFE_DCHECK(n == size(), "");
|
||
|
}
|
||
|
|
||
|
FOLLY_SAFE_DCHECK(
|
||
|
(chunks_ == Chunk::emptyInstance()) == (bucket_count() == 0), "");
|
||
|
|
||
|
std::size_t n1 = 0;
|
||
|
std::size_t n2 = 0;
|
||
|
auto cc = bucket_count() == 0 ? 0 : chunkMask_ + 1;
|
||
|
for (std::size_t ci = 0; ci < cc; ++ci) {
|
||
|
ChunkPtr chunk = chunks_ + ci;
|
||
|
FOLLY_SAFE_DCHECK(chunk->eof() == (ci == 0), "");
|
||
|
|
||
|
auto iter = chunk->occupiedIter();
|
||
|
|
||
|
std::size_t chunkOccupied = 0;
|
||
|
for (auto piter = iter; piter.hasNext(); piter.next()) {
|
||
|
++chunkOccupied;
|
||
|
}
|
||
|
n1 += chunkOccupied;
|
||
|
|
||
|
histoAt(stats.chunkOccupancyHisto, chunkOccupied)++;
|
||
|
histoAt(
|
||
|
stats.chunkOutboundOverflowHisto, chunk->outboundOverflowCount())++;
|
||
|
histoAt(stats.chunkHostedOverflowHisto, chunk->hostedOverflowCount())++;
|
||
|
|
||
|
while (iter.hasNext()) {
|
||
|
auto ii = iter.next();
|
||
|
++n2;
|
||
|
|
||
|
{
|
||
|
auto& item = chunk->citem(ii);
|
||
|
auto hp = splitHash(this->computeItemHash(item));
|
||
|
FOLLY_SAFE_DCHECK(chunk->tag(ii) == hp.second, "");
|
||
|
|
||
|
std::size_t dist = 1;
|
||
|
std::size_t index = hp.first;
|
||
|
std::size_t delta = probeDelta(hp);
|
||
|
while ((index & chunkMask_) != ci) {
|
||
|
index += delta;
|
||
|
++dist;
|
||
|
}
|
||
|
|
||
|
histoAt(stats.keyProbeLengthHisto, dist)++;
|
||
|
}
|
||
|
|
||
|
// misses could have any tag, so we do the dumb but accurate
|
||
|
// thing and just try them all
|
||
|
for (std::size_t ti = 0; ti < 256; ++ti) {
|
||
|
uint8_t tag = static_cast<uint8_t>(ti == 0 ? 1 : 0);
|
||
|
HashPair hp{ci, tag};
|
||
|
|
||
|
std::size_t dist = 1;
|
||
|
std::size_t index = hp.first;
|
||
|
std::size_t delta = probeDelta(hp);
|
||
|
for (std::size_t tries = 0; tries <= chunkMask_ &&
|
||
|
chunks_[index & chunkMask_].outboundOverflowCount() != 0;
|
||
|
++tries) {
|
||
|
index += delta;
|
||
|
++dist;
|
||
|
}
|
||
|
|
||
|
histoAt(stats.missProbeLengthHisto, dist)++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
FOLLY_SAFE_DCHECK(n1 == size(), "");
|
||
|
FOLLY_SAFE_DCHECK(n2 == size(), "");
|
||
|
|
||
|
#if FOLLY_HAS_RTTI
|
||
|
stats.policy = typeid(Policy).name();
|
||
|
#endif
|
||
|
stats.size = size();
|
||
|
stats.valueSize = sizeof(value_type);
|
||
|
stats.bucketCount = bucket_count();
|
||
|
stats.chunkCount = cc;
|
||
|
|
||
|
stats.totalBytes = sizeof(*this) + getAllocatedMemorySize();
|
||
|
stats.overheadBytes = stats.totalBytes - size() * sizeof(value_type);
|
||
|
|
||
|
return stats;
|
||
|
}
|
||
|
};
|
||
|
} // namespace detail
|
||
|
} // namespace f14
|
||
|
|
||
|
#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
|
||
|
|
||
|
} // namespace folly
|