/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #if FOLLY_SSE_PREREQ(4, 2) #include #include #endif namespace folly { namespace detail { uint32_t crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum); #if FOLLY_SSE_PREREQ(4, 2) uint32_t crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum); // Fast SIMD implementation of CRC-32 for x86 with pclmul uint32_t crc32_hw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { uint32_t sum = startingChecksum; size_t offset = 0; // Process unaligned bytes if ((uintptr_t)data & 15) { size_t limit = std::min(nbytes, -(uintptr_t)data & 15); sum = crc32_sw(data, limit, sum); offset += limit; nbytes -= limit; } if (nbytes >= 16) { sum = crc32_hw_aligned(sum, (const __m128i*)(data + offset), nbytes / 16); offset += nbytes & ~15; nbytes &= 15; } // Remaining unaligned bytes return crc32_sw(data + offset, nbytes, sum); } bool crc32c_hw_supported() { static folly::CpuId id; return id.sse42(); } bool crc32_hw_supported() { static folly::CpuId id; return id.sse42(); } #else uint32_t crc32_hw( const uint8_t* /* data */, size_t /* nbytes */, uint32_t /* startingChecksum */) { throw std::runtime_error("crc32_hw is not implemented on this platform"); } bool crc32c_hw_supported() { return false; } bool crc32_hw_supported() { return false; } #endif template uint32_t crc_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { // Reverse the bits in the starting checksum so they'll be in the // right internal format for Boost's CRC engine. // O(1)-time, branchless bit reversal algorithm from // http://graphics.stanford.edu/~seander/bithacks.html startingChecksum = ((startingChecksum >> 1) & 0x55555555) | ((startingChecksum & 0x55555555) << 1); startingChecksum = ((startingChecksum >> 2) & 0x33333333) | ((startingChecksum & 0x33333333) << 2); startingChecksum = ((startingChecksum >> 4) & 0x0f0f0f0f) | ((startingChecksum & 0x0f0f0f0f) << 4); startingChecksum = ((startingChecksum >> 8) & 0x00ff00ff) | ((startingChecksum & 0x00ff00ff) << 8); startingChecksum = (startingChecksum >> 16) | (startingChecksum << 16); boost::crc_optimal<32, CRC_POLYNOMIAL, ~0U, 0, true, true> sum( startingChecksum); sum.process_bytes(data, nbytes); return sum.checksum(); } uint32_t crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { constexpr uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41; return crc_sw(data, nbytes, startingChecksum); } uint32_t crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { constexpr uint32_t CRC32_POLYNOMIAL = 0x04C11DB7; return crc_sw(data, nbytes, startingChecksum); } } // namespace detail uint32_t crc32c(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { if (detail::crc32c_hw_supported()) { return detail::crc32c_hw(data, nbytes, startingChecksum); } else { return detail::crc32c_sw(data, nbytes, startingChecksum); } } uint32_t crc32(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { if (detail::crc32_hw_supported()) { return detail::crc32_hw(data, nbytes, startingChecksum); } else { return detail::crc32_sw(data, nbytes, startingChecksum); } } uint32_t crc32_type(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { return ~crc32(data, nbytes, startingChecksum); } uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, size_t crc2len) { // Append up to 32 bits of zeroes in the normal way uint8_t data[4] = {0, 0, 0, 0}; auto len = crc2len & 3; if (len) { crc1 = crc32(data, len, crc1); } if (detail::crc32_hw_supported()) { return detail::crc32_combine_hw(crc1, crc2, crc2len); } else { return detail::crc32_combine_sw(crc1, crc2, crc2len); } } uint32_t crc32c_combine(uint32_t crc1, uint32_t crc2, size_t crc2len) { // Append up to 32 bits of zeroes in the normal way uint8_t data[4] = {0, 0, 0, 0}; auto len = crc2len & 3; if (len) { crc1 = crc32c(data, len, crc1); } if (detail::crc32_hw_supported()) { return detail::crc32c_combine_hw(crc1, crc2, crc2len - len); } else { return detail::crc32c_combine_sw(crc1, crc2, crc2len - len); } } } // namespace folly