/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include namespace folly { using namespace recordio_helpers; RecordIOWriter::RecordIOWriter(File file, uint32_t fileId) : file_(std::move(file)), fileId_(fileId), writeLock_(file_, std::defer_lock), filePos_(0) { if (!writeLock_.try_lock()) { throw std::runtime_error("RecordIOWriter: file locked by another process"); } struct stat st; checkUnixError(fstat(file_.fd(), &st), "fstat() failed"); filePos_ = st.st_size; } void RecordIOWriter::write(std::unique_ptr buf) { size_t totalLength = prependHeader(buf, fileId_); if (totalLength == 0) { return; // nothing to do } DCHECK_EQ(buf->computeChainDataLength(), totalLength); // We're going to write. Reserve space for ourselves. off_t pos = filePos_.fetch_add(off_t(totalLength)); #if FOLLY_HAVE_PWRITEV auto iov = buf->getIov(); ssize_t bytes = pwritevFull(file_.fd(), iov.data(), iov.size(), pos); #else buf->unshare(); buf->coalesce(); ssize_t bytes = pwriteFull(file_.fd(), buf->data(), buf->length(), pos); #endif checkUnixError(bytes, "pwrite() failed"); DCHECK_EQ(size_t(bytes), totalLength); } RecordIOReader::RecordIOReader(File file, uint32_t fileId) : map_(std::move(file)), fileId_(fileId) {} RecordIOReader::Iterator::Iterator(ByteRange range, uint32_t fileId, off_t pos) : range_(range), fileId_(fileId), recordAndPos_(ByteRange(), 0) { if (size_t(pos) >= range_.size()) { // Note that this branch can execute if pos is negative as well. recordAndPos_.second = off_t(-1); range_.clear(); } else { recordAndPos_.second = pos; range_.advance(size_t(pos)); advanceToValid(); } } void RecordIOReader::Iterator::advanceToValid() { ByteRange record = findRecord(range_, fileId_).record; if (record.empty()) { recordAndPos_ = std::make_pair(ByteRange(), off_t(-1)); range_.clear(); // at end } else { auto skipped = size_t(record.begin() - range_.begin()); DCHECK_GE(skipped, headerSize()); skipped -= headerSize(); range_.advance(skipped); recordAndPos_.first = record; recordAndPos_.second += off_t(skipped); } } namespace recordio_helpers { using recordio_detail::Header; namespace { constexpr uint32_t kHashSeed = 0xdeadbeef; // for mcurtiss uint32_t headerHash(const Header& header) { return hash::SpookyHashV2::Hash32( &header, offsetof(Header, headerHash), kHashSeed); } std::pair dataLengthAndHash(const IOBuf* buf) { size_t len = 0; hash::SpookyHashV2 hasher; hasher.Init(kHashSeed, kHashSeed); for (auto br : *buf) { len += br.size(); hasher.Update(br.data(), br.size()); } uint64_t hash1; uint64_t hash2; hasher.Final(&hash1, &hash2); if (len + headerSize() >= std::numeric_limits::max()) { throw std::invalid_argument("Record length must fit in 32 bits"); } return std::make_pair(len, static_cast(hash1)); } std::size_t dataHash(ByteRange range) { return hash::SpookyHashV2::Hash64(range.data(), range.size(), kHashSeed); } } // namespace size_t prependHeader(std::unique_ptr& buf, uint32_t fileId) { if (fileId == 0) { throw std::invalid_argument("invalid file id"); } auto lengthAndHash = dataLengthAndHash(buf.get()); if (lengthAndHash.first == 0) { return 0; // empty, nothing to do, no zero-length records } // Prepend to the first buffer in the chain if we have room, otherwise // prepend a new buffer. if (buf->headroom() >= headerSize()) { buf->unshareOne(); buf->prepend(headerSize()); } else { auto b = IOBuf::create(headerSize()); b->append(headerSize()); b->appendChain(std::move(buf)); buf = std::move(b); } auto header = reinterpret_cast(buf->writableData()); memset(header, 0, sizeof(Header)); header->magic = Header::kMagic; header->fileId = fileId; header->dataLength = uint32_t(lengthAndHash.first); header->dataHash = lengthAndHash.second; header->headerHash = headerHash(*header); return lengthAndHash.first + headerSize(); } bool validateRecordHeader(ByteRange range, uint32_t fileId) { if (range.size() < headerSize()) { // records may not be empty return false; } auto header = reinterpret_cast(range.begin()); if (header->magic != Header::kMagic || header->version != 0 || header->hashFunction != 0 || header->flags != 0 || (fileId != 0 && header->fileId != fileId)) { return false; } if (headerHash(*header) != header->headerHash) { return false; } return true; } RecordInfo validateRecordData(ByteRange range) { if (range.size() <= headerSize()) { // records may not be empty return {0, {}}; } auto header = reinterpret_cast(range.begin()); range.advance(sizeof(Header)); if (header->dataLength > range.size()) { return {0, {}}; } range.reset(range.begin(), header->dataLength); if (dataHash(range) != header->dataHash) { return {0, {}}; } return {header->fileId, range}; } RecordInfo validateRecord(ByteRange range, uint32_t fileId) { if (!validateRecordHeader(range, fileId)) { return {0, {}}; } return validateRecordData(range); } RecordInfo findRecord(ByteRange searchRange, ByteRange wholeRange, uint32_t fileId) { static const uint32_t magic = Header::kMagic; static const ByteRange magicRange( reinterpret_cast(&magic), sizeof(magic)); DCHECK_GE(searchRange.begin(), wholeRange.begin()); DCHECK_LE(searchRange.end(), wholeRange.end()); const uint8_t* start = searchRange.begin(); const uint8_t* end = std::min(searchRange.end(), wholeRange.end() - sizeof(Header)); // end-1: the last place where a Header could start while (start < end) { auto p = ByteRange(start, end + sizeof(magic)).find(magicRange); if (p == ByteRange::npos) { break; } start += p; auto r = validateRecord(ByteRange(start, wholeRange.end()), fileId); if (!r.record.empty()) { return r; } // No repeated prefix in magic, so we can do better than start++ start += sizeof(magic); } return {0, {}}; } } // namespace recordio_helpers } // namespace folly