/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * RecordIO: self-synchronizing stream of variable length records * * RecordIO gives you the ability to write a stream of variable length records * and read them later even in the face of data corruption -- randomly inserted * or deleted chunks of the file, or modified data. When reading, you may lose * corrupted records, but the stream will resynchronize automatically. */ #pragma once #define FOLLY_IO_RECORDIO_H_ #include #include #include #include #include #include #include namespace folly { /** * Class to write a stream of RecordIO records to a file. * * RecordIOWriter is thread-safe */ class RecordIOWriter { public: /** * Create a RecordIOWriter around a file; will append to the end of * file if it exists. * * Each file must have a non-zero file id, which is embedded in all * record headers. Readers will only return records with the requested * file id (or, if the reader is created with fileId=0 in the constructor, * the reader will return all records). File ids are only used to allow * resynchronization if you store RecordIO records (with headers) inside * other RecordIO records (for example, if a record consists of a fragment * from another RecordIO file). If you're not planning to do that, * the defaults are fine. */ explicit RecordIOWriter(File file, uint32_t fileId = 1); /** * Write a record. We will use at most headerSize() bytes of headroom, * you might want to arrange that before copying your data into it. */ void write(std::unique_ptr buf); /** * Return the position in the file where the next byte will be written. * Conservative, as stuff can be written at any time from another thread. */ off_t filePos() const { return filePos_; } private: File file_; uint32_t fileId_; std::unique_lock writeLock_; std::atomic filePos_; }; /** * Class to read from a RecordIO file. Will skip invalid records. */ class RecordIOReader { public: class Iterator; /** * RecordIOReader is iterable, returning pairs of ByteRange (record content) * and position in file where the record (including header) begins. * Note that the position includes the header, that is, it can be passed back * to seek(). */ typedef Iterator iterator; typedef Iterator const_iterator; typedef std::pair value_type; typedef value_type& reference; typedef const value_type& const_reference; /** * A record reader with a fileId of 0 will return all records. * A record reader with a non-zero fileId will only return records where * the fileId matches. */ explicit RecordIOReader(File file, uint32_t fileId = 0); Iterator cbegin() const; Iterator begin() const; Iterator cend() const; Iterator end() const; /** * Create an iterator to the first valid record after pos. */ Iterator seek(off_t pos) const; private: MemoryMapping map_; uint32_t fileId_; }; namespace recordio_helpers { // We're exposing the guts of the RecordIO implementation for two reasons: // 1. It makes unit testing easier, and // 2. It allows you to build different RecordIO readers / writers that use // different storage systems underneath (not standard files) /** * Header size. */ constexpr size_t headerSize(); // defined in RecordIO-inl.h /** * Write a header in the buffer. We will prepend the header to the front * of the chain. Do not write the buffer if empty (we don't allow empty * records). Returns the total length, including header (0 if empty) * (same as buf->computeChainDataLength(), but likely faster) * * The fileId should be unique per stream and allows you to have RecordIO * headers stored inside the data (for example, have an entire RecordIO * file stored as a record inside another RecordIO file). The fileId may * not be 0. */ size_t prependHeader(std::unique_ptr& buf, uint32_t fileId = 1); /** * Search for the first valid record that begins in searchRange (which must be * a subrange of wholeRange). Returns the record data (not the header) if * found, ByteRange() otherwise. * * The fileId may be 0, in which case we'll return the first valid record for * *any* fileId, or non-zero, in which case we'll only look for records with * the requested fileId. */ struct RecordInfo { uint32_t fileId; ByteRange record; }; RecordInfo findRecord(ByteRange searchRange, ByteRange wholeRange, uint32_t fileId); /** * Search for the first valid record in range. */ RecordInfo findRecord(ByteRange range, uint32_t fileId); /** * Check if the Record Header is valid at the beginning of range. * Useful to check the validity of the header before building the entire record * in IOBuf. If the record is from storage device (e.g. flash) then, it * is better to make sure that the header is valid before reading the data * from the storage device. * Returns true if valid, false otherwise. */ bool validateRecordHeader(ByteRange range, uint32_t fileId); /** * Check if there Record Data is valid (to be used after validating the header * separately) * Returns the record data (not the header) if the record data is valid, * ByteRange() otherwise. */ RecordInfo validateRecordData(ByteRange range); /** * Check if there is a valid record at the beginning of range. This validates * both record header and data and Returns the * record data (not the header) if the record is valid, ByteRange() otherwise. */ RecordInfo validateRecord(ByteRange range, uint32_t fileId); } // namespace recordio_helpers } // namespace folly #include