//
// Created by Kirill Zhukov on 20.04.2025.
//

#ifndef MEMTABLE_H
#define MEMTABLE_H

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

// On-disk SSTable layout produced by the writers in this header (all integers
// are written as raw in-memory bytes, i.e. in the byte order of the host):
//
//   data section : repeated records
//                    u32 key_len | key bytes | u32 value_len | value bytes |
//                    u8 tombstone flag | u64 version
//   index section: one entry per record, in the order the records were written
//                    u32 key_len | key bytes | u64 offset of the record
//   footer       : u64 offset at which the index section starts
//
// WAL layout consumed by replay_wal():
//   u8 op (0 = PUT, 1 = DELETE) | u32 key_len | key bytes |
//   [PUT only] u32 value_len | value bytes

namespace usub::utils
{
    namespace detail
    {
        inline constexpr std::uint64_t kLengthPrefixSize = sizeof(std::uint32_t);
        inline constexpr std::uint64_t kFooterSize = sizeof(std::uint64_t);

        /// True when `T` exposes a member named `is_tombstone`.
        template <typename T, typename = void>
        struct has_is_tombstone : std::false_type
        {
        };

        template <typename T>
        struct has_is_tombstone<T, std::void_t<decltype(std::declval<const T&>().is_tombstone)>> : std::true_type
        {
        };

        /// Narrows a field length to the 32-bit width used on disk.
        /// @throws std::runtime_error if the length does not fit.
        inline std::uint32_t checked_len(std::size_t size)
        {
            if (size > std::numeric_limits<std::uint32_t>::max())
            {
                throw std::runtime_error("SSTable field does not fit in a 32-bit length");
            }
            return static_cast<std::uint32_t>(size);
        }

        struct FileCloser
        {
            void operator()(std::FILE* file) const noexcept
            {
                if (file != nullptr)
                {
                    std::fclose(file);
                }
            }
        };

        /// Streams records to a new SSTable file and appends index + footer on finish().
        /// The FILE handle is owned, so it is closed even if a memtable visitor throws.
        class SstableWriter
        {
        public:
            SstableWriter(const std::string& filename, const std::string& open_error,
                          const std::string& fdopen_error)
                : fd_(::open(filename.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644))
            {
                if (fd_ < 0)
                {
                    throw std::runtime_error(open_error);
                }
                file_.reset(::fdopen(fd_, "wb"));
                if (!file_)
                {
                    ::close(fd_);
                    throw std::runtime_error(fdopen_error);
                }
            }

            /// Writes one record and remembers its index entry.
            void append(const void* key, std::size_t key_size, const void* value, std::size_t value_size,
                        bool is_tombstone, std::uint64_t version)
            {
                const std::uint32_t key_len = checked_len(key_size);
                const std::uint32_t value_len = checked_len(value_size);
                const std::uint8_t tombstone_flag = is_tombstone ? 1 : 0;

                write_pod(key_len);
                write_raw(key, key_len);
                write_pod(value_len);
                write_raw(value, value_len);
                write_pod(tombstone_flag);
                write_pod(version);

                // The index is serialized into a byte buffer right away, so the writer
                // does not need to know (or copy) the key's C++ type.
                append_pod(index_, key_len);
                if (key_len != 0)
                {
                    index_.append(static_cast<const char*>(key), key_len);
                }
                append_pod(index_, data_size_);

                data_size_ += sizeof(key_len) + key_len + sizeof(value_len) + value_len +
                    sizeof(tombstone_flag) + sizeof(version);
            }

            /// Writes the index and footer, then flushes, fsyncs and closes the file.
            void finish()
            {
                write_raw(index_.data(), index_.size());
                write_pod(data_size_); // the index starts where the data section ends

                if (std::fflush(file_.get()) != 0 || ::fsync(fd_) != 0)
                {
                    throw std::runtime_error("Failed to flush SSTable to disk");
                }
                if (std::fclose(file_.release()) != 0)
                {
                    throw std::runtime_error("Failed to close SSTable");
                }
            }

        private:
            void write_raw(const void* data, std::size_t size)
            {
                if (size != 0 && std::fwrite(data, 1, size, file_.get()) != size)
                {
                    throw std::runtime_error("Failed to write SSTable");
                }
            }

            template <typename T>
            void write_pod(const T& value)
            {
                write_raw(std::addressof(value), sizeof(T));
            }

            template <typename T>
            static void append_pod(std::string& buffer, const T& value)
            {
                buffer.append(reinterpret_cast<const char*>(std::addressof(value)), sizeof(T));
            }

            int fd_;
            std::unique_ptr<std::FILE, FileCloser> file_;
            std::string index_;
            std::uint64_t data_size_ = 0;
        };

        /// Bounds-checked random access to the bytes of an open std::ifstream.
        class StreamSource
        {
        public:
            explicit StreamSource(std::ifstream& in) : in_(in)
            {
                in_.seekg(0, std::ios::end);
                const std::streamoff end = static_cast<std::streamoff>(in_.tellg());
                if (end < 0)
                {
                    throw std::runtime_error("Failed to determine file size");
                }
                size_ = static_cast<std::uint64_t>(end);
                position_ = size_;
            }

            std::uint64_t size() const noexcept { return size_; }

            void read_at(std::uint64_t offset, void* out, std::size_t count)
            {
                if (offset > size_ || count > size_ - offset)
                {
                    throw std::runtime_error("Attempt to read past the end of the file");
                }
                if (count == 0)
                {
                    return;
                }
                if (offset != position_) // sequential reads need no seek
                {
                    in_.seekg(static_cast<std::streamoff>(offset), std::ios::beg);
                }
                if (!in_.read(static_cast<char*>(out), static_cast<std::streamsize>(count)))
                {
                    throw std::runtime_error("Failed to read file");
                }
                position_ = offset + count;
            }

        private:
            std::ifstream& in_;
            std::uint64_t size_ = 0;
            std::uint64_t position_ = 0;
        };

        /// Bounds-checked random access to a read-only private mapping of a file.
        /// The mapping and the descriptor are released in the destructor, so they
        /// do not leak when a memtable or callback throws.
        class MappedSource
        {
        public:
            /// @param what noun used in the error messages ("Failed to open <what>", ...).
            MappedSource(const std::string& filename, const std::string& what)
                : fd_(::open(filename.c_str(), O_RDONLY))
            {
                if (fd_ < 0)
                {
                    throw std::runtime_error("Failed to open " + what);
                }
                struct stat st{};
                if (::fstat(fd_, &st) != 0)
                {
                    ::close(fd_);
                    throw std::runtime_error("Failed to stat " + what);
                }
                size_ = static_cast<std::size_t>(st.st_size);
                mapping_ = ::mmap(nullptr, size_, PROT_READ, MAP_PRIVATE, fd_, 0);
                if (mapping_ == MAP_FAILED)
                {
                    ::close(fd_);
                    throw std::runtime_error("Failed to mmap " + what);
                }
            }

            ~MappedSource()
            {
                ::munmap(mapping_, size_);
                ::close(fd_);
            }

            MappedSource(const MappedSource&) = delete;
            MappedSource& operator=(const MappedSource&) = delete;

            std::uint64_t size() const noexcept { return size_; }

            void read_at(std::uint64_t offset, void* out, std::size_t count) const
            {
                if (offset > size_ || count > size_ - offset)
                {
                    throw std::runtime_error("Attempt to read past the end of the file");
                }
                if (count != 0)
                {
                    // memcpy instead of a cast-and-dereference: fields are not aligned.
                    std::memcpy(out, static_cast<const char*>(mapping_) + offset, count);
                }
            }

        private:
            int fd_;
            void* mapping_ = nullptr;
            std::size_t size_ = 0;
        };

        /// Reads one trivially copyable value stored at `offset`.
        template <typename T, typename Source>
        T load(Source& source, std::uint64_t offset)
        {
            T value{};
            source.read_at(offset, std::addressof(value), sizeof(T));
            return value;
        }

        /// Forward reader restricted to the byte range [begin, limit) of a source.
        template <typename Source>
        class Cursor
        {
        public:
            Cursor(Source& source, std::uint64_t begin, std::uint64_t limit)
                : source_(source), position_(begin), limit_(limit)
            {
                if (begin > limit)
                {
                    throw std::runtime_error("Corrupt SSTable: offset out of range");
                }
            }

            bool at_end() const noexcept { return position_ == limit_; }
            std::uint64_t position() const noexcept { return position_; }

            template <typename T>
            T read()
            {
                T value{};
                read_into(std::addressof(value), sizeof(T));
                return value;
            }

            std::string read_bytes(std::size_t count)
            {
                ensure_available(count); // validate before allocating a file-supplied size
                std::string bytes(count, '\0');
                read_into(bytes.data(), count);
                return bytes;
            }

        private:
            void ensure_available(std::size_t count) const
            {
                if (count > limit_ - position_)
                {
                    throw std::runtime_error("Corrupt SSTable: truncated record or index entry");
                }
            }

            void read_into(void* out, std::size_t count)
            {
                ensure_available(count);
                source_.read_at(position_, out, count);
                position_ += count;
            }

            Source& source_;
            std::uint64_t position_;
            std::uint64_t limit_;
        };

        /// Section boundaries of an SSTable: data is [0, index_offset),
        /// the index is [index_offset, index_end), the footer follows.
        struct SstableLayout
        {
            std::uint64_t index_offset;
            std::uint64_t index_end;
        };

        /// Reads and validates the footer. A zero-length source yields an empty layout.
        template <typename Source>
        SstableLayout read_layout(Source& source)
        {
            SstableLayout layout{0, 0};
            if (source.size() == 0)
            {
                return layout;
            }
            if (source.size() < kFooterSize)
            {
                throw std::runtime_error("Corrupt SSTable: missing footer");
            }
            layout.index_end = source.size() - kFooterSize;
            layout.index_offset = load<std::uint64_t>(source, layout.index_end);
            if (layout.index_offset > layout.index_end)
            {
                throw std::runtime_error("Corrupt SSTable: index offset out of range");
            }
            return layout;
        }

        /// Reads every index entry as (raw key bytes, record offset).
        template <typename Source>
        std::vector<std::pair<std::string, std::uint64_t>> read_index(Source& source, const SstableLayout& layout)
        {
            std::vector<std::pair<std::string, std::uint64_t>> entries;
            Cursor<Source> cursor(source, layout.index_offset, layout.index_end);
            while (!cursor.at_end())
            {
                const std::uint32_t key_len = cursor.template read<std::uint32_t>();
                std::string key = cursor.read_bytes(key_len);
                const std::uint64_t offset = cursor.template read<std::uint64_t>();
                if (offset >= layout.index_offset)
                {
                    throw std::runtime_error("Corrupt SSTable: record offset out of range");
                }
                entries.emplace_back(std::move(key), offset);
            }
            return entries;
        }

        /// A decoded data record plus the offset of the record that follows it.
        struct RawRecord
        {
            std::string key;
            std::string value;
            std::uint8_t tombstone_flag = 0;
            std::uint64_t version = 0;
            std::uint64_t next_offset = 0;
        };

        /// Decodes the record at `offset`; never reads beyond the data section.
        template <typename Source>
        RawRecord read_record(Source& source, std::uint64_t offset, const SstableLayout& layout)
        {
            Cursor<Source> cursor(source, offset, layout.index_offset);
            RawRecord record;
            const std::uint32_t key_len = cursor.template read<std::uint32_t>();
            record.key = cursor.read_bytes(key_len);
            const std::uint32_t value_len = cursor.template read<std::uint32_t>();
            record.value = cursor.read_bytes(value_len);
            record.tombstone_flag = cursor.template read<std::uint8_t>();
            record.version = cursor.template read<std::uint64_t>();
            record.next_offset = cursor.position();
            return record;
        }

        /// Rebuilds a fixed-size key from its stored bytes.
        /// @throws std::runtime_error("Key size mismatch") if the stored length differs from sizeof(Key).
        template <typename Key>
        Key decode_fixed_key(const std::string& bytes)
        {
            static_assert(std::is_trivially_copyable<Key>::value,
                          "fixed-size SSTable keys are restored with memcpy and must be trivially copyable");
            if (bytes.size() != sizeof(Key))
            {
                throw std::runtime_error("Key size mismatch");
            }
            Key key{};
            std::memcpy(std::addressof(key), bytes.data(), sizeof(Key));
            return key;
        }

        /// Reads the index and converts every key to the fixed-size key type.
        template <typename Key, typename Source>
        std::vector<std::pair<Key, std::uint64_t>> read_fixed_key_index(Source& source, const SstableLayout& layout)
        {
            std::vector<std::pair<Key, std::uint64_t>> entries;
            for (const auto& entry : read_index(source, layout))
            {
                entries.emplace_back(decode_fixed_key<Key>(entry.first), entry.second);
            }
            return entries;
        }

        /// Shared body of the two range queries: visits live records with from_key <= key <= to_key.
        /// Relies on the index being sorted by key (it is written in memtable iteration order).
        template <typename Key, typename Source, typename Callback>
        void range_query(Source& source, const Key& from_key, const Key& to_key, Callback& callback)
        {
            const SstableLayout layout = read_layout(source);
            const auto index_entries = read_fixed_key_index<Key>(source, layout);

            // lower_bound on from_key
            auto it = std::lower_bound(index_entries.begin(), index_entries.end(), from_key,
                                       [](const auto& pair, const auto& key) { return pair.first < key; });

            for (; it != index_entries.end() && it->first <= to_key; ++it)
            {
                RawRecord record = read_record(source, it->second, layout);
                if (record.tombstone_flag != 0)
                {
                    continue; // deleted entries are not reported
                }
                Key file_key = decode_fixed_key<Key>(record.key);
                callback(file_key, record.value);
            }
        }
    } // namespace detail

    /// Writes a memtable with fixed-size keys to `filename` as an SSTable.
    ///
    /// Keys are stored as the raw `sizeof(key)` bytes of the key object, so this
    /// only produces a meaningful file for keys whose object representation is the
    /// key itself (no owned heap data). Values must expose `data()` and `size()`;
    /// if the value type also has an `is_tombstone` member, that flag and
    /// `value.version` are stored, otherwise both are written as 0.
    ///
    /// @param memtable source; must provide `for_each(fn)` calling `fn(key, value)`.
    /// @param filename file to create or truncate.
    /// @throws std::runtime_error if the file cannot be opened, written, synced or closed.
    template <typename SkipList>
    void write_sstable(const SkipList& memtable, const std::string& filename)
    {
        detail::SstableWriter writer(filename, "Failed to open SSTable", "Failed to fdopen SSTable");

        memtable.for_each([&writer](const auto& key, const auto& value)
        {
            using Value = std::decay_t<decltype(value)>;

            bool is_tombstone = false;
            std::uint64_t version = 0;
            if constexpr (detail::has_is_tombstone<Value>::value)
            {
                is_tombstone = value.is_tombstone ? true : false;
                version = value.version;
            }

            writer.append(std::addressof(key), sizeof(key), value.data(), value.size(), is_tombstone, version);
        });

        writer.finish();
    }

    /// Loads every record of an SSTable with fixed-size keys by scanning the data section.
    ///
    /// Each record is passed to `memtable.insert(key, value)`.
    /// NOTE(review): the stored tombstone flag and version are not forwarded, so
    /// deleted entries are inserted like live ones — confirm this is intended.
    ///
    /// @throws std::runtime_error if the file cannot be opened, a stored key length
    ///         differs from `sizeof(SkipList::key_type)` ("Key size mismatch"), or
    ///         the file is truncated/corrupt. An empty file loads nothing.
    template <typename SkipList>
    void read_sstable(SkipList& memtable, const std::string& filename)
    {
        using Key = typename SkipList::key_type;

        std::ifstream sstable(filename, std::ios::binary);
        if (!sstable.is_open())
        {
            throw std::runtime_error("Failed to open file: " + filename);
        }

        detail::StreamSource source(sstable);
        const detail::SstableLayout layout = detail::read_layout(source);

        // Stop at the index: only [0, index_offset) holds records.
        std::uint64_t offset = 0;
        while (offset < layout.index_offset)
        {
            detail::RawRecord record = detail::read_record(source, offset, layout);
            Key key = detail::decode_fixed_key<Key>(record.key);
            memtable.insert(key, record.value);
            offset = record.next_offset;
        }
    }

    /// Writes a memtable with variable-length keys to `filename` as an SSTable.
    ///
    /// @param memtable source; must provide `for_each_raw(fn)` calling
    ///        `fn(key, value, is_tombstone, version)` where key and value expose
    ///        `data()` and `size()`.
    /// @param filename file to create or truncate.
    /// @throws std::runtime_error if the file cannot be opened, written, synced or closed.
    template <typename SkipList>
    void write_sstable_with_index(const SkipList& memtable, const std::string& filename)
    {
        detail::SstableWriter writer(filename, "Failed to open file: " + filename,
                                     "Failed to fdopen file: " + filename);

        memtable.for_each_raw([&writer](const auto& key, const auto& value, bool is_tombstone,
                                        std::uint64_t version)
        {
            writer.append(key.data(), key.size(), value.data(), value.size(), is_tombstone, version);
        });

        // finish() writes the index behind the data section.
        writer.finish();
    }

    /// Loads every record of an SSTable with fixed-size keys by following its index.
    ///
    /// Each record is passed to `memtable.insert(key, value)`.
    /// NOTE(review): as in read_sstable(), tombstone flag and version are not forwarded.
    ///
    /// @throws std::runtime_error if the file cannot be opened, a key length does not
    ///         match `sizeof(SkipList::key_type)`, or the file is truncated/corrupt.
    template <typename SkipList>
    void read_sstable_with_index(SkipList& memtable, const std::string& filename)
    {
        using Key = typename SkipList::key_type;

        std::ifstream sstable(filename, std::ios::binary);
        if (!sstable.is_open())
        {
            throw std::runtime_error("Failed to open file: " + filename);
        }

        detail::StreamSource source(sstable);
        const detail::SstableLayout layout = detail::read_layout(source);
        const auto index_entries = detail::read_fixed_key_index<Key>(source, layout);

        for (const auto& entry : index_entries)
        {
            detail::RawRecord record = detail::read_record(source, entry.second, layout);
            Key file_key = detail::decode_fixed_key<Key>(record.key);
            memtable.insert(file_key, record.value);
        }
    }

    /// Calls `callback(key, value)` for every live record with from_key <= key <= to_key,
    /// reading the SSTable through a file stream.
    ///
    /// Tombstoned records are skipped, matching optimized_range_query_sstable().
    ///
    /// @tparam SkipList only used for `SkipList::key_type` (fixed-size key).
    /// @throws std::runtime_error if the file cannot be opened or is truncated/corrupt.
    template <typename SkipList, typename Callback>
    void range_query_sstable(const std::string& filename,
                             const typename SkipList::key_type& from_key,
                             const typename SkipList::key_type& to_key,
                             Callback&& callback)
    {
        std::ifstream sstable(filename, std::ios::binary);
        if (!sstable.is_open())
        {
            throw std::runtime_error("Failed to open file: " + filename);
        }

        detail::StreamSource source(sstable);
        detail::range_query<typename SkipList::key_type>(source, from_key, to_key, callback);
    }

    /// Replays a write-ahead log into `memtable`.
    ///
    /// PUT records call `memtable.insert(key, value)`, DELETE records call
    /// `memtable.erase(key)`; keys and values are passed as std::string.
    /// Replay stops silently at the first incomplete record, which is what an
    /// interrupted append leaves at the end of the log.
    ///
    /// @throws std::runtime_error if the file cannot be opened or a record has an
    ///         operation code other than 0 (PUT) or 1 (DELETE).
    template <typename SkipList>
    void replay_wal(SkipList& memtable, const std::string& wal_filename)
    {
        std::ifstream wal(wal_filename, std::ios::binary);
        if (!wal.is_open())
        {
            throw std::runtime_error("Failed to open WAL file: " + wal_filename);
        }

        detail::StreamSource source(wal);
        const std::uint64_t size = source.size();
        std::uint64_t position = 0;

        // Reads a u32 length prefix and that many bytes; false if the log ends first.
        const auto read_blob = [&](std::string& out) -> bool
        {
            if (size - position < detail::kLengthPrefixSize)
            {
                return false;
            }
            const std::uint32_t length = detail::load<std::uint32_t>(source, position);
            if (length > size - position - detail::kLengthPrefixSize)
            {
                return false;
            }
            out.assign(length, '\0');
            source.read_at(position + detail::kLengthPrefixSize, out.data(), length);
            position += detail::kLengthPrefixSize + length;
            return true;
        };

        while (position < size)
        {
            const std::uint8_t op = detail::load<std::uint8_t>(source, position);
            if (op != 0 && op != 1)
            {
                throw std::runtime_error("Unknown WAL operation code");
            }
            position += sizeof(op);

            std::string key;
            if (!read_blob(key))
            {
                return;
            }

            if (op == 0)
            {
                // PUT
                std::string value;
                if (!read_blob(value))
                {
                    return;
                }
                memtable.insert(key, value);
            }
            else
            {
                memtable.erase(key);
            }
        }
    }

    /// Loads every record of an SSTable with variable-length keys through a memory mapping.
    ///
    /// Each record is passed to
    /// `memtable.insert_raw(key, value, tombstone_flag == 1, version)` with key and
    /// value as std::string. The index is validated completely before the first insert.
    ///
    /// @throws std::runtime_error if the file cannot be opened, stat'ed or mapped,
    ///         or if it is truncated/corrupt.
    template <typename SkipList>
    void read_sstable_with_mmap(SkipList& memtable, const std::string& filename)
    {
        detail::MappedSource source(filename, "SSTable");
        const detail::SstableLayout layout = detail::read_layout(source);
        const auto index_entries = detail::read_index(source, layout);

        for (const auto& entry : index_entries)
        {
            detail::RawRecord record = detail::read_record(source, entry.second, layout);
            memtable.insert_raw(record.key, record.value, record.tombstone_flag == 1, record.version);
        }
    }

    /// Calls `callback(key, value)` for every live record with from_key <= key <= to_key,
    /// reading the SSTable through a memory mapping. Tombstoned records are skipped.
    ///
    /// @tparam SkipList only used for `SkipList::key_type` (fixed-size key).
    /// @throws std::runtime_error if the file cannot be opened, stat'ed or mapped,
    ///         or if it is truncated/corrupt.
    template <typename SkipList, typename Callback>
    void optimized_range_query_sstable(const std::string& filename,
                                       const typename SkipList::key_type& from_key,
                                       const typename SkipList::key_type& to_key,
                                       Callback&& callback)
    {
        detail::MappedSource source(filename, "file");
        detail::range_query<typename SkipList::key_type>(source, from_key, to_key, callback);
    }
}

#endif //MEMTABLE_H