From 1acf6aac83c34e6928e5008e0e10a7e571354613 Mon Sep 17 00:00:00 2001 From: chuandew Date: Sun, 16 Nov 2025 16:32:06 +0800 Subject: [PATCH 1/5] [fix][client] Remove attr and slices cache in vfs data --- src/client/vfs/data/file.cc | 6 +-- src/client/vfs/data/reader/chunk_reader.cc | 56 ++++------------------ src/client/vfs/data/reader/chunk_reader.h | 10 ---- src/client/vfs/data/reader/file_reader.cc | 25 +--------- src/client/vfs/data/reader/file_reader.h | 4 -- 5 files changed, 13 insertions(+), 88 deletions(-) diff --git a/src/client/vfs/data/file.cc b/src/client/vfs/data/file.cc index 2c11d2d3f..afdbaf16a 100644 --- a/src/client/vfs/data/file.cc +++ b/src/client/vfs/data/file.cc @@ -72,11 +72,7 @@ Status File::PreCheck() { Status File::Write(ContextSPtr ctx, const char* buf, uint64_t size, uint64_t offset, uint64_t* out_wsize) { DINGOFS_RETURN_NOT_OK(PreCheck()); - Status s = file_writer_->Write(ctx, buf, size, offset, out_wsize); - if (s.ok()) { - file_reader_->Invalidate(); - } - return s; + return file_writer_->Write(ctx, buf, size, offset, out_wsize); } Status File::Read(ContextSPtr ctx, DataBuffer* data_buffer, uint64_t size, diff --git a/src/client/vfs/data/reader/chunk_reader.cc b/src/client/vfs/data/reader/chunk_reader.cc index d4306cb87..1175c4e12 100644 --- a/src/client/vfs/data/reader/chunk_reader.cc +++ b/src/client/vfs/data/reader/chunk_reader.cc @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -267,9 +266,6 @@ void ChunkReader::DoRead(ContextSPtr ctx, ChunkReadReq& req, UUID(), ret.ToString(), retry, chunk_offset, end_read_chunk_offet, read_file_offset, end_read_file_offset); - if (ret.IsNotFound()) { - InvalidateSlices(chunk_slices.version); - } } while (ret.IsNotFound() && retry++ < FLAGS_client_vfs_read_max_retry_block_not_found); @@ -278,14 +274,6 @@ void ChunkReader::DoRead(ContextSPtr ctx, ChunkReadReq& req, cb(ret); } -void ChunkReader::Invalidate() { - VLOG(4) << fmt::format("{} Invalidate, cversion: {}", UUID(), - cversion_.load(std::memory_order_relaxed)); - std::lock_guard lg(mutex_); - cversion_ = kInvalidVersion; - slices_.clear(); -} - static std::string SlicesToString(const std::vector& slices) { std::ostringstream oss; oss << "["; @@ -303,47 +291,23 @@ Status ChunkReader::GetSlices(ContextSPtr ctx, ChunkSlices* chunk_slices) { auto* tracer = hub_->GetTracer(); auto span = tracer->StartSpanWithContext(kVFSDataMoudule, METHOD_NAME(), ctx); - std::lock_guard lg(mutex_); - if (cversion_ == kInvalidVersion) { - VLOG(3) << fmt::format("{} cached chunk_slices invalidate, read from meta", - UUID()); + std::vector slices; + uint64_t chunk_version = 0; + DINGOFS_RETURN_NOT_OK(hub_->GetMetaSystem()->ReadSlice( + span->GetContext(), chunk_.ino, chunk_.index, fh_, &slices, + chunk_version)); - auto slice_span = tracer->StartSpanWithParent( - kVFSDataMoudule, "ChunkReader::GetSlices.ReadSlice", *span); + chunk_slices->version = chunk_version; + chunk_slices->slices = std::move(slices); - std::vector slices; - uint64_t chunk_version = 0; - DINGOFS_RETURN_NOT_OK(hub_->GetMetaSystem()->ReadSlice( - slice_span->GetContext(), chunk_.ino, chunk_.index, fh_, &slices, - chunk_version)); - - cversion_.store(next_version_, std::memory_order_relaxed); - slices_ = std::move(slices); - - next_version_++; - } - - chunk_slices->version = cversion_; - chunk_slices->slices = slices_; - - VLOG(9) << fmt::format("{} GetSlices, version: {}, slices: {}", UUID(), - chunk_slices->version, + VLOG(9) << fmt::format("{} GetSlices, version: {}, slice_num: {}, slices: {}", + UUID(), chunk_slices->version, + chunk_slices->slices.size(), SlicesToString(chunk_slices->slices)); return Status::OK(); } -void ChunkReader::InvalidateSlices(uint32_t version) { - VLOG(4) << fmt::format("{} InvalidateSlices, version: {}, cversion: {}", - UUID(), version, - cversion_.load(std::memory_order_relaxed)); - std::lock_guard lg(mutex_); - if (cversion_ <= version) { - cversion_ = kInvalidVersion; - slices_.clear(); - } -} - uint64_t ChunkReader::GetBlockSize() const { return block_size_; } } // namespace vfs diff --git a/src/client/vfs/data/reader/chunk_reader.h b/src/client/vfs/data/reader/chunk_reader.h index 6ed5b37ff..fc1a0b5f1 100644 --- a/src/client/vfs/data/reader/chunk_reader.h +++ b/src/client/vfs/data/reader/chunk_reader.h @@ -62,15 +62,11 @@ class ChunkReader { void ReadAsync(ContextSPtr ctx, ChunkReadReq& req, StatusCallback cb); - void Invalidate(); - private: void DoRead(ContextSPtr ctx, ChunkReadReq& req, StatusCallback cb); Status GetSlices(ContextSPtr ctx, ChunkSlices* chunk_slices); - void InvalidateSlices(uint32_t version); - static void BlockReadCallback(ContextSPtr ctx, ChunkReader* reader, const BlockCacheReadReq& req, ReaderSharedState& shared, Status s); @@ -86,12 +82,6 @@ class ChunkReader { const uint64_t fh_; const Chunk chunk_; const uint64_t block_size_; - - std::mutex mutex_; - // maybe version from mds - uint32_t next_version_{1}; // Start from 1, 0 is invalid version - std::atomic cversion_{kInvalidVersion}; - std::vector slices_; }; using ChunkReaderUptr = std::shared_ptr; diff --git a/src/client/vfs/data/reader/file_reader.cc b/src/client/vfs/data/reader/file_reader.cc index 0b5eba46d..48a2a65b3 100644 --- a/src/client/vfs/data/reader/file_reader.cc +++ b/src/client/vfs/data/reader/file_reader.cc @@ -196,33 +196,12 @@ Status FileReader::Read(ContextSPtr ctx, DataBuffer* data_buffer, uint64_t size, return ret; } -void FileReader::Invalidate() { - VLOG(1) << fmt::format("FileReader::Invalidate, ino: {}", ino_); - - std::lock_guard lock(mutex_); - validated_ = false; - for (auto& [index, reader] : chunk_readers_) { - reader->Invalidate(); - } -} - Status FileReader::GetAttr(ContextSPtr ctx, Attr* attr) { auto span = vfs_hub_->GetTracer()->StartSpanWithContext(kVFSDataMoudule, METHOD_NAME(), ctx); - std::lock_guard lock(mutex_); - if (validated_) { - *attr = attr_; - return Status::OK(); - } - - Status s = - vfs_hub_->GetMetaSystem()->GetAttr(span->GetContext(), ino_, &attr_); - - if (s.ok()) { - validated_ = true; - *attr = attr_; - } else { + Status s = vfs_hub_->GetMetaSystem()->GetAttr(span->GetContext(), ino_, attr); + if (!s.ok()) { LOG(WARNING) << fmt::format( "FileReader::GetAttr failed, ino: {}, status: {}", ino_, s.ToString()); } diff --git a/src/client/vfs/data/reader/file_reader.h b/src/client/vfs/data/reader/file_reader.h index ea9ef8c86..08c2d4bb7 100644 --- a/src/client/vfs/data/reader/file_reader.h +++ b/src/client/vfs/data/reader/file_reader.h @@ -42,8 +42,6 @@ class FileReader { Status Read(ContextSPtr ctx, DataBuffer* data_buffer, uint64_t size, uint64_t offset, uint64_t* out_rsize); - void Invalidate(); - private: Status GetAttr(ContextSPtr ctx, Attr* attr); @@ -59,8 +57,6 @@ class FileReader { uint64_t last_intime_warmup_trigger_{0}; std::mutex mutex_; - bool validated_{false}; - Attr attr_; // chunk index -> chunk reader std::unordered_map chunk_readers_; }; From c71c4053cfabe02f8f05eacf4e8c9b55b2006ffa Mon Sep 17 00:00:00 2001 From: Zihui Deng <451809218@qq.com> Date: Thu, 13 Nov 2025 14:27:35 +0800 Subject: [PATCH 2/5] [fix][mds] Fixup fs tree view issues on page. --- src/common/options/client.cc | 2 +- src/mds/filesystem/filesystem.cc | 2 +- src/mds/filesystem/fs_utils.cc | 20 +++++++++++--------- src/mds/service/fsstat_service.cc | 6 +----- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/common/options/client.cc b/src/common/options/client.cc index 3828522f0..2e6be456e 100644 --- a/src/common/options/client.cc +++ b/src/common/options/client.cc @@ -44,7 +44,7 @@ DEFINE_int32(client_bthread_worker_num, 0, "bthread worker num"); DEFINE_bool(client_access_logging, true, "enable access log"); DEFINE_validator(client_access_logging, brpc::PassValidate); -DEFINE_bool(client_access_logging_verbose, false, +DEFINE_bool(client_access_logging_verbose, true, "enable access log with verbose"); DEFINE_validator(client_access_logging_verbose, brpc::PassValidate); diff --git a/src/mds/filesystem/filesystem.cc b/src/mds/filesystem/filesystem.cc index 589b2a962..2e938ca13 100644 --- a/src/mds/filesystem/filesystem.cc +++ b/src/mds/filesystem/filesystem.cc @@ -2332,7 +2332,7 @@ static std::set GetDeletedBucketIds(int64_t mds_id, const pb::mds::Has auto old_bucketset = old_hash.distributions().find(mds_id); auto bucketset = hash.distributions().find(mds_id); if (old_bucketset == old_hash.distributions().end() || bucketset == hash.distributions().end()) { - DINGO_LOG(ERROR) << fmt::format("[fs] mds_id({}) not found in old or new hash partition.", mds_id); + DINGO_LOG(INFO) << fmt::format("[fs] mds_id({}) not found in old or new hash partition.", mds_id); return deleted_bucket_ids; } diff --git a/src/mds/filesystem/fs_utils.cc b/src/mds/filesystem/fs_utils.cc index 70ce56a51..958bfb60d 100644 --- a/src/mds/filesystem/fs_utils.cc +++ b/src/mds/filesystem/fs_utils.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -261,10 +262,10 @@ Status FsUtils::GenDirJsonString(Ino parent, std::string& output) { const uint32_t fs_id = fs_info_.fs_id(); - std::map dentries; + std::vector dentries; Trace trace; ScanDentryOperation operation(trace, fs_id, parent, [&](const DentryEntry& dentry) -> bool { - dentries.insert(std::make_pair(dentry.ino(), dentry)); + dentries.push_back(dentry); return true; }); @@ -275,13 +276,14 @@ Status FsUtils::GenDirJsonString(Ino parent, std::string& output) { // batch get inode attrs std::map attrs; uint32_t count = 0; - std::vector inoes; - inoes.reserve(kBatchGetSize); - for (auto& [ino, dentry] : dentries) { - inoes.push_back(ino); + std::set inoes; + for (const auto& dentry : dentries) { + inoes.insert(dentry.ino()); if (++count == dentries.size() || inoes.size() == kBatchGetSize) { - BatchGetInodeAttrOperation operation(trace, fs_id, inoes); + // take out duplicate inoes + std::vector inoes_vec(inoes.begin(), inoes.end()); + BatchGetInodeAttrOperation operation(trace, fs_id, inoes_vec); status = operation_processor_->RunAlone(&operation); if (!status.ok()) { DINGO_LOG(ERROR) << fmt::format("[fsutils] batch get inode attrs fail, {}.", status.error_str()); @@ -304,8 +306,8 @@ Status FsUtils::GenDirJsonString(Ino parent, std::string& output) { // gen json nlohmann::json doc = nlohmann::json::array(); - for (auto& [ino, dentry] : dentries) { - auto it = attrs.find(ino); + for (const auto& dentry : dentries) { + auto it = attrs.find(dentry.ino()); if (it == attrs.end()) { DINGO_LOG(ERROR) << fmt::format("[fsutils] not found attr for dentry({}/{})", dentry.ino(), dentry.name()); continue; diff --git a/src/mds/service/fsstat_service.cc b/src/mds/service/fsstat_service.cc index a5c77c23e..d4d9082b1 100644 --- a/src/mds/service/fsstat_service.cc +++ b/src/mds/service/fsstat_service.cc @@ -1443,17 +1443,13 @@ static void RenderInodePage(const AttrEntry& attr, butil::IOBufBuilder& os) { RenderJsonPage("dingofs inode details", header, json, os); } -static void RenderChunk(uint64_t& count, uint64_t chunk_size, ChunkEntry chunk, butil::IOBufBuilder& os) { +static void RenderChunk(uint64_t& count, uint64_t chunk_size, const ChunkEntry& chunk, butil::IOBufBuilder& os) { struct OffsetRange { uint64_t start; uint64_t end; std::vector slices; }; - // sort by offset - std::stable_sort(chunk.mutable_slices()->begin(), chunk.mutable_slices()->end(), - [](const SliceEntry& a, const SliceEntry& b) { return a.offset() < b.offset(); }); - // get offset ranges std::set offsets; if (!chunk.slices().empty()) { From 49205b986d08f9e7720d068455c304655183d59e Mon Sep 17 00:00:00 2001 From: Zihui Deng <451809218@qq.com> Date: Fri, 14 Nov 2025 20:09:52 +0800 Subject: [PATCH 3/5] [fix][mds] Fixup readdir issues. --- proto | 2 +- src/client/fuse/fuse_op.cc | 129 +++++++--- src/client/vfs/metasystem/mds/dir_iterator.cc | 15 +- src/client/vfs/metasystem/mds/dir_iterator.h | 13 +- src/client/vfs/metasystem/mds/filesystem.cc | 4 +- src/client/vfs/metasystem/mds/helper.h | 4 +- src/client/vfs/metasystem/mds/mds_client.cc | 27 ++- src/client/vfs/metasystem/mds/mds_client.h | 5 +- src/client/vfs/metasystem/mds/parent_memo.cc | 38 +++ src/client/vfs/metasystem/mds/parent_memo.h | 4 + src/client/vfs/vfs_impl.cc | 9 +- src/client/vfs/vfs_wrapper.cc | 28 +-- src/mds/common/context.h | 30 ++- src/mds/filesystem/dentry.h | 9 +- src/mds/filesystem/filesystem.cc | 222 ++++++++---------- src/mds/filesystem/filesystem.h | 10 +- src/mds/filesystem/partition.cc | 81 ++++++- src/mds/filesystem/partition.h | 31 ++- src/mds/service/debug_service.cc | 4 +- src/mds/service/fsstat_service.cc | 3 +- src/mds/service/mds_service.cc | 183 ++++++--------- src/mds/service/service_helper.h | 12 +- 22 files changed, 504 insertions(+), 359 deletions(-) diff --git a/proto b/proto index 8be707aad..c16761e62 160000 --- a/proto +++ b/proto @@ -1 +1 @@ -Subproject commit 8be707aada56b1bdae72015cb76a2d542dd8d8e7 +Subproject commit c16761e6222cad8191192ed29d121d02c496df8f diff --git a/src/client/fuse/fuse_op.cc b/src/client/fuse/fuse_op.cc index 15e542c1b..02145778e 100644 --- a/src/client/fuse/fuse_op.cc +++ b/src/client/fuse/fuse_op.cc @@ -91,8 +91,10 @@ void Attr2FuseEntry(const Attr& attr, struct fuse_entry_param* e) { e->generation = 0; Attr2Stat(attr, &e->attr); - e->attr_timeout = g_vfs->GetAttrTimeout(attr.type); - e->entry_timeout = g_vfs->GetEntryTimeout(attr.type); + // e->attr_timeout = g_vfs->GetAttrTimeout(attr.type); + // e->entry_timeout = g_vfs->GetEntryTimeout(attr.type); + e->attr_timeout = 0; + e->entry_timeout = 0; } Attr Stat2Attr(struct stat* stat) { @@ -114,29 +116,47 @@ Attr Stat2Attr(struct stat* stat) { } // namespace static void ReplyError(fuse_req_t req, const Status& s) { - fuse_reply_err(req, s.ToSysErrNo()); + int ret = fuse_reply_err(req, s.ToSysErrNo()); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_err fail, ret({}).", errno); + } } static void ReplyEntry(fuse_req_t req, const Attr& attr) { fuse_entry_param e; memset(&e, 0, sizeof(e)); Attr2FuseEntry(attr, &e); - fuse_reply_entry(req, &e); + + int ret = fuse_reply_entry(req, &e); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_entry fail, ret({}).", errno); + } } static void ReplyAttr(fuse_req_t req, const Attr& attr) { struct stat stat; memset(&stat, 0, sizeof(stat)); Attr2Stat(attr, &stat); - fuse_reply_attr(req, &stat, g_vfs->GetAttrTimeout(attr.type)); + + int ret = fuse_reply_attr(req, &stat, g_vfs->GetAttrTimeout(attr.type)); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_attr fail, ret({}).", errno); + } } static void ReplyReadlink(fuse_req_t req, const std::string& link) { - fuse_reply_readlink(req, link.c_str()); + int ret = fuse_reply_readlink(req, link.c_str()); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_readlink fail, ret({}).", + errno); + } } static void ReplyOpen(fuse_req_t req, struct fuse_file_info* fi) { - fuse_reply_open(req, fi); + int ret = fuse_reply_open(req, fi); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_open fail, ret({}).", errno); + } } static void ReplyCreate(fuse_req_t req, struct fuse_file_info* fi, @@ -144,21 +164,32 @@ static void ReplyCreate(fuse_req_t req, struct fuse_file_info* fi, fuse_entry_param e; memset(&e, 0, sizeof(fuse_entry_param)); Attr2FuseEntry(attr, &e); - fuse_reply_create(req, &e, fi); + + int ret = fuse_reply_create(req, &e, fi); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_create fail, ret({}).", errno); + } } static void ReplyData(fuse_req_t req, char* buffer, size_t size) { struct fuse_bufvec bufvec = FUSE_BUFVEC_INIT(size); bufvec.buf[0].mem = buffer; - fuse_reply_data(req, &bufvec, FUSE_BUF_SPLICE_MOVE); + + int ret = fuse_reply_data(req, &bufvec, FUSE_BUF_SPLICE_MOVE); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_create fail, ret({}).", errno); + } } static void ReplyData(fuse_req_t req, dingofs::client::vfs::DataBuffer& data_buffer) { - std::vector iovecs = data_buffer.GatherIOVecs(); + auto iovecs = data_buffer.GatherIOVecs(); if (iovecs.empty()) { - fuse_reply_buf(req, nullptr, 0); + int ret = fuse_reply_buf(req, nullptr, 0); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_data fail, ret({}).", errno); + } return; } @@ -185,24 +216,39 @@ static void ReplyData(fuse_req_t req, tmp_fuse_bufvec->buf[i].size = iovecs[i].iov_len; } - fuse_reply_data(req, tmp_fuse_bufvec.get(), FUSE_BUF_SPLICE_MOVE); + int ret = fuse_reply_data(req, tmp_fuse_bufvec.get(), FUSE_BUF_SPLICE_MOVE); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_data fail, ret({}).", errno); + } } static void ReplyWrite(fuse_req_t req, size_t size) { - fuse_reply_write(req, size); + int ret = fuse_reply_write(req, size); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_write fail, ret({}).", errno); + } } static void ReplyBuf(fuse_req_t req, char* buffer, size_t size) { - fuse_reply_buf(req, buffer, size); + int ret = fuse_reply_buf(req, buffer, size); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_buf fail, ret({}).", errno); + } } // Reply with needed buffer size static void ReplyXattr(fuse_req_t req, size_t size) { - fuse_reply_xattr(req, size); + int ret = fuse_reply_xattr(req, size); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_xattr fail, ret({}).", errno); + } } static void ReplyIoctl(fuse_req_t req, const char* out_buf, size_t out_bufsz) { - fuse_reply_ioctl(req, 0, out_buf, out_bufsz); + int ret = fuse_reply_ioctl(req, 0, out_buf, out_bufsz); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_ioctl fail, ret({}).", errno); + } } static void ReplyStatfs(fuse_req_t req, const FsStat& stat) { @@ -253,7 +299,10 @@ static void ReplyStatfs(fuse_req_t req, const FsStat& stat) { stbuf.f_flag = 0; stbuf.f_namemax = g_vfs->GetMaxNameLength(); - fuse_reply_statfs(req, &stbuf); + int ret = fuse_reply_statfs(req, &stbuf); + if (ret != 0) { + LOG(ERROR) << fmt::format("[fuse] fuse_reply_statfs fail, ret({}).", errno); + } } int InitFuseClient(const char* argv0, const struct MountOption* mount_option) { @@ -531,8 +580,11 @@ void FuseOpOpenDir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { } else { fi->fh = fh; - fi->cache_readdir = FLAGS_client_fuse_file_info_keep_cache ? 1 : 0; - fi->keep_cache = FLAGS_client_fuse_file_info_keep_cache ? 1 : 0; + // fi->cache_readdir = FLAGS_client_fuse_file_info_keep_cache ? 1 : 0; + // fi->keep_cache = FLAGS_client_fuse_file_info_keep_cache ? 1 : 0; + + fi->cache_readdir = 0; + fi->keep_cache = 0; ReplyOpen(req, fi); } @@ -540,7 +592,7 @@ void FuseOpOpenDir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info* fi) { - VLOG(1) << fmt::format("read dir, ino({}) fh({}) off({}) size({})", ino, + VLOG(1) << fmt::format("read dir, ino({}) fh({}) off({}) size({}).", ino, fi->fh, off, size); CHECK_GE(off, 0) << "offset is illegal, offset: " << off; @@ -549,11 +601,9 @@ void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, std::string buffer(size, '\0'); Status s = g_vfs->ReadDir( ino, fi->fh, off, false, - [&](const dingofs::client::vfs::DirEntry& dir_entry, - uint64_t off) -> bool { - (void)off; - VLOG(1) << fmt::format("read dir entry({}/{})", dir_entry.name, - dir_entry.ino); + [&](const dingofs::client::vfs::DirEntry& dir_entry, uint64_t) -> bool { + VLOG(1) << fmt::format("read dir({}/{}) fh({}) entry({}/{}).", ino, off, + fi->fh, dir_entry.name, dir_entry.ino); struct stat stat; std::memset(&stat, 0, sizeof(stat)); @@ -567,7 +617,7 @@ void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, if (entsize > rest_size) { VLOG(1) << fmt::format( "read dir entry is full, ino({}) fh({}) off({}) size({}/{}) " - "entry_size({})", + "entry_size({}).", ino, fi->fh, off, buffer.size(), size, entsize); return false; } @@ -579,13 +629,13 @@ void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, if (!s.ok()) { LOG(ERROR) << fmt::format( - "read dir fail, ino({}) fh({}) off({}) size({}) error({})", ino, fi->fh, - off, size, s.ToString()); + "read dir fail, ino({}) fh({}) off({}) size({}) error({}).", ino, + fi->fh, off, size, s.ToString()); ReplyError(req, s); } else { buffer.resize(writed_size); - VLOG(1) << fmt::format("read dir success, ino({}) fh({}) off({}) size({}) ", + VLOG(1) << fmt::format("read dir success, ino({}) fh({}) off({}) size({}).", ino, fi->fh, off, buffer.size()); ReplyBuf(req, buffer.data(), buffer.size()); } @@ -593,7 +643,7 @@ void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, void FuseOpReadDirPlus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info* fi) { - VLOG(1) << fmt::format("read dir, ino({}) fh({}) off({}) size({})", ino, + VLOG(1) << fmt::format("read dir, ino({}) fh({}) off({}) size({}).", ino, fi->fh, off, size); CHECK_GE(off, 0) << "offset is illegal, offset: " << off; @@ -602,10 +652,11 @@ void FuseOpReadDirPlus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, std::string buffer(size, '\0'); Status s = g_vfs->ReadDir( ino, fi->fh, off, true, - [&](const dingofs::client::vfs::DirEntry& dir_entry, int32_t) -> bool { - (void)off; - VLOG(1) << fmt::format("read dir entry({}/{}) attr({})", dir_entry.name, - dir_entry.ino, Attr2Str(dir_entry.attr)); + [&](const dingofs::client::vfs::DirEntry& dir_entry, + uint64_t off2) -> bool { + VLOG(1) << fmt::format( + "read dir({}/{}/{}) fh({}) entry({}/{}) attr({}).", ino, off, off2, + fi->fh, dir_entry.name, dir_entry.ino, Attr2Str(dir_entry.attr)); fuse_entry_param fuse_entry; memset(&fuse_entry, 0, sizeof(fuse_entry_param)); @@ -615,12 +666,12 @@ void FuseOpReadDirPlus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, size_t entsize = fuse_add_direntry_plus(req, buffer.data() + writed_size, rest_size, - dir_entry.name.c_str(), &fuse_entry, ++off); + dir_entry.name.c_str(), &fuse_entry, off2); if (entsize > rest_size) { VLOG(1) << fmt::format( "read dir entry is full, ino({}) fh({}) off({}) size({}/{}) " "entry_size({})", - ino, fi->fh, off, buffer.size(), size, entsize); + ino, fi->fh, off2, buffer.size(), size, entsize); return false; } writed_size += entsize; @@ -630,12 +681,12 @@ void FuseOpReadDirPlus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, if (!s.ok()) { LOG(ERROR) << fmt::format( - "read dir fail, ino({}) fh({}) off({}) size({}) error({})", ino, fi->fh, - off, size, s.ToString()); + "read dir fail, ino({}) fh({}) off({}) size({}) error({}).", ino, + fi->fh, off, size, s.ToString()); ReplyError(req, s); } else { buffer.resize(writed_size); - VLOG(1) << fmt::format("read dir success, ino({}) fh({}) off({}) size({}) ", + VLOG(1) << fmt::format("read dir success, ino({}) fh({}) off({}) size({}).", ino, fi->fh, off, buffer.size()); ReplyBuf(req, buffer.data(), buffer.size()); diff --git a/src/client/vfs/metasystem/mds/dir_iterator.cc b/src/client/vfs/metasystem/mds/dir_iterator.cc index 1b8f9057a..2a189c636 100644 --- a/src/client/vfs/metasystem/mds/dir_iterator.cc +++ b/src/client/vfs/metasystem/mds/dir_iterator.cc @@ -31,7 +31,7 @@ Status DirIterator::Seek() { std::vector entries; auto status = - mds_client_->ReadDir(ctx_, ino_, last_name_, + mds_client_->ReadDir(ctx_, ino_, fh_, last_name_, FLAGS_client_vfs_read_dir_batch_size, true, entries); if (!status.ok()) return status; @@ -44,14 +44,14 @@ Status DirIterator::Seek() { return Status::OK(); } -bool DirIterator::Valid() { return offset_ < entries_.size(); } +bool DirIterator::Valid() { return offset_.load() < entries_.size(); } DirEntry DirIterator::GetValue(bool with_attr) { - CHECK(offset_ < entries_.size()) << "offset out of range"; + CHECK(offset_.load() < entries_.size()) << "offset out of range"; with_attr_ = with_attr; - return entries_[offset_]; + return entries_[offset_.load()]; } void DirIterator::Next() { @@ -60,7 +60,7 @@ void DirIterator::Next() { } std::vector entries; - auto status = mds_client_->ReadDir(ctx_, ino_, last_name_, + auto status = mds_client_->ReadDir(ctx_, ino_, fh_, last_name_, FLAGS_client_vfs_read_dir_batch_size, with_attr_, entries); if (!status.ok()) return; @@ -76,7 +76,7 @@ bool DirIterator::Dump(Json::Value& value) { value["ino"] = ino_; value["last_name"] = last_name_; value["with_attr"] = with_attr_; - value["offset"] = offset_; + value["offset"] = offset_.load(); Json::Value entries = Json::arrayValue; for (const auto& entry : entries_) { @@ -173,7 +173,8 @@ bool DirIteratorManager::Load(MDSClientSPtr mds_client, for (const auto& item : items) { Ino ino = item["ino"].asUInt64(); - auto dir_iterator = DirIterator::New(nullptr, mds_client, ino); + uint64_t fh = item["fh"].asUInt64(); + auto dir_iterator = DirIterator::New(nullptr, mds_client, ino, fh); if (!dir_iterator->Load(item)) { LOG(ERROR) << fmt::format( "[meta.dir_iterator] load dir({}) iterator fail.", ino); diff --git a/src/client/vfs/metasystem/mds/dir_iterator.h b/src/client/vfs/metasystem/mds/dir_iterator.h index 6b5b7a30d..6fd0996ec 100644 --- a/src/client/vfs/metasystem/mds/dir_iterator.h +++ b/src/client/vfs/metasystem/mds/dir_iterator.h @@ -36,12 +36,12 @@ using DirIteratorSPtr = std::shared_ptr; // used by read dir class DirIterator { public: - DirIterator(ContextSPtr ctx, MDSClientSPtr mds_client, Ino ino) - : ctx_(ctx), mds_client_(mds_client), ino_(ino) {} + DirIterator(ContextSPtr ctx, MDSClientSPtr mds_client, Ino ino, uint64_t fh) + : ctx_(ctx), mds_client_(mds_client), ino_(ino), fh_(fh) {} - static DirIteratorSPtr New(ContextSPtr ctx, MDSClientSPtr mds_client, - Ino ino) { - return std::make_shared(ctx, mds_client, ino); + static DirIteratorSPtr New(ContextSPtr ctx, MDSClientSPtr mds_client, Ino ino, + uint64_t fh) { + return std::make_shared(ctx, mds_client, ino, fh); } Status Seek(); @@ -58,11 +58,12 @@ class DirIterator { ContextSPtr ctx_; Ino ino_; + uint64_t fh_; // last file/dir name, used to read next batch std::string last_name_; bool with_attr_{false}; - uint32_t offset_{0}; + std::atomic offset_{0}; // stash entry for read dir std::vector entries_; diff --git a/src/client/vfs/metasystem/mds/filesystem.cc b/src/client/vfs/metasystem/mds/filesystem.cc index ecc60aca7..78a07405d 100644 --- a/src/client/vfs/metasystem/mds/filesystem.cc +++ b/src/client/vfs/metasystem/mds/filesystem.cc @@ -594,7 +594,7 @@ Status MDSFileSystem::RmDir(ContextSPtr ctx, Ino parent, } Status MDSFileSystem::OpenDir(ContextSPtr ctx, Ino ino, uint64_t fh) { - auto dir_iterator = DirIterator::New(ctx, mds_client_, ino); + auto dir_iterator = DirIterator::New(ctx, mds_client_, ino, fh); auto status = dir_iterator->Seek(); if (!status.ok()) { LOG(ERROR) << fmt::format( @@ -619,7 +619,7 @@ Status MDSFileSystem::ReadDir(ContextSPtr ctx, Ino, uint64_t fh, CorrectAttr(ctx, dir_iterator->LastFetchTimeNs(), entry.attr, "readdir"); - if (!handler(entry, offset)) { + if (!handler(entry, ++offset)) { break; } diff --git a/src/client/vfs/metasystem/mds/helper.h b/src/client/vfs/metasystem/mds/helper.h index 2d2d16283..c57a0468d 100644 --- a/src/client/vfs/metasystem/mds/helper.h +++ b/src/client/vfs/metasystem/mds/helper.h @@ -140,7 +140,7 @@ class Helper { out_attr.add_parents(parent); } - return std::move(out_attr); + return out_attr; } static DirEntry ToDirEntry(const pb::mds::ReadDirResponse::Entry& entry) { @@ -149,7 +149,7 @@ class Helper { out_entry.ino = entry.ino(); out_entry.attr = ToAttr(entry.inode()); - return std::move(out_entry); + return out_entry; } static Slice ToSlice(const mds::SliceEntry& slice) { diff --git a/src/client/vfs/metasystem/mds/mds_client.cc b/src/client/vfs/metasystem/mds/mds_client.cc index 2fdabaaef..b1354e86f 100644 --- a/src/client/vfs/metasystem/mds/mds_client.cc +++ b/src/client/vfs/metasystem/mds/mds_client.cc @@ -142,6 +142,12 @@ uint64_t MDSClient::GetInodeVersion(Ino ino) { return version; } +int32_t MDSClient::GetInodeRenameRefCount(Ino ino) { + int32_t rename_ref_count = 0; + parent_memo_->GetRenameRefCount(ino, rename_ref_count); + return rename_ref_count; +} + Status MDSClient::GetFsInfo(RPCPtr rpc, const std::string& name, mds::FsInfoEntry& fs_info) { pb::mds::GetFsInfoRequest request; @@ -280,6 +286,7 @@ Status MDSClient::Create(ContextSPtr ctx, Ino parent, const std::string& name, pb::mds::BatchCreateRequest request; pb::mds::BatchCreateResponse response; + request.mutable_context()->set_inode_version(GetInodeVersion(parent)); SetAncestorInContext(request, parent); request.set_fs_id(fs_id_); @@ -324,6 +331,7 @@ Status MDSClient::MkNod(ContextSPtr ctx, Ino parent, const std::string& name, pb::mds::MkNodRequest request; pb::mds::MkNodResponse response; + request.mutable_context()->set_inode_version(GetInodeVersion(parent)); SetAncestorInContext(request, parent); request.set_fs_id(fs_id_); @@ -363,6 +371,7 @@ Status MDSClient::MkDir(ContextSPtr ctx, Ino parent, const std::string& name, pb::mds::MkDirRequest request; pb::mds::MkDirResponse response; + request.mutable_context()->set_inode_version(GetInodeVersion(parent)); SetAncestorInContext(request, parent); request.set_fs_id(fs_id_); @@ -400,6 +409,7 @@ Status MDSClient::RmDir(ContextSPtr ctx, Ino parent, const std::string& name) { pb::mds::RmDirRequest request; pb::mds::RmDirResponse response; + request.mutable_context()->set_inode_version(GetInodeVersion(parent)); SetAncestorInContext(request, parent); request.set_fs_id(fs_id_); @@ -415,7 +425,7 @@ Status MDSClient::RmDir(ContextSPtr ctx, Ino parent, const std::string& name) { return Status::OK(); } -Status MDSClient::ReadDir(ContextSPtr ctx, Ino ino, +Status MDSClient::ReadDir(ContextSPtr ctx, Ino ino, uint64_t fh, const std::string& last_name, uint32_t limit, bool with_attr, std::vector& entries) { CHECK(fs_id_ != 0) << "fs_id is invalid."; @@ -428,12 +438,15 @@ Status MDSClient::ReadDir(ContextSPtr ctx, Ino ino, pb::mds::ReadDirResponse response; request.mutable_context()->set_inode_version(GetInodeVersion(ino)); + request.mutable_context()->set_use_base_version(GetInodeRenameRefCount(ino) > + 0); request.set_fs_id(fs_id_); request.set_ino(ino); request.set_last_name(last_name); request.set_limit(limit); request.set_with_attr(with_attr); + request.set_fh(fh); auto status = SendRequest(ctx, get_mds_fn, "MDSService", "ReadDir", request, response); @@ -441,6 +454,8 @@ Status MDSClient::ReadDir(ContextSPtr ctx, Ino ino, return status; } + parent_memo_->DecRenameRefCount(ino); + entries.reserve(response.entries_size()); for (const auto& entry : response.entries()) { parent_memo_->Upsert(entry.ino(), ino, entry.inode().version()); @@ -515,6 +530,7 @@ Status MDSClient::Link(ContextSPtr ctx, Ino ino, Ino new_parent, pb::mds::LinkRequest request; pb::mds::LinkResponse response; + request.mutable_context()->set_inode_version(GetInodeVersion(new_parent)); SetAncestorInContext(request, new_parent); request.set_fs_id(fs_id_); @@ -547,6 +563,8 @@ Status MDSClient::UnLink(ContextSPtr ctx, Ino parent, const std::string& name) { pb::mds::UnLinkRequest request; pb::mds::UnLinkResponse response; + request.mutable_context()->set_inode_version(GetInodeVersion(parent)); + SetAncestorInContext(request, parent); request.set_fs_id(fs_id_); @@ -578,6 +596,7 @@ Status MDSClient::Symlink(ContextSPtr ctx, Ino parent, const std::string& name, pb::mds::SymlinkRequest request; pb::mds::SymlinkResponse response; + request.mutable_context()->set_inode_version(GetInodeVersion(parent)); SetAncestorInContext(request, parent); request.set_fs_id(fs_id_); @@ -897,8 +916,10 @@ Status MDSClient::Rename(ContextSPtr ctx, Ino old_parent, return status; } - parent_memo_->UpsertVersion(old_parent, response.old_parent_version()); - parent_memo_->UpsertVersion(new_parent, response.new_parent_version()); + parent_memo_->UpsertVersionAndRenameRefCount(old_parent, + response.old_parent_version()); + parent_memo_->UpsertVersionAndRenameRefCount(new_parent, + response.new_parent_version()); return Status::OK(); } diff --git a/src/client/vfs/metasystem/mds/mds_client.h b/src/client/vfs/metasystem/mds/mds_client.h index b7e79e2f2..de35b3174 100644 --- a/src/client/vfs/metasystem/mds/mds_client.h +++ b/src/client/vfs/metasystem/mds/mds_client.h @@ -102,8 +102,8 @@ class MDSClient { Attr& out_attr); Status RmDir(ContextSPtr ctx, Ino parent, const std::string& name); - Status ReadDir(ContextSPtr ctx, Ino ino, const std::string& last_name, - uint32_t limit, bool with_attr, + Status ReadDir(ContextSPtr ctx, Ino ino, uint64_t fh, + const std::string& last_name, uint32_t limit, bool with_attr, std::vector& entries); Status Open(ContextSPtr ctx, Ino ino, int flags, bool is_prefetch_chunk, @@ -157,6 +157,7 @@ class MDSClient { MDSMeta GetMdsByParent(int64_t parent, bool& is_primary_mds); uint64_t GetInodeVersion(Ino ino); + int32_t GetInodeRenameRefCount(Ino ino); bool UpdateRouter(); diff --git a/src/client/vfs/metasystem/mds/parent_memo.cc b/src/client/vfs/metasystem/mds/parent_memo.cc index 7c852394b..67f490f74 100644 --- a/src/client/vfs/metasystem/mds/parent_memo.cc +++ b/src/client/vfs/metasystem/mds/parent_memo.cc @@ -84,6 +84,19 @@ std::vector ParentMemo::GetAncestors(uint64_t ino) { return ancestors; } +bool ParentMemo::GetRenameRefCount(Ino ino, int32_t& rename_ref_count) { + utils::ReadLockGuard lk(lock_); + + auto it = ino_map_.find(ino); + if (it == ino_map_.end()) { + return false; + } + + rename_ref_count = it->second.rename_ref_count; + + return true; +} + void ParentMemo::Upsert(Ino ino, Ino parent) { utils::WriteLockGuard lk(lock_); @@ -107,6 +120,20 @@ void ParentMemo::UpsertVersion(Ino ino, uint64_t version) { } } +void ParentMemo::UpsertVersionAndRenameRefCount(Ino ino, uint64_t version) { + utils::WriteLockGuard lk(lock_); + + auto it = ino_map_.find(ino); + if (it != ino_map_.end()) { + it->second.version = std::max(it->second.version, version); + ++it->second.rename_ref_count; + + } else { + ino_map_[ino] = + Entry{.parent = 0, .version = version, .rename_ref_count = 1}; + } +} + void ParentMemo::Upsert(Ino ino, Ino parent, uint64_t version) { utils::WriteLockGuard lk(lock_); @@ -126,6 +153,17 @@ void ParentMemo::Delete(Ino ino) { ino_map_.erase(ino); } +void ParentMemo::DecRenameRefCount(Ino ino) { + utils::WriteLockGuard lk(lock_); + + auto it = ino_map_.find(ino); + if (it != ino_map_.end()) { + if (it->second.rename_ref_count > 0) { + --it->second.rename_ref_count; + } + } +} + bool ParentMemo::Dump(Json::Value& value) { utils::ReadLockGuard lk(lock_); diff --git a/src/client/vfs/metasystem/mds/parent_memo.h b/src/client/vfs/metasystem/mds/parent_memo.h index fa47fd567..bf4988891 100644 --- a/src/client/vfs/metasystem/mds/parent_memo.h +++ b/src/client/vfs/metasystem/mds/parent_memo.h @@ -43,11 +43,14 @@ class ParentMemo { bool GetParent(Ino ino, Ino& parent); bool GetVersion(Ino ino, uint64_t& version); std::vector GetAncestors(uint64_t ino); + bool GetRenameRefCount(Ino ino, int32_t& rename_ref_count); void Upsert(Ino ino, Ino parent); void UpsertVersion(Ino ino, uint64_t version); + void UpsertVersionAndRenameRefCount(Ino ino, uint64_t version); void Upsert(Ino ino, Ino parent, uint64_t version); void Delete(Ino ino); + void DecRenameRefCount(Ino ino); bool Dump(Json::Value& value); bool Load(const Json::Value& value); @@ -56,6 +59,7 @@ class ParentMemo { struct Entry { Ino parent; uint64_t version; + int32_t rename_ref_count{0}; }; utils::RWLock lock_; diff --git a/src/client/vfs/vfs_impl.cc b/src/client/vfs/vfs_impl.cc index 03173867a..8c58d147d 100644 --- a/src/client/vfs/vfs_impl.cc +++ b/src/client/vfs/vfs_impl.cc @@ -482,13 +482,10 @@ Status VFSImpl::ReadDir(ContextSPtr ctx, Ino ino, uint64_t fh, uint64_t offset, handler(stats_entry, 1); // pos 0 is the offset for .stats entry } - uint64_t to_meta = (offset > 0) ? (offset - 1) : 0; - return meta_system_->ReadDir( - ctx, ino, fh, to_meta, with_attr, - [handler](const DirEntry& entry, uint64_t meta_offset) { - uint64_t return_off = meta_offset + 1; - return handler(entry, return_off); + ctx, ino, fh, offset, with_attr, + [handler](const DirEntry& entry, uint64_t offset) { + return handler(entry, offset); }); } diff --git a/src/client/vfs/vfs_wrapper.cc b/src/client/vfs/vfs_wrapper.cc index b7431ab0e..6017641aa 100644 --- a/src/client/vfs/vfs_wrapper.cc +++ b/src/client/vfs/vfs_wrapper.cc @@ -812,21 +812,19 @@ Status VFSWrapper::ReadDir(Ino ino, uint64_t fh, uint64_t offset, {&client_op_metric_->opReadDir, &client_op_metric_->opAll}); if (FLAGS_client_access_logging_verbose) { - s = vfs_->ReadDir(span->GetContext(), ino, fh, offset, with_attr, - [&](const dingofs::client::vfs::DirEntry& dir_entry, - int32_t off) -> bool { - (void)off; - dingofs::client::AccessLogGuard log( - [ino, name = std::string(dir_entry.name), - attr = dir_entry.attr, fh]() { - return fmt::format( - "add_direntry ({}/{}) : {} [fh:{}]", ino, - name, dingofs::client::vfs::Attr2Str(attr), - fh); - }); - - return handler(dir_entry, off); - }); + s = vfs_->ReadDir( + span->GetContext(), ino, fh, offset, with_attr, + [&](const vfs::DirEntry& dir_entry, uint64_t off) -> bool { + dingofs::client::AccessLogGuard log( + [ino, off, name = std::string(dir_entry.name), + attr = dir_entry.attr, fh]() { + return fmt::format("add_direntry ({}/{}) : {} {} [fh:{}]", ino, + name, dingofs::client::vfs::Attr2Str(attr), + off, fh); + }); + + return handler(dir_entry, off); + }); } else { s = vfs_->ReadDir(span->GetContext(), ino, fh, offset, with_attr, handler); diff --git a/src/mds/common/context.h b/src/mds/common/context.h index 7fb5d208b..848454bb1 100644 --- a/src/mds/common/context.h +++ b/src/mds/common/context.h @@ -16,6 +16,7 @@ #define DINGOFS_MDS_COMMON_CONTEXT_H_ #include +#include #include "mds/common/tracing.h" #include "mds/common/type.h" @@ -26,25 +27,38 @@ namespace mds { class Context { public: Context() = default; - Context(bool is_bypass_cache, uint64_t inode_version, const std::string& client_id = "") - : is_bypass_cache_(is_bypass_cache), inode_version_(inode_version), client_id_(client_id) {}; + Context(const std::string& request_id, const std::string method_name) + : request_id_(request_id), method_name_(method_name) {}; + Context(const ContextEntry& ctx, const std::string& request_id, const std::string method_name) + : is_bypass_cache_(ctx.is_bypass_cache()), + use_base_version_(ctx.use_base_version()), + inode_version_(ctx.inode_version()), + client_id_(ctx.client_id()), + request_id_(request_id), + method_name_(method_name) { + ancestors_ = {ctx.ancestors().begin(), ctx.ancestors().end()}; + }; - void SetBypassCache(bool is_bypass_cache) { is_bypass_cache_ = is_bypass_cache; } bool IsBypassCache() const { return is_bypass_cache_; } - + bool UseBaseVersion() const { return use_base_version_; } uint64_t GetInodeVersion() const { return inode_version_; } const std::string& ClientId() const { return client_id_; } + const std::string& RequestId() const { return request_id_; } + const std::string& MethodName() const { return method_name_; } Trace& GetTrace() { return trace_; } - void SetAncestors(std::vector&& ancestors) { ancestors_ = std::move(ancestors); } const std::vector& GetAncestors() const { return ancestors_; } private: - bool is_bypass_cache_{false}; + const bool is_bypass_cache_{false}; + + const bool use_base_version_{false}; - uint64_t inode_version_{0}; + const uint64_t inode_version_{0}; - std::string client_id_; + const std::string client_id_; + const std::string request_id_; + const std::string method_name_; std::vector ancestors_; diff --git a/src/mds/filesystem/dentry.h b/src/mds/filesystem/dentry.h index 92b458104..96cb7a39d 100644 --- a/src/mds/filesystem/dentry.h +++ b/src/mds/filesystem/dentry.h @@ -34,6 +34,7 @@ using DentrySetSPtr = std::shared_ptr; class Dentry { public: Dentry() = default; + Dentry(const std::string& name) : name_(name), type_(pb::mds::FileType::FILE) {}; Dentry(uint32_t fs_id, const std::string& name, Ino parent, Ino ino, pb::mds::FileType type, uint32_t flag, InodeSPtr inode = nullptr); Dentry(const pb::mds::Dentry& dentry, InodeSPtr inode = nullptr); @@ -53,11 +54,11 @@ class Dentry { private: std::string name_; - uint32_t fs_id_; - Ino ino_; - Ino parent_; + uint32_t fs_id_{0}; + Ino ino_{0}; + Ino parent_{0}; pb::mds::FileType type_; - uint32_t flag_; + uint32_t flag_{0}; // maybe null, just inode shortcut InodeWPtr inode_; diff --git a/src/mds/filesystem/filesystem.cc b/src/mds/filesystem/filesystem.cc index 2e938ca13..b0c8d612e 100644 --- a/src/mds/filesystem/filesystem.cc +++ b/src/mds/filesystem/filesystem.cc @@ -198,6 +198,20 @@ Status FileSystem::GetPartitionParentInode(Context& ctx, PartitionPtr& partition return GetInode(ctx, ctx.GetInodeVersion(), partition->INo(), out_inode); } +void FileSystem::AddDentryToPartition(Ino parent, const Dentry& dentry, uint64_t version) { + auto partition = GetPartitionFromCache(parent); + CHECK(partition != nullptr) << fmt::format("partition({}) not exist in cache.", parent); + + partition->PutChild(dentry, version); +} + +void FileSystem::DeleteDentryFromPartition(Ino parent, const std::string& name, uint64_t version) { + auto partition = GetPartitionFromCache(parent); + CHECK(partition != nullptr) << fmt::format("partition({}) not exist in cache.", parent); + + partition->DeleteChild(name, version); +} + Status FileSystem::GetPartition(Context& ctx, Ino parent, PartitionPtr& out_partition) { return GetPartition(ctx, ctx.GetInodeVersion(), parent, out_partition); } @@ -205,9 +219,10 @@ Status FileSystem::GetPartition(Context& ctx, Ino parent, PartitionPtr& out_part Status FileSystem::GetPartition(Context& ctx, uint64_t version, Ino parent, PartitionPtr& out_partition) { auto& trace = ctx.GetTrace(); const bool bypass_cache = ctx.IsBypassCache(); + const bool use_base_version = ctx.UseBaseVersion(); if (bypass_cache) { - auto status = GetPartitionFromStore(parent, "Bypass", out_partition); + auto status = GetPartitionFromStore(ctx, parent, "Bypass", out_partition); if (!status.ok()) { return Status(pb::error::ENOT_FOUND, fmt::format("not found partition({}), {}.", parent, status.error_str())); } @@ -217,7 +232,7 @@ Status FileSystem::GetPartition(Context& ctx, uint64_t version, Ino parent, Part auto partition = GetPartitionFromCache(parent); if (partition == nullptr) { - auto status = GetPartitionFromStore(parent, "CacheMiss", out_partition); + auto status = GetPartitionFromStore(ctx, parent, "CacheMiss", out_partition); if (!status.ok()) { return Status(pb::error::ENOT_FOUND, fmt::format("not found partition({}), {}.", parent, status.error_str())); } @@ -225,8 +240,10 @@ Status FileSystem::GetPartition(Context& ctx, uint64_t version, Ino parent, Part return status; } - if (version > partition->Version()) { - auto status = GetPartitionFromStore(parent, "OutOfDate", out_partition); + uint64_t check_version = use_base_version ? partition->BaseVersion() : partition->DeltaVersion(); + if (version > check_version) { + std::string reason = fmt::format("OutOfDate[{},cache{},req{}]", use_base_version, check_version, version); + auto status = GetPartitionFromStore(ctx, parent, reason, out_partition); if (!status.ok()) { return Status(pb::error::ENOT_FOUND, fmt::format("not found partition({}), {}.", parent, status.error_str())); } @@ -244,7 +261,11 @@ PartitionPtr FileSystem::GetPartitionFromCache(Ino parent) { return partition_ca std::map FileSystem::GetAllPartitionsFromCache() { return partition_cache_.GetAll(); } -Status FileSystem::GetPartitionFromStore(Ino parent, const std::string& reason, PartitionPtr& out_partition) { +Status FileSystem::GetPartitionFromStore(Context& ctx, Ino parent, const std::string& reason, + PartitionPtr& out_partition) { + const std::string& request_id = ctx.RequestId(); + const std::string& method_name = ctx.MethodName(); + // scan dentry from store Trace trace; std::vector kvs; @@ -257,7 +278,7 @@ Status FileSystem::GetPartitionFromStore(Ino parent, const std::string& reason, if (!status.ok()) return status; if (kvs.empty()) { - return Status(pb::error::ENOT_FOUND, "not found kv"); + return Status(pb::error::ENOT_FOUND, "not found partition from store"); } auto& parent_kv = kvs.at(0); @@ -268,10 +289,11 @@ Status FileSystem::GetPartitionFromStore(Ino parent, const std::string& reason, auto parent_inode = Inode::New(MetaCodec::DecodeInodeValue(parent_kv.value)); auto old_partition = partition_cache_.Get(parent); - if (old_partition != nullptr && parent_inode->Version() <= old_partition->Version()) { + if (old_partition != nullptr && parent_inode->Version() <= old_partition->BaseVersion()) { out_partition = old_partition; - DINGO_LOG(INFO) << fmt::format("[fs.{}.{}] exist fresh partition, version({}:{}) reason({}).", fs_id_, parent, - old_partition->Version(), parent_inode->Version(), reason); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}.{}.{}] exist fresh partition, version({}:{}) reason({}).", fs_id_, parent, + method_name, request_id, old_partition->BaseVersion(), parent_inode->Version(), + reason); return Status::OK(); } @@ -284,13 +306,11 @@ Status FileSystem::GetPartitionFromStore(Ino parent, const std::string& reason, partition->PutChild(dentry); } - partition_cache_.PutIf(parent, partition); + out_partition = partition_cache_.PutIf(parent, partition); UpsertInodeCache(parent, parent_inode); - out_partition = partition; - - DINGO_LOG(INFO) << fmt::format("[fs.{}.{}] fetch partition, version({}) reason({}).", fs_id_, parent, - parent_inode->Version(), reason); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}.{}.{}] fetch partition, size({}) version({}) reason({}).", fs_id_, parent, + method_name, request_id, kvs.size(), parent_inode->Version(), reason); return Status::OK(); } @@ -339,7 +359,7 @@ Status FileSystem::GetInode(Context& ctx, uint64_t version, const Dentry& dentry Status status; do { if (bypass_cache) { - status = GetInodeFromStore(dentry.INo(), "Bypass", false, out_inode); + status = GetInodeFromStore(ctx, dentry.INo(), "Bypass", false, out_inode); is_fetch = true; break; } @@ -348,14 +368,14 @@ Status FileSystem::GetInode(Context& ctx, uint64_t version, const Dentry& dentry if (inode == nullptr) { inode = GetInodeFromCache(dentry.INo()); if (inode == nullptr) { - status = GetInodeFromStore(dentry.INo(), "CacheMiss", true, out_inode); + status = GetInodeFromStore(ctx, dentry.INo(), "CacheMiss", true, out_inode); is_fetch = true; break; } } if (inode->Version() < version) { - status = GetInodeFromStore(dentry.INo(), "OutOfDate", true, out_inode); + status = GetInodeFromStore(ctx, dentry.INo(), "OutOfDate", true, out_inode); is_fetch = true; break; } @@ -381,16 +401,16 @@ Status FileSystem::GetInode(Context& ctx, uint64_t version, Ino ino, InodeSPtr& const bool bypass_cache = ctx.IsBypassCache(); if (bypass_cache) { - return GetInodeFromStore(ino, "Bypass", false, out_inode); + return GetInodeFromStore(ctx, ino, "Bypass", false, out_inode); } auto inode = GetInodeFromCache(ino); if (inode == nullptr) { - return GetInodeFromStore(ino, "CacheMiss", true, out_inode); + return GetInodeFromStore(ctx, ino, "CacheMiss", true, out_inode); } if (inode->Version() < version) { - return GetInodeFromStore(ino, "OutOfDate", true, out_inode); + return GetInodeFromStore(ctx, ino, "OutOfDate", true, out_inode); } out_inode = inode; @@ -399,15 +419,19 @@ Status FileSystem::GetInode(Context& ctx, uint64_t version, Ino ino, InodeSPtr& return Status::OK(); } -Status FileSystem::GetInodeFromStore(Ino ino, const std::string& reason, bool is_cache, InodeSPtr& out_inode) { +Status FileSystem::GetInodeFromStore(Context& ctx, Ino ino, const std::string& reason, bool is_cache, + InodeSPtr& out_inode) { + const auto& request_id = ctx.RequestId(); + const auto& method_name = ctx.MethodName(); + Trace trace; GetInodeAttrOperation operation(trace, fs_id_, ino); auto status = RunOperation(&operation); if (!status.ok()) { if (status.error_code() != pb::error::ENOT_FOUND) { - DINGO_LOG(ERROR) << fmt::format("[fs.{}] fetch inode({}) from store fail, reason({}), status({}).", fs_id_, ino, - reason, status.error_str()); + DINGO_LOG(ERROR) << fmt::format("[fs.{}.{}.{}.{}] fetch inode from store fail, reason({}), status({}).", fs_id_, + method_name, request_id, ino, reason, status.error_str()); } return status; } @@ -418,8 +442,8 @@ Status FileSystem::GetInodeFromStore(Ino ino, const std::string& reason, bool is if (is_cache) UpsertInodeCache(ino, out_inode); - DINGO_LOG(INFO) << fmt::format("[fs.{}.{}] fetch inode, version({}) reason({}).", fs_id_, ino, out_inode->Version(), - reason); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}.{}.{}] fetch inode, version({}) reason({}).", fs_id_, method_name, + request_id, ino, out_inode->Version(), reason); return Status::OK(); } @@ -465,9 +489,9 @@ void FileSystem::UpsertInodeCache(AttrEntry& attr) { inode_cache_.PutIf(attr); } void FileSystem::DeleteInodeFromCache(Ino ino) { inode_cache_.Delete(ino); } void FileSystem::ClearCache() { - partition_cache_.Clear(); - inode_cache_.Clear(); - chunk_cache_.Clear(); + ClearPartitionCache(); + ClearInodeCache(); + ClearChunkCache(); } void FileSystem::ClearInodeCache() { inode_cache_.Clear(); } @@ -598,8 +622,9 @@ Status FileSystem::Lookup(Context& ctx, Ino parent, const std::string& name, Ent entry_out.attr = inode->Copy(); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] lookup parent({}), name({}) version({}) ptr({}).", fs_id_, - duration.ElapsedUs(), parent, name, entry_out.attr.version(), (void*)inode.get()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] lookup parent({}), name({}) version({}) ptr({}).", fs_id_, + ctx.RequestId(), duration.ElapsedUs(), parent, name, entry_out.attr.version(), + (void*)inode.get()); return Status::OK(); } @@ -636,11 +661,6 @@ Status FileSystem::BatchCreate(Context& ctx, Ino parent, const std::vectorPutChild(dentry, parent_attr.version()); + AddDentryToPartition(parent, dentry, parent_attr.version()); } for (auto& inode : inodes) { UpsertInodeCache(inode); @@ -772,14 +792,9 @@ Status FileSystem::MkNod(Context& ctx, const MkNodParam& param, EntryOut& entry_ return Status(pb::error::EILLEGAL_PARAMTETER, "invalid parent inode id"); } - // get partition - PartitionPtr partition; - auto status = GetPartition(ctx, parent, partition); - if (!status.ok()) return status; - // generate inode id Ino ino = 0; - status = GenFileIno(ino); + auto status = GenFileIno(ino); if (!status.ok()) return status; // update parent memo @@ -817,8 +832,8 @@ Status FileSystem::MkNod(Context& ctx, const MkNodParam& param, EntryOut& entry_ MkNodOperation operation(trace, dentry, attr); status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] mknod {} finish, status({}).", fs_id_, duration.ElapsedUs(), param.name, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] mknod {} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), param.name, status.error_str()); if (!status.ok()) { return Status(pb::error::EBACKEND_STORE, fmt::format("put inode/dentry fail, {}", status.error_str())); @@ -830,7 +845,7 @@ Status FileSystem::MkNod(Context& ctx, const MkNodParam& param, EntryOut& entry_ // update cache UpsertInodeCache(ino, inode); UpsertInodeCache(parent_attr); - partition->PutChild(dentry, parent_attr.version()); + AddDentryToPartition(parent, dentry, parent_attr.version()); // update quota std::string reason = fmt::format("mknod.{}.{}", parent, param.name); @@ -969,15 +984,15 @@ Status FileSystem::Open(Context& ctx, Ino ino, uint32_t flags, bool is_prefetch_ fetch_from = "store"; auto status = GetChunksFromStore(ino, chunks, FLAGS_mds_transfer_max_slice_num); if (status.ok() && !is_completely_fn(chunks)) { - DINGO_LOG(WARNING) << fmt::format("[fs.{}] chunks is not completely, ino({}) length({}) chunks({}).", fs_id_, - ino, file_length, chunks.size()); + DINGO_LOG(WARNING) << fmt::format("[fs.{}.{}] chunks is not completely, ino({}) length({}) chunks({}).", fs_id_, + ctx.RequestId(), ino, file_length, chunks.size()); } } } - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] open {} finish, flags({:o}:{}) fetch_chunk({}) status({}).", fs_id_, - duration.ElapsedUs(), ino, flags, Helper::DescOpenFlags(flags), fetch_from, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] open {} finish, flags({:o}:{}) fetch_chunk({}) status({}).", fs_id_, + ctx.RequestId(), duration.ElapsedUs(), ino, flags, Helper::DescOpenFlags(flags), + fetch_from, status.error_str()); return Status::OK(); } @@ -996,8 +1011,8 @@ Status FileSystem::Release(Context& ctx, Ino ino, const std::string& session_id) return status; } - DINGO_LOG(INFO) << fmt::format("[fs.{}] release finish, ino({}) session_id({}) status({}).", fs_id_, ino, session_id, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}] release finish, ino({}) session_id({}) status({}).", fs_id_, + ctx.RequestId(), ino, session_id, status.error_str()); // delete cache file_session_manager_.Delete(ino, session_id); @@ -1025,16 +1040,9 @@ Status FileSystem::MkDir(Context& ctx, const MkDirParam& param, EntryOut& entry_ return Status(pb::error::EILLEGAL_PARAMTETER, "invalid parent inode id."); } - // get parent dentry - PartitionPtr partition; - auto status = GetPartition(ctx, parent, partition); - if (!status.ok()) { - return status; - } - // generate inode id Ino ino = 0; - status = GenDirIno(ino); + auto status = GenDirIno(ino); if (!status.ok()) { return status; } @@ -1075,8 +1083,8 @@ Status FileSystem::MkDir(Context& ctx, const MkDirParam& param, EntryOut& entry_ status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] mkdir {} finish, status({}).", fs_id_, duration.ElapsedUs(), param.name, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] mkdir {} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), param.name, status.error_str()); if (!status.ok()) { return Status(pb::error::EBACKEND_STORE, fmt::format("put inode/dentry fail, {}", status.error_str())); @@ -1088,7 +1096,8 @@ Status FileSystem::MkDir(Context& ctx, const MkDirParam& param, EntryOut& entry_ // update cache UpsertInodeCache(ino, inode); UpsertInodeCache(parent_attr); - partition->PutChild(dentry, parent_attr.version()); + AddDentryToPartition(parent, dentry, parent_attr.version()); + if (IsMonoPartition()) { partition_cache_.PutIf(ino, Partition::New(inode)); } @@ -1147,8 +1156,8 @@ Status FileSystem::RmDir(Context& ctx, Ino parent, const std::string& name) { status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] rmdir {} finish, status({}).", fs_id_, duration.ElapsedUs(), name, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] rmdir {}/{} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), parent, name, status.error_str()); if (!status.ok()) { return status; } @@ -1158,7 +1167,7 @@ Status FileSystem::RmDir(Context& ctx, Ino parent, const std::string& name) { // update cache UpsertInodeCache(parent_attr); - partition->DeleteChild(name, parent_attr.version()); + DeleteDentryFromPartition(parent, name, parent_attr.version()); // update quota std::string reason = fmt::format("rmdir.{}.{}", parent, name); @@ -1170,7 +1179,7 @@ Status FileSystem::RmDir(Context& ctx, Ino parent, const std::string& name) { if (IsParentHashPartition()) { NotifyBuddyRefreshInode(std::move(parent_attr)); - NotifyBuddyCleanPartitionCache(dentry.INo(), UINT64_MAX); + NotifyBuddyCleanPartitionCache(dentry.INo()); } else { partition_cache_.Delete(dentry.INo()); } @@ -1224,15 +1233,9 @@ Status FileSystem::Link(Context& ctx, Ino ino, Ino new_parent, const std::string auto& trace = ctx.GetTrace(); - PartitionPtr partition; - auto status = GetPartition(ctx, new_parent, partition); - if (!status.ok()) { - return status; - } - // get inode InodeSPtr inode; - status = GetInode(ctx, ino, inode); + auto status = GetInode(ctx, ino, inode); if (!status.ok()) { return status; } @@ -1256,8 +1259,8 @@ Status FileSystem::Link(Context& ctx, Ino ino, Ino new_parent, const std::string HardLinkOperation operation(trace, dentry); status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] link {} -> {}/{} finish, status({}).", fs_id_, duration.ElapsedUs(), - ino, new_parent, new_name, status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] link {} -> {}/{} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), ino, new_parent, new_name, status.error_str()); if (!status.ok()) { return Status(pb::error::EBACKEND_STORE, fmt::format("put inode/dentry fail, {}", status.error_str())); @@ -1274,7 +1277,7 @@ Status FileSystem::Link(Context& ctx, Ino ino, Ino new_parent, const std::string // update cache UpsertInodeCache(child_attr); UpsertInodeCache(parent_attr); - partition->PutChild(dentry, parent_attr.version()); + AddDentryToPartition(parent_attr.ino(), dentry, parent_attr.version()); entry_out.attr = child_attr; entry_out.parent_version = parent_attr.version(); @@ -1331,7 +1334,7 @@ Status FileSystem::UnLink(Context& ctx, Ino parent, const std::string& name, Ent auto& parent_attr = result.attr; auto& child_attr = result.child_attr; - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] unlink {}/{} finish, nlink({}) status({}).", fs_id_, + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] unlink {}/{} finish, nlink({}) status({}).", fs_id_, ctx.RequestId(), duration.ElapsedUs(), parent, name, child_attr.nlink(), status.error_str()); if (!status.ok()) { @@ -1348,7 +1351,7 @@ Status FileSystem::UnLink(Context& ctx, Ino parent, const std::string& name, Ent quota_manager_->AsyncUpdateDirUsage(parent, -delta_bytes, -1, reason); // update cache - partition->DeleteChild(name, parent_attr.version()); + DeleteDentryFromPartition(parent, name, parent_attr.version()); UpsertInodeCache(parent_attr); UpsertInodeCache(child_attr); @@ -1381,15 +1384,9 @@ Status FileSystem::Symlink(Context& ctx, const std::string& symlink, Ino new_par auto& trace = ctx.GetTrace(); - PartitionPtr partition; - auto status = GetPartition(ctx, new_parent, partition); - if (!status.ok()) { - return status; - } - // generate inode id Ino ino = 0; - status = GenFileIno(ino); + auto status = GenFileIno(ino); if (!status.ok()) { return status; } @@ -1431,8 +1428,8 @@ Status FileSystem::Symlink(Context& ctx, const std::string& symlink, Ino new_par status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] symlink {}/{} finish, status({}).", fs_id_, duration.ElapsedUs(), - new_parent, new_name, status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] symlink {}/{} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), new_parent, new_name, status.error_str()); if (!status.ok()) { return Status(pb::error::EBACKEND_STORE, fmt::format("put inode/dentry fail, {}", status.error_str())); @@ -1444,7 +1441,7 @@ Status FileSystem::Symlink(Context& ctx, const std::string& symlink, Ino new_par // update cache UpsertInodeCache(ino, inode); UpsertInodeCache(parent_attr); - partition->PutChild(dentry, parent_attr.version()); + AddDentryToPartition(parent_attr.ino(), dentry, parent_attr.version()); // update quota std::string reason = fmt::format("symlink.{}.{}", new_parent, new_name); @@ -1529,8 +1526,8 @@ Status FileSystem::SetAttr(Context& ctx, Ino ino, const SetAttrParam& param, Ent status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] setattr {} finish, status({}).", fs_id_, duration.ElapsedUs(), ino, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] setattr {} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), ino, status.error_str()); if (!status.ok()) { return Status(pb::error::EBACKEND_STORE, fmt::format("put inode fail, {}", status.error_str())); @@ -1617,8 +1614,8 @@ Status FileSystem::SetXAttr(Context& ctx, Ino ino, const Inode::XAttrMap& xattrs status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] setxattr {} finish, status({}).", fs_id_, duration.ElapsedUs(), ino, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] setxattr {} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), ino, status.error_str()); if (!status.ok()) { return Status(pb::error::EBACKEND_STORE, fmt::format("put inode fail, {}", status.error_str())); @@ -1658,8 +1655,8 @@ Status FileSystem::RemoveXAttr(Context& ctx, Ino ino, const std::string& name, E status = RunOperation(&operation); - DINGO_LOG(INFO) << fmt::format("[fs.{}][{}us] removexattr {} finish, status({}).", fs_id_, duration.ElapsedUs(), ino, - status.error_str()); + DINGO_LOG(INFO) << fmt::format("[fs.{}.{}][{}us] removexattr {} finish, status({}).", fs_id_, ctx.RequestId(), + duration.ElapsedUs(), ino, status.error_str()); if (!status.ok()) { return Status(pb::error::EBACKEND_STORE, fmt::format("put inode fail, {}", status.error_str())); @@ -1718,16 +1715,16 @@ void FileSystem::NotifyBuddyRefreshInode(AttrEntry&& attr) { } } -void FileSystem::NotifyBuddyCleanPartitionCache(Ino ino, uint64_t version) { +void FileSystem::NotifyBuddyCleanPartitionCache(Ino ino) { if (notify_buddy_ == nullptr) return; auto mds_id = GetMdsIdByIno(ino); CHECK(mds_id != 0) << fmt::format("mds id should not be 0, ino({}).", ino); if (mds_id == self_mds_id_) { - partition_cache_.DeleteIf(ino, version); + partition_cache_.Delete(ino); } else { - notify_buddy_->AsyncNotify(notify::CleanPartitionCacheMessage::Create(mds_id, fs_id_, ino, version)); + notify_buddy_->AsyncNotify(notify::CleanPartitionCacheMessage::Create(mds_id, fs_id_, ino, 0)); } } @@ -1738,8 +1735,6 @@ Status FileSystem::Rename(Context& ctx, const RenameParam& param, uint64_t& old_ Ino new_parent = param.new_parent; const std::string& new_name = param.new_name; - DINGO_LOG(INFO) << fmt::format("fs.{}] rename {}/{} to {}/{}.", fs_id_, old_parent, old_name, new_parent, new_name); - auto& trace = ctx.GetTrace(); Duration duration; @@ -1790,10 +1785,10 @@ Status FileSystem::Rename(Context& ctx, const RenameParam& param, uint64_t& old_ bool is_exist_new_dentry = result.is_exist_new_dentry; DINGO_LOG(INFO) << fmt::format( - "[fs.{}][{}us] rename {}/{} -> {}/{} finish, state({},{}) version({},{}) " + "[fs.{}.{}][{}us] rename {}/{} -> {}/{} finish, state({},{}) version({},{}) " "status({}).", - fs_id_, duration.ElapsedUs(), old_parent, old_name, new_parent, new_name, is_same_parent, is_exist_new_dentry, - old_parent_attr.version(), new_parent_attr.version(), status.error_str()); + fs_id_, ctx.RequestId(), duration.ElapsedUs(), old_parent, old_name, new_parent, new_name, is_same_parent, + is_exist_new_dentry, old_parent_attr.version(), new_parent_attr.version(), status.error_str()); if (!status.ok()) { return status; @@ -1847,8 +1842,8 @@ Status FileSystem::Rename(Context& ctx, const RenameParam& param, uint64_t& old_ } else { // clean partition cache - NotifyBuddyCleanPartitionCache(old_parent, old_parent_attr.version()); - if (!is_same_parent) NotifyBuddyCleanPartitionCache(new_parent, new_parent_attr.version()); + // NotifyBuddyCleanPartitionCache(old_parent, old_parent_attr.version()); + // if (!is_same_parent) NotifyBuddyCleanPartitionCache(new_parent, new_parent_attr.version()); // refresh parent of parent inode cache NotifyBuddyRefreshInode(std::move(old_parent_attr)); @@ -1857,7 +1852,7 @@ Status FileSystem::Rename(Context& ctx, const RenameParam& param, uint64_t& old_ // delete exist new partition if (is_exist_new_dentry) { if (prev_new_dentry.type() == pb::mds::FileType::DIRECTORY) { - NotifyBuddyCleanPartitionCache(prev_new_dentry.ino(), UINT64_MAX); + NotifyBuddyCleanPartitionCache(prev_new_dentry.ino()); } else { if (prev_new_attr.nlink() <= 0) { DeleteInodeFromCache(prev_new_attr.ino()); @@ -2295,15 +2290,6 @@ Status FileSystem::BatchGetXAttr(Context& ctx, const std::vector& inoe return Status::OK(); } -Status FileSystem::RefreshInode(const std::vector& inoes) { - for (const auto& ino : inoes) { - partition_cache_.Delete(ino); - inode_cache_.Delete(ino); - } - - return Status::OK(); -} - void FileSystem::RefreshInode(AttrEntry& attr) { UpsertInodeCache(attr); } Status FileSystem::RefreshFsInfo(const std::string& reason) { return RefreshFsInfo(fs_info_->GetName(), reason); } diff --git a/src/mds/filesystem/filesystem.h b/src/mds/filesystem/filesystem.h index 9a5a90520..2c9aa8481 100644 --- a/src/mds/filesystem/filesystem.h +++ b/src/mds/filesystem/filesystem.h @@ -207,7 +207,6 @@ class FileSystem : public std::enable_shared_from_this { Status BatchGetInode(Context& ctx, const std::vector& inoes, std::vector& out_entries); Status BatchGetXAttr(Context& ctx, const std::vector& inoes, std::vector& out_xattrs); - Status RefreshInode(const std::vector& inoes); void RefreshInode(AttrEntry& attr); Status RefreshFsInfo(const std::string& reason); @@ -245,12 +244,15 @@ class FileSystem : public std::enable_shared_from_this { bool CanServe(uint64_t self_mds_id); Status GetPartitionParentInode(Context& ctx, PartitionPtr& partition, InodeSPtr& out_inode); + void AddDentryToPartition(Ino parent, const Dentry& dentry, uint64_t version); + void DeleteDentryFromPartition(Ino parent, const std::string& name, uint64_t version); + // get partition Status GetPartition(Context& ctx, Ino parent, PartitionPtr& out_partition); Status GetPartition(Context& ctx, uint64_t version, Ino parent, PartitionPtr& out_partition); PartitionPtr GetPartitionFromCache(Ino parent); std::map GetAllPartitionsFromCache(); - Status GetPartitionFromStore(Ino parent, const std::string& reason, PartitionPtr& out_partition); + Status GetPartitionFromStore(Context& ctx, Ino parent, const std::string& reason, PartitionPtr& out_partition); // get dentry Status GetDentryFromStore(Ino parent, const std::string& name, Dentry& dentry); @@ -263,7 +265,7 @@ class FileSystem : public std::enable_shared_from_this { Status GetInode(Context& ctx, Ino ino, InodeSPtr& out_inode); Status GetInode(Context& ctx, uint64_t version, Ino ino, InodeSPtr& out_inode); - Status GetInodeFromStore(Ino ino, const std::string& reason, bool is_cache, InodeSPtr& out_inode); + Status GetInodeFromStore(Context& ctx, Ino ino, const std::string& reason, bool is_cache, InodeSPtr& out_inode); Status BatchGetInodeFromStore(std::vector inoes, std::vector& out_inodes); Status GetDelFileFromStore(Ino ino, AttrEntry& out_attr); @@ -290,7 +292,7 @@ class FileSystem : public std::enable_shared_from_this { void NotifyBuddyRefreshFsInfo(std::vector mds_ids, const FsInfoEntry& fs_info); void NotifyBuddyRefreshInode(AttrEntry&& attr); - void NotifyBuddyCleanPartitionCache(Ino ino, uint64_t version); + void NotifyBuddyCleanPartitionCache(Ino ino); uint64_t self_mds_id_; diff --git a/src/mds/filesystem/partition.cc b/src/mds/filesystem/partition.cc index 2a9ecbca6..0a9ec574c 100644 --- a/src/mds/filesystem/partition.cc +++ b/src/mds/filesystem/partition.cc @@ -14,6 +14,8 @@ #include "mds/filesystem/partition.h" +#include + #include #include #include @@ -31,10 +33,16 @@ DEFINE_uint32(mds_partition_cache_max_count, 4 * 1024 * 1024, "partition cache m const uint32_t kDentryDefaultNum = 1024; -uint64_t Partition::Version() { +uint64_t Partition::BaseVersion() { + utils::ReadLockGuard lk(lock_); + + return base_version_; +} + +uint64_t Partition::DeltaVersion() { utils::ReadLockGuard lk(lock_); - return version_; + return delta_version_; } InodeSPtr Partition::ParentInode() { @@ -59,15 +67,24 @@ void Partition::PutChild(const Dentry& dentry, uint64_t version) { children_[dentry.Name()] = dentry; } - version_ = std::max(version, version_); + delta_dentry_ops_.push_back(DentryOp{DentryOpType::ADD, version, dentry}); + + delta_version_ = std::max(version, delta_version_); } void Partition::DeleteChild(const std::string& name, uint64_t version) { utils::WriteLockGuard lk(lock_); - children_.erase(name); + Dentry detry(name); + auto it = children_.find(name); + if (it != children_.end()) { + detry = it->second; + children_.erase(it); + } + + delta_dentry_ops_.push_back(DentryOp{DentryOpType::DELETE, version, detry}); - version_ = std::max(version, version_); + delta_version_ = std::max(version, delta_version_); } void Partition::DeleteChildIf(const std::string& name, Ino ino, uint64_t version) { @@ -75,10 +92,11 @@ void Partition::DeleteChildIf(const std::string& name, Ino ino, uint64_t version auto it = children_.find(name); if (it != children_.end() && it->second.INo() == ino) { + delta_dentry_ops_.push_back(DentryOp{DentryOpType::DELETE, version, it->second}); children_.erase(it); } - version_ = std::max(version, version_); + delta_version_ = std::max(version, delta_version_); } bool Partition::HasChild() { @@ -131,15 +149,56 @@ std::vector Partition::GetAllChildren() { return dentries; } +bool Partition::Merge(PartitionPtr& other_partition) { + CHECK(other_partition->ino_ == ino_) << "merge partition error, ino not match."; + + utils::WriteLockGuard lk(lock_); + + if (other_partition->BaseVersion() <= base_version_) return false; + + DINGO_LOG(INFO) << fmt::format("[partition.{}] merge, self({},{},{},{}) other({},{}).", ino_, base_version_, + delta_version_, children_.size(), delta_dentry_ops_.size(), + other_partition->BaseVersion(), other_partition->children_.size()); + + base_version_ = other_partition->BaseVersion(); + children_.swap(other_partition->children_); + + // apply delta ops + delta_dentry_ops_.sort([](const DentryOp& a, const DentryOp& b) -> bool { return a.version < b.version; }); + + delta_version_ = base_version_; + for (const auto& op : delta_dentry_ops_) { + if (op.version <= base_version_) continue; + + if (op.op_type == DentryOpType::ADD) { + children_[op.dentry.Name()] = op.dentry; + + } else if (op.op_type == DentryOpType::DELETE) { + children_.erase(op.dentry.Name()); + } + + delta_version_ = std::max(delta_version_, op.version); + } + + delta_dentry_ops_.clear(); + + return true; +} + PartitionCache::PartitionCache(uint32_t fs_id) : fs_id_(fs_id), cache_(FLAGS_mds_partition_cache_max_count, std::make_shared(fmt::format(kPartitionMetricsPrefix, fs_id))) {} PartitionCache::~PartitionCache() {} // NOLINT -void PartitionCache::PutIf(Ino ino, PartitionPtr partition) { - cache_.PutIf(ino, partition, - [&](PartitionPtr& old_partition) { return old_partition->Version() < partition->Version(); }); +PartitionPtr PartitionCache::PutIf(Ino ino, PartitionPtr partition) { + PartitionPtr new_partition = partition; + cache_.PutInplaceIf(ino, partition, [&](PartitionPtr& old_partition) { + old_partition->Merge(partition); + new_partition = old_partition; + }); + + return new_partition; } void PartitionCache::Delete(Ino ino) { @@ -148,10 +207,6 @@ void PartitionCache::Delete(Ino ino) { cache_.Remove(ino); } -void PartitionCache::DeleteIf(Ino ino, uint64_t version) { - cache_.RemoveIf(ino, [&](const PartitionPtr& partition) { return partition->Version() < version; }); -} - void PartitionCache::BatchDeleteInodeIf(const std::function& f) { DINGO_LOG(INFO) << fmt::format("[cache.partition.{}] batch delete inode.", fs_id_); diff --git a/src/mds/filesystem/partition.h b/src/mds/filesystem/partition.h index f27473222..da2399769 100644 --- a/src/mds/filesystem/partition.h +++ b/src/mds/filesystem/partition.h @@ -16,8 +16,12 @@ #define DINGOFS_MDS_FILESYSTEM_PARTITION_H_ #include +#include #include +#include +#include +#include "absl/container/btree_map.h" #include "json/value.h" #include "mds/filesystem/dentry.h" #include "mds/filesystem/inode.h" @@ -32,14 +36,16 @@ using PartitionPtr = std::shared_ptr; // consider locality class Partition { public: - Partition(InodeSPtr inode) : ino_(inode->Ino()), inode_(inode), version_(inode->Version()) {}; + Partition(InodeSPtr inode) : ino_(inode->Ino()), inode_(inode), base_version_(inode->Version()) {}; ~Partition() = default; static PartitionPtr New(InodeSPtr inode) { return std::make_shared(inode); } Ino INo() const { return ino_; } - uint64_t Version(); + uint64_t BaseVersion(); + uint64_t DeltaVersion(); + InodeSPtr ParentInode(); void SetParentInode(InodeSPtr parent_inode); @@ -52,16 +58,30 @@ class Partition { std::vector GetChildren(const std::string& start_name, uint32_t limit, bool is_only_dir); std::vector GetAllChildren(); + bool Merge(PartitionPtr& other_partition); + private: const Ino ino_; utils::RWLock lock_; + uint64_t base_version_{0}; + uint64_t delta_version_{0}; + InodeWPtr inode_; - uint64_t version_{0}; // name -> dentry - std::map children_; + absl::btree_map children_; + + // version -> dentry + enum class DentryOpType : uint8_t { ADD = 0, DELETE = 1 }; + + struct DentryOp { + DentryOpType op_type; + uint64_t version; + Dentry dentry; + }; + std::list delta_dentry_ops_; }; // use lru cache to store partition @@ -75,9 +95,8 @@ class PartitionCache { PartitionCache(PartitionCache&&) = delete; PartitionCache& operator=(PartitionCache&&) = delete; - void PutIf(Ino ino, PartitionPtr partition); + PartitionPtr PutIf(Ino ino, PartitionPtr partition); void Delete(Ino ino); - void DeleteIf(Ino ino, uint64_t version); void BatchDeleteInodeIf(const std::function& f); void Clear(); diff --git a/src/mds/service/debug_service.cc b/src/mds/service/debug_service.cc index 9543f9560..ed4d421f6 100644 --- a/src/mds/service/debug_service.cc +++ b/src/mds/service/debug_service.cc @@ -151,7 +151,7 @@ void DebugServiceImpl::GetPartition(google::protobuf::RpcController* controller, return; } - Context ctx(false, 0); + Context ctx(request->context(), request->info().request_id(), __func__); PartitionPtr partition; auto status = fs->GetPartition(ctx, request->parent(), partition); if (!status.ok()) { @@ -185,7 +185,7 @@ void DebugServiceImpl::GetInode(google::protobuf::RpcController*, const pb::debu return ServiceHelper::SetError(response->mutable_error(), pb::error::ENOT_FOUND, "fs not found"); } - Context ctx(!request->use_cache(), 0); + Context ctx(request->context(), request->info().request_id(), __func__); if (request->inoes().empty() && request->use_cache()) { auto inode_map = fs->GetAllInodesFromCache(); diff --git a/src/mds/service/fsstat_service.cc b/src/mds/service/fsstat_service.cc index d4d9082b1..6fff52b7e 100644 --- a/src/mds/service/fsstat_service.cc +++ b/src/mds/service/fsstat_service.cc @@ -1733,7 +1733,8 @@ void FsStatServiceImpl::default_method(::google::protobuf::RpcController* contro auto file_system = file_system_set->GetFileSystem(fs_id); if (file_system != nullptr) { InodeSPtr inode; - auto status = file_system->GetInodeFromStore(ino, "Stat", false, inode); + Context ctx("11111111111111", "fsstatservice"); + auto status = file_system->GetInodeFromStore(ctx, ino, "Stat", false, inode); if (status.ok()) { RenderInodePage(inode->Copy(), os); diff --git a/src/mds/service/mds_service.cc b/src/mds/service/mds_service.cc index d4c295266..a288cff3c 100644 --- a/src/mds/service/mds_service.cc +++ b/src/mds/service/mds_service.cc @@ -209,7 +209,7 @@ void MDSServiceImpl::DoGetMDSList(google::protobuf::RpcController*, const pb::md auto heartbeat = Server::GetInstance().GetHeartbeat(); - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); std::vector mdses; status = heartbeat->GetMDSList(ctx, mdses); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -323,7 +323,7 @@ void MDSServiceImpl::DoMountFs(google::protobuf::RpcController*, const pb::mds:: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); status = file_system_set_->MountFs(ctx, request->fs_name(), request->mount_point()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { @@ -382,7 +382,7 @@ void MDSServiceImpl::DoUmountFs(google::protobuf::RpcController*, const pb::mds: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); status = file_system_set_->UmountFs(ctx, request->fs_name(), request->client_id()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { @@ -435,7 +435,7 @@ void MDSServiceImpl::DoDeleteFs(google::protobuf::RpcController*, const pb::mds: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); status = file_system_set_->DeleteFs(ctx, request->fs_name(), request->is_force()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { @@ -490,7 +490,7 @@ void MDSServiceImpl::DoGetFsInfo(google::protobuf::RpcController*, const pb::mds fs_name = file_system->FsName(); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); pb::mds::FsInfo fs_info; auto status = file_system_set_->GetFsInfo(ctx, fs_name, fs_info); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -542,7 +542,7 @@ void MDSServiceImpl::DoListFsInfo(google::protobuf::RpcController*, const pb::md return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); std::vector fs_infoes; status = file_system_set_->GetAllFsInfo(ctx, true, fs_infoes); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -579,7 +579,7 @@ void MDSServiceImpl::DoUpdateFsInfo(google::protobuf::RpcController*, const pb:: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); status = file_system_set_->UpdateFsInfo(ctx, request->fs_name(), request->fs_info()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { @@ -630,8 +630,7 @@ void MDSServiceImpl::DoGetDentry(google::protobuf::RpcController*, const pb::mds return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); Dentry dentry; status = file_system->GetDentry(ctx, request->parent(), request->name(), dentry); @@ -671,8 +670,7 @@ void MDSServiceImpl::DoListDentry(google::protobuf::RpcController*, const pb::md return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::vector dentries; uint32_t limit = request->limit() > 0 ? request->limit() : UINT32_MAX; @@ -732,8 +730,7 @@ void MDSServiceImpl::DoGetInode(google::protobuf::RpcController*, const pb::mds: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->GetInode(ctx, request->ino(), entry_out); @@ -773,8 +770,7 @@ void MDSServiceImpl::DoBatchGetInode(google::protobuf::RpcController*, const pb: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::vector entries; status = file_system->BatchGetInode(ctx, Helper::PbRepeatedToVector(request->inoes()), entries); @@ -816,8 +812,7 @@ void MDSServiceImpl::DoBatchGetXAttr(google::protobuf::RpcController*, const pb: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::vector xattrs; status = file_system->BatchGetXAttr(ctx, Helper::PbRepeatedToVector(request->inoes()), xattrs); @@ -860,8 +855,7 @@ void MDSServiceImpl::DoLookup(google::protobuf::RpcController* controller, const return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->Lookup(ctx, request->parent(), request->name(), entry_out); @@ -914,9 +908,7 @@ void MDSServiceImpl::DoBatchCreate(google::protobuf::RpcController*, const pb::m params.push_back(param); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - ctx.SetAncestors(Helper::PbRepeatedToVector(request->context().ancestors())); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; std::vector session_ids; @@ -985,9 +977,7 @@ void MDSServiceImpl::DoMkNod(google::protobuf::RpcController*, const pb::mds::Mk param.gid = request->gid(); param.rdev = request->rdev(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - ctx.SetAncestors(Helper::PbRepeatedToVector(request->context().ancestors())); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->MkNod(ctx, param, entry_out); @@ -1035,9 +1025,7 @@ void MDSServiceImpl::DoMkDir(google::protobuf::RpcController*, const pb::mds::Mk param.gid = request->gid(); param.rdev = request->rdev(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - ctx.SetAncestors(Helper::PbRepeatedToVector(request->context().ancestors())); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->MkDir(ctx, param, entry_out); @@ -1077,9 +1065,7 @@ void MDSServiceImpl::DoRmDir(google::protobuf::RpcController*, const pb::mds::Rm return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - ctx.SetAncestors(Helper::PbRepeatedToVector(request->context().ancestors())); + Context ctx(request->context(), request->info().request_id(), __func__); status = file_system->RmDir(ctx, request->parent(), request->name()); if (BAIDU_UNLIKELY(!status.ok())) { @@ -1114,8 +1100,7 @@ void MDSServiceImpl::DoReadDir(google::protobuf::RpcController*, const pb::mds:: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::vector entry_outs; status = file_system->ReadDir(ctx, request->ino(), request->last_name(), request->limit(), request->with_attr(), @@ -1133,6 +1118,8 @@ void MDSServiceImpl::DoReadDir(google::protobuf::RpcController*, const pb::mds:: mut_entry->mutable_inode()->Swap(&entry_out.attr); } } + + response->set_count(entry_outs.size()); } void MDSServiceImpl::ReadDir(google::protobuf::RpcController* controller, const pb::mds::ReadDirRequest* request, @@ -1162,8 +1149,7 @@ void MDSServiceImpl::DoOpen(google::protobuf::RpcController*, const pb::mds::Ope return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version(), req_ctx.client_id()); + Context ctx(request->context(), request->info().request_id(), __func__); std::string session_id; EntryOut entry_out; @@ -1207,8 +1193,7 @@ void MDSServiceImpl::DoRelease(google::protobuf::RpcController*, const pb::mds:: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version(), req_ctx.client_id()); + Context ctx(request->context(), request->info().request_id(), __func__); status = file_system->Release(ctx, request->ino(), request->session_id()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -1244,9 +1229,7 @@ void MDSServiceImpl::DoLink(google::protobuf::RpcController*, const pb::mds::Lin return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - ctx.SetAncestors(Helper::PbRepeatedToVector(request->context().ancestors())); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->Link(ctx, request->ino(), request->new_parent(), request->new_name(), entry_out); @@ -1286,9 +1269,7 @@ void MDSServiceImpl::DoUnLink(google::protobuf::RpcController*, const pb::mds::U return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - ctx.SetAncestors(Helper::PbRepeatedToVector(request->context().ancestors())); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->UnLink(ctx, request->parent(), request->name(), entry_out); @@ -1328,9 +1309,7 @@ void MDSServiceImpl::DoSymlink(google::protobuf::RpcController*, const pb::mds:: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - ctx.SetAncestors(Helper::PbRepeatedToVector(request->context().ancestors())); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->Symlink(ctx, request->symlink(), request->new_parent(), request->new_name(), request->uid(), @@ -1371,8 +1350,7 @@ void MDSServiceImpl::DoReadLink(google::protobuf::RpcController*, const pb::mds: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::string symlink; status = file_system->ReadLink(ctx, request->ino(), symlink); @@ -1411,8 +1389,7 @@ void MDSServiceImpl::DoGetAttr(google::protobuf::RpcController*, const pb::mds:: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->GetAttr(ctx, request->ino(), entry_out); @@ -1483,8 +1460,7 @@ void MDSServiceImpl::DoSetAttr(google::protobuf::RpcController*, const pb::mds:: attr.set_flags(request->flags()); param.to_set = request->to_set(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->SetAttr(ctx, request->ino(), param, entry_out); @@ -1541,8 +1517,7 @@ void MDSServiceImpl::DoGetXAttr(google::protobuf::RpcController*, const pb::mds: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::string value; status = file_system->GetXAttr(ctx, request->ino(), request->name(), value); @@ -1602,8 +1577,7 @@ void MDSServiceImpl::DoSetXAttr(google::protobuf::RpcController*, const pb::mds: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->SetXAttr(ctx, request->ino(), request->xattrs(), entry_out); @@ -1663,8 +1637,7 @@ void MDSServiceImpl::DoRemoveXAttr(google::protobuf::RpcController*, const pb::m return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->RemoveXAttr(ctx, request->ino(), request->name(), entry_out); @@ -1725,8 +1698,7 @@ void MDSServiceImpl::DoListXAttr(google::protobuf::RpcController*, const pb::mds return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); Inode::XAttrMap xattrs; status = file_system->GetXAttr(ctx, request->ino(), xattrs); @@ -1765,8 +1737,7 @@ void MDSServiceImpl::DoRename(google::protobuf::RpcController*, const pb::mds::R return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); FileSystem::RenameParam param; param.old_parent = request->old_parent(); @@ -1850,8 +1821,7 @@ void MDSServiceImpl::DoWriteSlice(google::protobuf::RpcController*, const pb::md return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::vector chunk_descriptors; status = file_system->WriteSlice(ctx, request->parent(), request->ino(), @@ -1915,8 +1885,7 @@ void MDSServiceImpl::DoReadSlice(google::protobuf::RpcController*, const pb::mds return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::vector chunks; status = @@ -1978,8 +1947,7 @@ void MDSServiceImpl::DoFallocate(google::protobuf::RpcController*, const pb::mds return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); EntryOut entry_out; status = file_system->Fallocate(ctx, request->ino(), request->mode(), request->offset(), request->len(), entry_out); @@ -2038,8 +2006,7 @@ void MDSServiceImpl::DoCompactChunk(google::protobuf::RpcController*, const pb:: return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); } - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::vector trash_slices; status = file_system->CompactChunk(ctx, request->ino(), request->chunk_index(), trash_slices); @@ -2094,11 +2061,10 @@ void MDSServiceImpl::DoCleanTrashSlice(google::protobuf::RpcController*, const p brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - auto& trace = ctx.GetTrace(); + Context ctx(request->context(), request->info().request_id(), __func__); - auto status = gc_processor_->ManualCleanDelSlice(trace, request->fs_id(), request->ino(), request->chunk_index()); + auto status = + gc_processor_->ManualCleanDelSlice(ctx.GetTrace(), request->fs_id(), request->ino(), request->chunk_index()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); @@ -2147,11 +2113,9 @@ void MDSServiceImpl::DoCleanDelFile(google::protobuf::RpcController*, const pb:: brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); - auto& trace = ctx.GetTrace(); + Context ctx(request->context(), request->info().request_id(), __func__); - auto status = gc_processor_->ManualCleanDelFile(trace, request->fs_id(), request->ino()); + auto status = gc_processor_->ManualCleanDelFile(ctx.GetTrace(), request->fs_id(), request->ino()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); @@ -2207,8 +2171,7 @@ void MDSServiceImpl::DoSetFsQuota(google::protobuf::RpcController*, const pb::md auto& quota_manager = file_system->GetQuotaManager(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); status = quota_manager.SetFsQuota(ctx.GetTrace(), request->quota()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -2262,11 +2225,10 @@ void MDSServiceImpl::DoGetFsQuota(google::protobuf::RpcController*, const pb::md auto& quota_manager = file_system->GetQuotaManager(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); pb::mds::Quota quota; - status = quota_manager.GetFsQuota(ctx.GetTrace(), req_ctx.is_bypass_cache(), quota); + status = quota_manager.GetFsQuota(ctx.GetTrace(), ctx.IsBypassCache(), quota); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { return ServiceHelper::SetError(response->mutable_error(), status.error_code(), status.error_str()); @@ -2319,8 +2281,7 @@ void MDSServiceImpl::DoSetDirQuota(google::protobuf::RpcController*, const pb::m auto& quota_manager = file_system->GetQuotaManager(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); status = quota_manager.SetDirQuota(ctx.GetTrace(), request->ino(), request->quota(), true); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -2374,8 +2335,7 @@ void MDSServiceImpl::DoGetDirQuota(google::protobuf::RpcController*, const pb::m auto& quota_manager = file_system->GetQuotaManager(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); pb::mds::Quota quota; status = quota_manager.GetDirQuota(ctx.GetTrace(), request->ino(), quota); @@ -2432,8 +2392,7 @@ void MDSServiceImpl::DoDeleteDirQuota(google::protobuf::RpcController*, const pb auto& quota_manager = file_system->GetQuotaManager(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); status = quota_manager.DeleteDirQuota(ctx.GetTrace(), request->ino()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -2487,8 +2446,7 @@ void MDSServiceImpl::DoLoadDirQuotas(google::protobuf::RpcController*, const pb: auto& quota_manager = file_system->GetQuotaManager(); - const auto& req_ctx = request->context(); - Context ctx(req_ctx.is_bypass_cache(), req_ctx.inode_version()); + Context ctx(request->context(), request->info().request_id(), __func__); std::map quotas; status = quota_manager.LoadDirQuotas(ctx.GetTrace(), quotas); @@ -2549,7 +2507,8 @@ void MDSServiceImpl::DoSetFsStats(google::protobuf::RpcController*, const pb::md return ServiceHelper::SetError(response->mutable_error(), pb::error::ENOT_FOUND, "fs not found"); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); + status = fs_stat_->UploadFsStat(ctx, fs_id, request->stats()); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); if (BAIDU_UNLIKELY(!status.ok())) { @@ -2602,7 +2561,8 @@ void MDSServiceImpl::DoGetFsStats(google::protobuf::RpcController*, const pb::md return ServiceHelper::SetError(response->mutable_error(), pb::error::ENOT_FOUND, "fs not found"); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); + pb::mds::FsStatsData stats; status = fs_stat_->GetFsStat(ctx, fs_id, stats); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -2660,7 +2620,8 @@ void MDSServiceImpl::DoGetFsPerSecondStats(google::protobuf::RpcController*, return ServiceHelper::SetError(response->mutable_error(), pb::error::ENOT_FOUND, "fs not found"); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); + std::map stats; status = fs_stat_->GetFsStatsPerSecond(ctx, fs_id, stats); ServiceHelper::SetResponseInfo(ctx.GetTrace(), response->mutable_info()); @@ -2745,8 +2706,7 @@ void MDSServiceImpl::DoNotifyBuddy(google::protobuf::RpcController*, const pb::m return ServiceHelper::SetError(response->mutable_error(), pb::error::ENOT_FOUND, "fs not found"); } - file_system->GetPartitionCache().DeleteIf(message.clean_partition_cache().ino(), - message.clean_partition_cache().version()); + file_system->GetPartitionCache().Delete(message.clean_partition_cache().ino()); } break; @@ -2803,7 +2763,8 @@ void MDSServiceImpl::JoinFs(google::protobuf::RpcController* controller, const p return ServiceHelper::SetError(response->mutable_error(), pb::error::EILLEGAL_PARAMTETER, "mds_ids is empty"); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); + std::vector mds_ids = Helper::PbRepeatedToVector(request->mds_ids()); std::string reason = "manual join fs"; @@ -2831,7 +2792,8 @@ void MDSServiceImpl::QuitFs(google::protobuf::RpcController* controller, const p return ServiceHelper::SetError(response->mutable_error(), pb::error::EILLEGAL_PARAMTETER, "mds_ids is empty"); } - Context ctx; + Context ctx(request->context(), request->info().request_id(), __func__); + std::vector mds_ids = Helper::PbRepeatedToVector(request->mds_ids()); std::string reason = "manual quit fs"; @@ -3055,8 +3017,7 @@ void MDSServiceImpl::DeleteMember(google::protobuf::RpcController* controller, } } -void MDSServiceImpl::DoJoinCacheGroup(google::protobuf::RpcController* controller, - const pb::mds::JoinCacheGroupRequest* request, +void MDSServiceImpl::DoJoinCacheGroup(google::protobuf::RpcController*, const pb::mds::JoinCacheGroupRequest* request, pb::mds::JoinCacheGroupResponse* response, TraceClosure* done) { brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); @@ -3071,8 +3032,7 @@ void MDSServiceImpl::DoJoinCacheGroup(google::protobuf::RpcController* controlle } } -void MDSServiceImpl::DoLeaveCacheGroup(google::protobuf::RpcController* controller, - const pb::mds::LeaveCacheGroupRequest* request, +void MDSServiceImpl::DoLeaveCacheGroup(google::protobuf::RpcController*, const pb::mds::LeaveCacheGroupRequest* request, pb::mds::LeaveCacheGroupResponse* response, TraceClosure* done) { brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); @@ -3086,9 +3046,8 @@ void MDSServiceImpl::DoLeaveCacheGroup(google::protobuf::RpcController* controll } } -void MDSServiceImpl::DoListGroups(google::protobuf::RpcController* controller, - const pb::mds::ListGroupsRequest* request, pb::mds::ListGroupsResponse* response, - TraceClosure* done) { +void MDSServiceImpl::DoListGroups(google::protobuf::RpcController*, const pb::mds::ListGroupsRequest* request, + pb::mds::ListGroupsResponse* response, TraceClosure* done) { brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); @@ -3102,8 +3061,7 @@ void MDSServiceImpl::DoListGroups(google::protobuf::RpcController* controller, *response->mutable_group_names() = {groups.begin(), groups.end()}; } -void MDSServiceImpl::DoReweightMember(google::protobuf::RpcController* controller, - const pb::mds::ReweightMemberRequest* request, +void MDSServiceImpl::DoReweightMember(google::protobuf::RpcController*, const pb::mds::ReweightMemberRequest* request, pb::mds::ReweightMemberResponse* response, TraceClosure* done) { brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); @@ -3117,9 +3075,8 @@ void MDSServiceImpl::DoReweightMember(google::protobuf::RpcController* controlle } } -void MDSServiceImpl::DoListMembers(google::protobuf::RpcController* controller, - const pb::mds::ListMembersRequest* request, pb::mds::ListMembersResponse* response, - TraceClosure* done) { +void MDSServiceImpl::DoListMembers(google::protobuf::RpcController*, const pb::mds::ListMembersRequest* request, + pb::mds::ListMembersResponse* response, TraceClosure* done) { brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); @@ -3133,8 +3090,7 @@ void MDSServiceImpl::DoListMembers(google::protobuf::RpcController* controller, *response->mutable_members() = {members.begin(), members.end()}; } -void MDSServiceImpl::DoUnlockMember(google::protobuf::RpcController* controller, - const pb::mds::UnLockMemberRequest* request, +void MDSServiceImpl::DoUnlockMember(google::protobuf::RpcController*, const pb::mds::UnLockMemberRequest* request, pb::mds::UnLockMemberResponse* response, TraceClosure* done) { brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); @@ -3147,8 +3103,7 @@ void MDSServiceImpl::DoUnlockMember(google::protobuf::RpcController* controller, } } -void MDSServiceImpl::DoDeleteMember(google::protobuf::RpcController* controller, - const pb::mds::DeleteMemberRequest* request, +void MDSServiceImpl::DoDeleteMember(google::protobuf::RpcController*, const pb::mds::DeleteMemberRequest* request, pb::mds::DeleteMemberResponse* response, TraceClosure* done) { brpc::ClosureGuard done_guard(done); done->SetQueueWaitTime(); diff --git a/src/mds/service/service_helper.h b/src/mds/service/service_helper.h index b569ab326..6c87884c0 100644 --- a/src/mds/service/service_helper.h +++ b/src/mds/service/service_helper.h @@ -173,12 +173,12 @@ void ServiceClosure::Run() { mut_time->set_queue_wait_time_us(queue_wait_time_us); if (response_->error().errcode() != 0) { - if (response_->error().errcode() != pb::error::ENOT_FOUND) { - LOG(ERROR) << fmt::format("[service.{}][request_id({})][{}us] Request fail, request({}) response({})", - method_name_, request_->info().request_id(), elapsed_time_us, - request_->ShortDebugString().substr(0, FLAGS_mds_service_log_print_max_length), - response_->ShortDebugString().substr(0, FLAGS_mds_service_log_print_max_length)); - } + // if (response_->error().errcode() != pb::error::ENOT_FOUND) { + LOG(ERROR) << fmt::format("[service.{}][request_id({})][{}us] Request fail, request({}) response({})", method_name_, + request_->info().request_id(), elapsed_time_us, + request_->ShortDebugString().substr(0, FLAGS_mds_service_log_print_max_length), + response_->ShortDebugString().substr(0, FLAGS_mds_service_log_print_max_length)); + } else { if (BAIDU_UNLIKELY(elapsed_time_us >= FLAGS_mds_service_log_threshold_time_us)) { LOG(INFO) << fmt::format("[service.{}][request_id({})][{}us] Request finish, request({}) response({})", From de057d7ef3d7ad95ba101c1ba99ea0224dad7106 Mon Sep 17 00:00:00 2001 From: Zihui Deng <451809218@qq.com> Date: Sun, 16 Nov 2025 17:33:34 +0800 Subject: [PATCH 4/5] [fix][mds] Fixup compact chunk issues(use old file length). --- src/mds/filesystem/filesystem.cc | 13 ++------- src/mds/filesystem/store_operation.cc | 42 +++++++++++++++++++-------- src/mds/filesystem/store_operation.h | 24 +++++---------- 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/src/mds/filesystem/filesystem.cc b/src/mds/filesystem/filesystem.cc index b0c8d612e..c948b4c31 100644 --- a/src/mds/filesystem/filesystem.cc +++ b/src/mds/filesystem/filesystem.cc @@ -1976,8 +1976,7 @@ Status FileSystem::WriteSlice(Context& ctx, Ino, Ino ino, const std::vectorGetIno(), std::move(result.effected_chunk)); }; - operation_processor_->AsyncRun(CompactChunkOperation::New(fs_info, ino, chunk.index(), attr.length()), - post_handler); + operation_processor_->AsyncRun(CompactChunkOperation::New(fs_info, ino, chunk.index()), post_handler); } } } @@ -2139,18 +2138,12 @@ Status FileSystem::CompactChunk(Context& ctx, Ino ino, uint64_t chunk_index, return Status(pb::error::ENOT_SERVE, "can not serve"); } - InodeSPtr inode; - auto status = GetInode(ctx, ino, inode); - if (!status.ok()) { - return status; - } - auto& trace = ctx.GetTrace(); Duration duration; - CompactChunkOperation operation(trace, GetFsInfo(), ino, chunk_index, inode->Length(), true); + CompactChunkOperation operation(trace, GetFsInfo(), ino, chunk_index, true); - status = RunOperation(&operation); + auto status = RunOperation(&operation); auto& result = operation.GetResult(); auto& effected_chunk = result.effected_chunk; diff --git a/src/mds/filesystem/store_operation.cc b/src/mds/filesystem/store_operation.cc index d5ae61921..96b1cbcd8 100644 --- a/src/mds/filesystem/store_operation.cc +++ b/src/mds/filesystem/store_operation.cc @@ -1508,13 +1508,13 @@ Status RenameOperation::Run(TxnUPtr& txn) { bool CompactChunkOperation::MaybeCompact(const FsInfoEntry& fs_info, Ino ino, uint64_t file_length, const ChunkEntry& chunk) { - auto trash_slice_list = GenTrashSlices(fs_info, ino, file_length, chunk); + auto trash_slice_list = GenTrashSlices(fs_info, ino, file_length, chunk, true); return !trash_slice_list.slices().empty(); } TrashSliceList CompactChunkOperation::GenTrashSlices(const FsInfoEntry& fs_info, Ino ino, uint64_t file_length, - const ChunkEntry& chunk) { + const ChunkEntry& chunk, bool is_dry_run) { struct Slice { uint32_t sort_id; uint64_t id; @@ -1753,15 +1753,16 @@ TrashSliceList CompactChunkOperation::GenTrashSlices(const FsInfoEntry& fs_info, slice_id_str += ","; } - DINGO_LOG(INFO) << fmt::format("[operation.{}.{}.{}] trash slice, count({}/{}/{}/{}) slice_ids({}).", fs_id, ino, - chunk.index(), out_of_length_count, complete_overlapped_count, - partial_overlapped_count, trash_slices.slices_size(), slice_id_str); + DINGO_LOG(INFO) << fmt::format( + "[operation.{}.{}.{}] trash slice, is_dry_run({}) length({}) count({}/{}/{}/{}) slice_ids({}).", fs_id, ino, + chunk.index(), is_dry_run, file_length, out_of_length_count, complete_overlapped_count, partial_overlapped_count, + trash_slices.slices_size(), slice_id_str); return trash_slices; } TrashSliceList CompactChunkOperation::GenTrashSlices(Ino ino, uint64_t file_length, const ChunkEntry& chunk) { - return GenTrashSlices(fs_info_, ino, file_length, chunk); + return GenTrashSlices(fs_info_, ino, file_length, chunk, false); } void CompactChunkOperation::UpdateChunk(ChunkEntry& chunk, const TrashSliceList& trash_slices) { @@ -1829,12 +1830,29 @@ Status CompactChunkOperation::Run(TxnUPtr& txn) { CHECK(fs_id > 0) << "fs_id is 0"; CHECK(ino_ > 0) << "ino is 0."; - std::string key = MetaCodec::EncodeChunkKey(fs_id, ino_, chunk_index_); - std::string value; - auto status = txn->Get(key, value); + std::string inode_key = MetaCodec::EncodeInodeKey(fs_id, ino_); + std::string chunk_key = MetaCodec::EncodeChunkKey(fs_id, ino_, chunk_index_); + + std::vector kvs; + auto status = txn->BatchGet({inode_key, chunk_key}, kvs); if (!status.ok()) return status; - ChunkEntry chunk = MetaCodec::DecodeChunkValue(value); + if (kvs.size() != 2) { + return Status(pb::error::ENOT_FOUND, "not found inode/chunk"); + } + + AttrEntry attr; + ChunkEntry chunk; + for (auto& kv : kvs) { + if (kv.key == inode_key) { + attr = MetaCodec::DecodeInodeValue(kv.value); + + } else if (kv.key == chunk_key) { + chunk = MetaCodec::DecodeChunkValue(kv.value); + } + } + CHECK(attr.ino() > 0) << "attr is null."; + CHECK(chunk.index() == chunk_index_) << "chunk index not match."; // reduce compact frequency if (!is_force_ && chunk.last_compaction_time_ms() + FLAGS_mds_compact_chunk_interval_ms > @@ -1842,11 +1860,11 @@ Status CompactChunkOperation::Run(TxnUPtr& txn) { return Status(pb::error::ENOT_MATCH, "not match compact condition"); } - auto trash_slice_list = CompactChunk(txn, fs_id, ino_, file_length_, chunk); + auto trash_slice_list = CompactChunk(txn, fs_id, ino_, attr.length(), chunk); if (!trash_slice_list.slices().empty()) { chunk.set_version(chunk.version() + 1); chunk.set_last_compaction_time_ms(Helper::TimestampMs()); - txn->Put(key, MetaCodec::EncodeChunkValue(chunk)); + txn->Put(chunk_key, MetaCodec::EncodeChunkValue(chunk)); result_.trash_slice_list = std::move(trash_slice_list); result_.effected_chunk = std::move(chunk); diff --git a/src/mds/filesystem/store_operation.h b/src/mds/filesystem/store_operation.h index 4b4f6af0b..d108e56cb 100644 --- a/src/mds/filesystem/store_operation.h +++ b/src/mds/filesystem/store_operation.h @@ -971,16 +971,10 @@ using CompactChunkOperationSPtr = std::shared_ptr; class CompactChunkOperation : public Operation { public: - CompactChunkOperation(Trace& trace, const FsInfoEntry& fs_info, uint64_t ino, uint64_t chunk_index, - uint64_t file_length, bool is_force) - : Operation(trace), - fs_info_(fs_info), - ino_(ino), - chunk_index_(chunk_index), - file_length_(file_length), - is_force_(is_force) {}; - CompactChunkOperation(const FsInfoEntry& fs_info, uint64_t ino, uint64_t chunk_index, uint64_t file_length) - : Operation(trace_), fs_info_(fs_info), ino_(ino), chunk_index_(chunk_index), file_length_(file_length) {}; + CompactChunkOperation(Trace& trace, const FsInfoEntry& fs_info, uint64_t ino, uint64_t chunk_index, bool is_force) + : Operation(trace), fs_info_(fs_info), ino_(ino), chunk_index_(chunk_index), is_force_(is_force) {}; + CompactChunkOperation(const FsInfoEntry& fs_info, uint64_t ino, uint64_t chunk_index) + : Operation(trace_), fs_info_(fs_info), ino_(ino), chunk_index_(chunk_index) {}; ~CompactChunkOperation() override = default; struct Result : public Operation::Result { @@ -988,9 +982,8 @@ class CompactChunkOperation : public Operation { ChunkEntry effected_chunk; }; - static CompactChunkOperationSPtr New(const FsInfoEntry& fs_info, uint64_t ino, uint64_t chunk_index, - uint64_t file_length) { - return std::make_shared(fs_info, ino, chunk_index, file_length); + static CompactChunkOperationSPtr New(const FsInfoEntry& fs_info, uint64_t ino, uint64_t chunk_index) { + return std::make_shared(fs_info, ino, chunk_index); } OpType GetOpType() const override { return OpType::kCompactChunk; } @@ -1013,12 +1006,12 @@ class CompactChunkOperation : public Operation { static TrashSliceList TestGenTrashSlices(const FsInfoEntry& fs_info, Ino ino, uint64_t file_length, const ChunkEntry& chunk) { - return GenTrashSlices(fs_info, ino, file_length, chunk); + return GenTrashSlices(fs_info, ino, file_length, chunk, true); } private: static TrashSliceList GenTrashSlices(const FsInfoEntry& fs_info, Ino ino, uint64_t file_length, - const ChunkEntry& chunk); + const ChunkEntry& chunk, bool is_dry_run); TrashSliceList GenTrashSlices(Ino ino, uint64_t file_length, const ChunkEntry& chunk); static void UpdateChunk(ChunkEntry& chunk, const TrashSliceList& trash_slices); TrashSliceList DoCompactChunk(Ino ino, uint64_t file_length, ChunkEntry& chunk); @@ -1028,7 +1021,6 @@ class CompactChunkOperation : public Operation { FsInfoEntry fs_info_; uint64_t ino_; uint64_t chunk_index_{0}; - uint64_t file_length_{0}; bool is_force_{false}; From 2d3ab9ac15c3b42f991ac131d8db2c491950fbd0 Mon Sep 17 00:00:00 2001 From: Zihui Deng <451809218@qq.com> Date: Tue, 18 Nov 2025 16:02:01 +0800 Subject: [PATCH 5/5] [fix][mds] Fixup readdir issues again. --- src/client/fuse/fuse_op.cc | 74 ++++++---- src/client/vfs/metasystem/mds/dir_iterator.cc | 73 +++++---- src/client/vfs/metasystem/mds/dir_iterator.h | 20 ++- src/client/vfs/metasystem/mds/filesystem.cc | 22 ++- src/client/vfs/vfs_impl.cc | 14 +- src/client/vfs/vfs_impl.h | 1 + src/client/vfs/vfs_wrapper.cc | 19 +-- src/common/options/client.cc | 15 +- src/common/options/client.h | 5 +- src/mds/filesystem/filesystem.cc | 24 +-- src/mds/filesystem/partition.cc | 48 ++++-- src/mds/filesystem/partition.h | 10 +- test/unit/mds/filesystem/test_dentry.cc | 138 +++++++++++++++++- test/unit/mds/filesystem/test_partition.cc | 64 ++++++-- 14 files changed, 391 insertions(+), 136 deletions(-) diff --git a/src/client/fuse/fuse_op.cc b/src/client/fuse/fuse_op.cc index 02145778e..855866e5e 100644 --- a/src/client/fuse/fuse_op.cc +++ b/src/client/fuse/fuse_op.cc @@ -16,7 +16,11 @@ #include "client/fuse/fuse_op.h" +#include + +#include #include +#include #include #include #include @@ -38,6 +42,7 @@ static dingofs::client::vfs::VFSWrapper* g_vfs = nullptr; USING_FLAG(client_fuse_file_info_direct_io) USING_FLAG(client_fuse_file_info_keep_cache) +USING_FLAG(client_fuse_enable_readdir_cache) using dingofs::Status; using dingofs::client::vfs::Attr; @@ -91,10 +96,8 @@ void Attr2FuseEntry(const Attr& attr, struct fuse_entry_param* e) { e->generation = 0; Attr2Stat(attr, &e->attr); - // e->attr_timeout = g_vfs->GetAttrTimeout(attr.type); - // e->entry_timeout = g_vfs->GetEntryTimeout(attr.type); - e->attr_timeout = 0; - e->entry_timeout = 0; + e->attr_timeout = g_vfs->GetAttrTimeout(attr.type); + e->entry_timeout = g_vfs->GetEntryTimeout(attr.type); } Attr Stat2Attr(struct stat* stat) { @@ -580,11 +583,8 @@ void FuseOpOpenDir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info* fi) { } else { fi->fh = fh; - // fi->cache_readdir = FLAGS_client_fuse_file_info_keep_cache ? 1 : 0; - // fi->keep_cache = FLAGS_client_fuse_file_info_keep_cache ? 1 : 0; - - fi->cache_readdir = 0; - fi->keep_cache = 0; + fi->cache_readdir = FLAGS_client_fuse_enable_readdir_cache ? 1 : 0; + fi->keep_cache = FLAGS_client_fuse_enable_readdir_cache ? 1 : 0; ReplyOpen(req, fi); } @@ -597,13 +597,15 @@ void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, CHECK_GE(off, 0) << "offset is illegal, offset: " << off; + off_t next_off = off; size_t writed_size = 0; std::string buffer(size, '\0'); Status s = g_vfs->ReadDir( ino, fi->fh, off, false, [&](const dingofs::client::vfs::DirEntry& dir_entry, uint64_t) -> bool { - VLOG(1) << fmt::format("read dir({}/{}) fh({}) entry({}/{}).", ino, off, - fi->fh, dir_entry.name, dir_entry.ino); + VLOG(1) << fmt::format("read dir({}) off[{},{}) fh({}) entry({}/{}).", + ino, off, next_off, fi->fh, dir_entry.name, + dir_entry.ino); struct stat stat; std::memset(&stat, 0, sizeof(stat)); @@ -616,9 +618,9 @@ void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, dir_entry.name.c_str(), &stat, ++off); if (entsize > rest_size) { VLOG(1) << fmt::format( - "read dir entry is full, ino({}) fh({}) off({}) size({}/{}) " - "entry_size({}).", - ino, fi->fh, off, buffer.size(), size, entsize); + "read dir entry is full, ino({}) fh({}) off[{},{}) size({}) " + "entry_size({}) rest_size({}).", + ino, fi->fh, off, next_off, buffer.size(), entsize, rest_size); return false; } @@ -629,14 +631,17 @@ void FuseOpReadDir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, if (!s.ok()) { LOG(ERROR) << fmt::format( - "read dir fail, ino({}) fh({}) off({}) size({}) error({}).", ino, - fi->fh, off, size, s.ToString()); + "read dir fail, ino({}) fh({}) off[{},{}) size({}) error({}).", ino, + fi->fh, off, next_off, size, s.ToString()); ReplyError(req, s); + } else { buffer.resize(writed_size); - VLOG(1) << fmt::format("read dir success, ino({}) fh({}) off({}) size({}).", - ino, fi->fh, off, buffer.size()); + VLOG(1) << fmt::format( + "read dir success, ino({}) fh({}) off[{},{}) size({}).", ino, fi->fh, + off, next_off, writed_size); + ReplyBuf(req, buffer.data(), buffer.size()); } } @@ -648,15 +653,17 @@ void FuseOpReadDirPlus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, CHECK_GE(off, 0) << "offset is illegal, offset: " << off; + off_t next_off = off; size_t writed_size = 0; std::string buffer(size, '\0'); Status s = g_vfs->ReadDir( ino, fi->fh, off, true, - [&](const dingofs::client::vfs::DirEntry& dir_entry, - uint64_t off2) -> bool { + [ino, off, &next_off, &req, &fi, &buffer, &writed_size]( + const dingofs::client::vfs::DirEntry& dir_entry, uint64_t) -> bool { VLOG(1) << fmt::format( - "read dir({}/{}/{}) fh({}) entry({}/{}) attr({}).", ino, off, off2, - fi->fh, dir_entry.name, dir_entry.ino, Attr2Str(dir_entry.attr)); + "read dir({}) off[{},{}) fh({}) entry({}/{}) attr({}).", ino, off, + next_off, fi->fh, dir_entry.name, dir_entry.ino, + Attr2Str(dir_entry.attr)); fuse_entry_param fuse_entry; memset(&fuse_entry, 0, sizeof(fuse_entry_param)); @@ -664,16 +671,17 @@ void FuseOpReadDirPlus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, size_t rest_size = buffer.size() - writed_size; - size_t entsize = - fuse_add_direntry_plus(req, buffer.data() + writed_size, rest_size, - dir_entry.name.c_str(), &fuse_entry, off2); + size_t entsize = fuse_add_direntry_plus( + req, buffer.data() + writed_size, rest_size, dir_entry.name.c_str(), + &fuse_entry, next_off + 1); if (entsize > rest_size) { VLOG(1) << fmt::format( - "read dir entry is full, ino({}) fh({}) off({}) size({}/{}) " - "entry_size({})", - ino, fi->fh, off2, buffer.size(), size, entsize); + "read dir entry is full, ino({}) fh({}) off[{},{}) size({}) " + "entry_size({}) rest_size({}).", + ino, fi->fh, off, next_off, buffer.size(), entsize, rest_size); return false; } + ++next_off; writed_size += entsize; return true; @@ -681,13 +689,15 @@ void FuseOpReadDirPlus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, if (!s.ok()) { LOG(ERROR) << fmt::format( - "read dir fail, ino({}) fh({}) off({}) size({}) error({}).", ino, - fi->fh, off, size, s.ToString()); + "read dir fail, ino({}) fh({}) off[{},{}) size({}) error({}).", ino, + fi->fh, off, next_off, size, s.ToString()); ReplyError(req, s); + } else { buffer.resize(writed_size); - VLOG(1) << fmt::format("read dir success, ino({}) fh({}) off({}) size({}).", - ino, fi->fh, off, buffer.size()); + VLOG(1) << fmt::format( + "read dir success, ino({}) fh({}) off[{},{}) size({}).", ino, fi->fh, + off, next_off, writed_size); ReplyBuf(req, buffer.data(), buffer.size()); } diff --git a/src/client/vfs/metasystem/mds/dir_iterator.cc b/src/client/vfs/metasystem/mds/dir_iterator.cc index 2a189c636..600eb36f7 100644 --- a/src/client/vfs/metasystem/mds/dir_iterator.cc +++ b/src/client/vfs/metasystem/mds/dir_iterator.cc @@ -14,69 +14,90 @@ #include "client/vfs/metasystem/mds/dir_iterator.h" -#include #include #include "client/vfs/common/helper.h" #include "common/options/client.h" #include "fmt/format.h" +#include "glog/logging.h" namespace dingofs { namespace client { namespace vfs { namespace v2 { -Status DirIterator::Seek() { - last_fetch_time_ns_ = utils::TimestampNs(); +DirIterator::~DirIterator() { + std::string str; + for (auto offset : offset_stats_) { + str += fmt::format("{},", offset); + } - std::vector entries; - auto status = - mds_client_->ReadDir(ctx_, ino_, fh_, last_name_, - FLAGS_client_vfs_read_dir_batch_size, true, entries); - if (!status.ok()) return status; + VLOG(1) << fmt::format("[dir_iterator.{}.{}] offset stats: {} {}.", ino_, fh_, + offset_stats_.size(), str); +} - offset_ = 0; - entries_ = std::move(entries); - if (!entries_.empty()) { - last_name_ = entries_.back().name; - } +void DirIterator::Remember(uint64_t off) { offset_stats_.push_back(off); } + +Status DirIterator::PreFetch() { + last_fetch_time_ns_ = utils::TimestampNs(); + // return Next(); return Status::OK(); } -bool DirIterator::Valid() { return offset_.load() < entries_.size(); } - -DirEntry DirIterator::GetValue(bool with_attr) { - CHECK(offset_.load() < entries_.size()) << "offset out of range"; +Status DirIterator::GetValue(uint64_t off, bool with_attr, + DirEntry& dir_entry) { + CHECK(off >= offset_) << fmt::format( + "[dir_iterator.{}.{}] off out of range, {} {}.", ino_, fh_, offset_, off); with_attr_ = with_attr; - return entries_[offset_.load()]; -} + do { + if (off < offset_ + entries_.size()) { + dir_entry = entries_[off - offset_]; + return Status::OK(); + } -void DirIterator::Next() { - if (++offset_ < entries_.size()) { - return; - } + if (is_fetch_ && entries_.size() < FLAGS_client_vfs_read_dir_batch_size) { + return Status::NoData("not more dentry"); + } + + auto status = Next(); + if (!status.ok()) return status; + + } while (true); + + return Status::OK(); +} +Status DirIterator::Next() { std::vector entries; auto status = mds_client_->ReadDir(ctx_, ino_, fh_, last_name_, FLAGS_client_vfs_read_dir_batch_size, with_attr_, entries); - if (!status.ok()) return; + if (!status.ok()) { + LOG(ERROR) << fmt::format( + "[dir_iterator.{}.{}] readdir fail, offset({}) last_name({}).", ino_, + fh_, offset_, last_name_); + return status; + } - offset_ = 0; + is_fetch_ = true; + + offset_ += entries_.size(); entries_ = std::move(entries); if (!entries_.empty()) { last_name_ = entries_.back().name; } + + return Status::OK(); } bool DirIterator::Dump(Json::Value& value) { value["ino"] = ino_; value["last_name"] = last_name_; value["with_attr"] = with_attr_; - value["offset"] = offset_.load(); + value["offset"] = offset_; Json::Value entries = Json::arrayValue; for (const auto& entry : entries_) { diff --git a/src/client/vfs/metasystem/mds/dir_iterator.h b/src/client/vfs/metasystem/mds/dir_iterator.h index 6fd0996ec..a1cb4b516 100644 --- a/src/client/vfs/metasystem/mds/dir_iterator.h +++ b/src/client/vfs/metasystem/mds/dir_iterator.h @@ -19,6 +19,7 @@ #include #include +#include #include "client/vfs/metasystem/mds/mds_client.h" #include "client/vfs/vfs_meta.h" @@ -38,16 +39,17 @@ class DirIterator { public: DirIterator(ContextSPtr ctx, MDSClientSPtr mds_client, Ino ino, uint64_t fh) : ctx_(ctx), mds_client_(mds_client), ino_(ino), fh_(fh) {} + ~DirIterator(); static DirIteratorSPtr New(ContextSPtr ctx, MDSClientSPtr mds_client, Ino ino, uint64_t fh) { return std::make_shared(ctx, mds_client, ino, fh); } - Status Seek(); - bool Valid(); - DirEntry GetValue(bool with_attr); - void Next(); + void Remember(uint64_t off); + + Status PreFetch(); + Status GetValue(uint64_t off, bool with_attr, DirEntry& dir_entry); uint64_t LastFetchTimeNs() const { return last_fetch_time_ns_.load(); } @@ -55,20 +57,26 @@ class DirIterator { bool Load(const Json::Value& value); private: + Status Next(); + ContextSPtr ctx_; Ino ino_; uint64_t fh_; // last file/dir name, used to read next batch std::string last_name_; - bool with_attr_{false}; + bool with_attr_{true}; - std::atomic offset_{0}; + uint64_t offset_{0}; // stash entry for read dir std::vector entries_; MDSClientSPtr mds_client_; std::atomic last_fetch_time_ns_{0}; + bool is_fetch_{false}; + + // stat + std::vector offset_stats_; }; class DirIteratorManager { diff --git a/src/client/vfs/metasystem/mds/filesystem.cc b/src/client/vfs/metasystem/mds/filesystem.cc index 78a07405d..906409926 100644 --- a/src/client/vfs/metasystem/mds/filesystem.cc +++ b/src/client/vfs/metasystem/mds/filesystem.cc @@ -449,8 +449,9 @@ Status MDSFileSystem::ReadSlice(ContextSPtr ctx, Ino ino, uint64_t index, if (fh != 0 && GetSliceFromCache(ino, index, slices)) { ctx->hit_cache = true; LOG(INFO) << fmt::format( - "[meta.filesystem.{}.{}.{}] readslice from cache, slices{}.", ino, fh, - index, Helper::GetSliceIds(*slices)); + "[meta.filesystem.{}.{}.{}] readslice from cache, version({}) " + "slices{}.", + ino, fh, index, version, Helper::GetSliceIds(*slices)); return Status::OK(); } @@ -595,7 +596,7 @@ Status MDSFileSystem::RmDir(ContextSPtr ctx, Ino parent, Status MDSFileSystem::OpenDir(ContextSPtr ctx, Ino ino, uint64_t fh) { auto dir_iterator = DirIterator::New(ctx, mds_client_, ino, fh); - auto status = dir_iterator->Seek(); + auto status = dir_iterator->PreFetch(); if (!status.ok()) { LOG(ERROR) << fmt::format( "[meta.filesystem.{}.{}] opendir fail, error({}).", ino, fh, @@ -614,16 +615,21 @@ Status MDSFileSystem::ReadDir(ContextSPtr ctx, Ino, uint64_t fh, auto dir_iterator = dir_iterator_manager_.Get(fh); CHECK(dir_iterator != nullptr) << "dir_iterator is null"; - while (dir_iterator->Valid()) { - DirEntry entry = dir_iterator->GetValue(with_attr); + dir_iterator->Remember(offset); + + while (true) { + DirEntry entry; + auto status = dir_iterator->GetValue(offset++, with_attr, entry); + if (!status.ok()) { + if (status.IsNoData()) break; + return status; + } CorrectAttr(ctx, dir_iterator->LastFetchTimeNs(), entry.attr, "readdir"); - if (!handler(entry, ++offset)) { + if (!handler(entry, offset)) { break; } - - dir_iterator->Next(); } return Status::OK(); diff --git a/src/client/vfs/vfs_impl.cc b/src/client/vfs/vfs_impl.cc index 8c58d147d..1b5fe48d2 100644 --- a/src/client/vfs/vfs_impl.cc +++ b/src/client/vfs/vfs_impl.cc @@ -90,9 +90,13 @@ bool VFSImpl::Load(ContextSPtr ctx, const Json::Value& value) { return meta_system_->Load(ctx, value); } -double VFSImpl::GetAttrTimeout(const FileType& type) { return 1; } // NOLINT +double VFSImpl::GetAttrTimeout(const FileType& type) { // NOLINT + return FLAGS_client_fuse_attr_cache_timeout_s; +} -double VFSImpl::GetEntryTimeout(const FileType& type) { return 1; } // NOLINT +double VFSImpl::GetEntryTimeout(const FileType& type) { // NOLINT + return FLAGS_client_fuse_entry_cache_timeout_s; +} Status VFSImpl::Lookup(ContextSPtr ctx, Ino parent, const std::string& name, Attr* attr) { @@ -482,11 +486,7 @@ Status VFSImpl::ReadDir(ContextSPtr ctx, Ino ino, uint64_t fh, uint64_t offset, handler(stats_entry, 1); // pos 0 is the offset for .stats entry } - return meta_system_->ReadDir( - ctx, ino, fh, offset, with_attr, - [handler](const DirEntry& entry, uint64_t offset) { - return handler(entry, offset); - }); + return meta_system_->ReadDir(ctx, ino, fh, offset, with_attr, handler); } Status VFSImpl::ReleaseDir(ContextSPtr ctx, Ino ino, uint64_t fh) { diff --git a/src/client/vfs/vfs_impl.h b/src/client/vfs/vfs_impl.h index 3012a59b9..b2595f8ab 100644 --- a/src/client/vfs/vfs_impl.h +++ b/src/client/vfs/vfs_impl.h @@ -17,6 +17,7 @@ #ifndef DINGOFS_CLIENT_VFS_IMPL_H_ #define DINGOFS_CLIENT_VFS_IMPL_H_ +#include #include #include "brpc/server.h" diff --git a/src/client/vfs/vfs_wrapper.cc b/src/client/vfs/vfs_wrapper.cc index 6017641aa..eff5e9da4 100644 --- a/src/client/vfs/vfs_wrapper.cc +++ b/src/client/vfs/vfs_wrapper.cc @@ -811,24 +811,7 @@ Status VFSWrapper::ReadDir(Ino ino, uint64_t fh, uint64_t offset, ClientOpMetricGuard op_metric( {&client_op_metric_->opReadDir, &client_op_metric_->opAll}); - if (FLAGS_client_access_logging_verbose) { - s = vfs_->ReadDir( - span->GetContext(), ino, fh, offset, with_attr, - [&](const vfs::DirEntry& dir_entry, uint64_t off) -> bool { - dingofs::client::AccessLogGuard log( - [ino, off, name = std::string(dir_entry.name), - attr = dir_entry.attr, fh]() { - return fmt::format("add_direntry ({}/{}) : {} {} [fh:{}]", ino, - name, dingofs::client::vfs::Attr2Str(attr), - off, fh); - }); - - return handler(dir_entry, off); - }); - - } else { - s = vfs_->ReadDir(span->GetContext(), ino, fh, offset, with_attr, handler); - } + s = vfs_->ReadDir(span->GetContext(), ino, fh, offset, with_attr, handler); VLOG(1) << "VFSReaddir end, status: " << s.ToString(); diff --git a/src/common/options/client.cc b/src/common/options/client.cc index 2e6be456e..dd2f526ee 100644 --- a/src/common/options/client.cc +++ b/src/common/options/client.cc @@ -44,10 +44,6 @@ DEFINE_int32(client_bthread_worker_num, 0, "bthread worker num"); DEFINE_bool(client_access_logging, true, "enable access log"); DEFINE_validator(client_access_logging, brpc::PassValidate); -DEFINE_bool(client_access_logging_verbose, true, - "enable access log with verbose"); -DEFINE_validator(client_access_logging_verbose, brpc::PassValidate); - DEFINE_int64(client_access_log_threshold_us, 0, "access log threshold"); DEFINE_validator(client_access_log_threshold_us, brpc::PassValidate); @@ -58,6 +54,17 @@ DEFINE_validator(client_fuse_file_info_direct_io, brpc::PassValidate); DEFINE_bool(client_fuse_file_info_keep_cache, false, "keep file page cache"); DEFINE_validator(client_fuse_file_info_keep_cache, brpc::PassValidate); +DEFINE_bool(client_fuse_enable_readdir_cache, false, "enable readdir cache"); +DEFINE_validator(client_fuse_enable_readdir_cache, brpc::PassValidate); + +DEFINE_uint32(client_fuse_entry_cache_timeout_s, 1, + "fuse entry cache timeout in seconds"); +DEFINE_validator(client_fuse_entry_cache_timeout_s, brpc::PassValidate); + +DEFINE_uint32(client_fuse_attr_cache_timeout_s, 1, + "fuse attr cache timeout in seconds"); +DEFINE_validator(client_fuse_attr_cache_timeout_s, brpc::PassValidate); + // smooth upgrade DEFINE_uint32(client_fuse_fd_get_max_retries, 100, "the max retries that get fuse fd from old dingo-fuse during " diff --git a/src/common/options/client.h b/src/common/options/client.h index 81cf93fa8..e2d52d7cc 100644 --- a/src/common/options/client.h +++ b/src/common/options/client.h @@ -62,12 +62,15 @@ DECLARE_int32(client_bthread_worker_num); // access log DECLARE_bool(client_access_logging); -DECLARE_bool(client_access_logging_verbose); DECLARE_int64(client_access_log_threshold_us); // fuse module DECLARE_bool(client_fuse_file_info_direct_io); DECLARE_bool(client_fuse_file_info_keep_cache); +DECLARE_bool(client_fuse_enable_readdir_cache); + +DECLARE_uint32(client_fuse_entry_cache_timeout_s); +DECLARE_uint32(client_fuse_attr_cache_timeout_s); // smooth upgrade DECLARE_uint32(client_fuse_fd_get_max_retries); diff --git a/src/mds/filesystem/filesystem.cc b/src/mds/filesystem/filesystem.cc index c948b4c31..872ddee0c 100644 --- a/src/mds/filesystem/filesystem.cc +++ b/src/mds/filesystem/filesystem.cc @@ -213,7 +213,13 @@ void FileSystem::DeleteDentryFromPartition(Ino parent, const std::string& name, } Status FileSystem::GetPartition(Context& ctx, Ino parent, PartitionPtr& out_partition) { - return GetPartition(ctx, ctx.GetInodeVersion(), parent, out_partition); + auto status = GetPartition(ctx, ctx.GetInodeVersion(), parent, out_partition); + if (status.ok()) { + DINGO_LOG(DEBUG) << fmt::format("[fs.{}.{}.{}] get partition({}/{}) this({}).", fs_id_, out_partition->INo(), + ctx.RequestId(), out_partition->BaseVersion(), out_partition->DeltaVersion(), + (void*)out_partition.get()); + } + return status; } Status FileSystem::GetPartition(Context& ctx, uint64_t version, Ino parent, PartitionPtr& out_partition) { @@ -224,7 +230,7 @@ Status FileSystem::GetPartition(Context& ctx, uint64_t version, Ino parent, Part if (bypass_cache) { auto status = GetPartitionFromStore(ctx, parent, "Bypass", out_partition); if (!status.ok()) { - return Status(pb::error::ENOT_FOUND, fmt::format("not found partition({}), {}.", parent, status.error_str())); + return Status(status.error_code(), fmt::format("not found partition({}), {}.", parent, status.error_str())); } return status; @@ -234,7 +240,7 @@ Status FileSystem::GetPartition(Context& ctx, uint64_t version, Ino parent, Part if (partition == nullptr) { auto status = GetPartitionFromStore(ctx, parent, "CacheMiss", out_partition); if (!status.ok()) { - return Status(pb::error::ENOT_FOUND, fmt::format("not found partition({}), {}.", parent, status.error_str())); + return Status(status.error_code(), fmt::format("not found partition({}), {}.", parent, status.error_str())); } return status; @@ -245,7 +251,7 @@ Status FileSystem::GetPartition(Context& ctx, uint64_t version, Ino parent, Part std::string reason = fmt::format("OutOfDate[{},cache{},req{}]", use_base_version, check_version, version); auto status = GetPartitionFromStore(ctx, parent, reason, out_partition); if (!status.ok()) { - return Status(pb::error::ENOT_FOUND, fmt::format("not found partition({}), {}.", parent, status.error_str())); + return Status(status.error_code(), fmt::format("not found partition({}), {}.", parent, status.error_str())); } return status; @@ -303,7 +309,7 @@ Status FileSystem::GetPartitionFromStore(Context& ctx, Ino parent, const std::st for (size_t i = 1; i < kvs.size(); ++i) { const auto& kv = kvs.at(i); auto dentry = MetaCodec::DecodeDentryValue(kv.value); - partition->PutChild(dentry); + partition->PutChild(dentry, parent_inode->Version()); } out_partition = partition_cache_.PutIf(parent, partition); @@ -386,7 +392,7 @@ Status FileSystem::GetInode(Context& ctx, uint64_t version, const Dentry& dentry } while (false); if (is_fetch && status.ok()) { - partition->PutChild(Dentry(dentry, out_inode)); + partition->PutChildForInode(Dentry(dentry, out_inode)); } return status; @@ -1803,7 +1809,7 @@ Status FileSystem::Rename(Context& ctx, const RenameParam& param, uint64_t& old_ auto status = GetPartition(ctx, old_parent, old_parent_partition); if (status.ok()) { // delete old dentry - old_parent_partition->DeleteChild(old_name); + old_parent_partition->DeleteChild(old_name, old_parent_attr.version()); // update old parent attr UpsertInodeCache(old_parent_attr); } @@ -1818,12 +1824,12 @@ Status FileSystem::Rename(Context& ctx, const RenameParam& param, uint64_t& old_ UpsertInodeCache(new_parent_attr); // delete prev new dentry - if (is_exist_new_dentry) new_parent_partition->DeleteChild(new_name); + if (is_exist_new_dentry) new_parent_partition->DeleteChild(new_name, new_parent_attr.version()); // add new dentry Dentry new_dentry(fs_id_, new_name, new_parent, old_dentry.ino(), old_dentry.type(), 0, GetInodeFromCache(old_dentry.ino())); - new_parent_partition->PutChild(new_dentry); + new_parent_partition->PutChild(new_dentry, new_parent_attr.version()); } // delete exist new partition diff --git a/src/mds/filesystem/partition.cc b/src/mds/filesystem/partition.cc index 0a9ec574c..01d11706d 100644 --- a/src/mds/filesystem/partition.cc +++ b/src/mds/filesystem/partition.cc @@ -14,13 +14,13 @@ #include "mds/filesystem/partition.h" -#include - #include #include #include #include "fmt/format.h" +#include "glog/logging.h" +#include "mds/common/helper.h" #include "mds/common/logging.h" namespace dingofs { @@ -30,6 +30,7 @@ static const std::string kPartitionMetricsPrefix = "dingofs_{}_partition_cache_" // 0: no limit DEFINE_uint32(mds_partition_cache_max_count, 4 * 1024 * 1024, "partition cache max count"); +DEFINE_uint32(mds_partition_dentry_op_expire_interval_s, 600, "partition dentry op expire interval seconds"); const uint32_t kDentryDefaultNum = 1024; @@ -58,6 +59,9 @@ void Partition::SetParentInode(InodeSPtr parent_inode) { } void Partition::PutChild(const Dentry& dentry, uint64_t version) { + DINGO_LOG(DEBUG) << fmt::format("[partition.{}] put child, name({}), version({}) this({}).", ino_, dentry.Name(), + version, (void*)this); + utils::WriteLockGuard lk(lock_); auto it = children_.find(dentry.Name()); @@ -65,13 +69,22 @@ void Partition::PutChild(const Dentry& dentry, uint64_t version) { it->second = dentry; } else { children_[dentry.Name()] = dentry; - } - delta_dentry_ops_.push_back(DentryOp{DentryOpType::ADD, version, dentry}); + AddDeltaDentryOp(DentryOp{DentryOpType::ADD, version, dentry}); + } delta_version_ = std::max(version, delta_version_); } +void Partition::PutChildForInode(const Dentry& dentry) { + utils::WriteLockGuard lk(lock_); + + auto it = children_.find(dentry.Name()); + if (it != children_.end()) { + it->second = dentry; + } +} + void Partition::DeleteChild(const std::string& name, uint64_t version) { utils::WriteLockGuard lk(lock_); @@ -82,7 +95,7 @@ void Partition::DeleteChild(const std::string& name, uint64_t version) { children_.erase(it); } - delta_dentry_ops_.push_back(DentryOp{DentryOpType::DELETE, version, detry}); + AddDeltaDentryOp(DentryOp{DentryOpType::DELETE, version, detry}); delta_version_ = std::max(version, delta_version_); } @@ -92,7 +105,7 @@ void Partition::DeleteChildIf(const std::string& name, Ino ino, uint64_t version auto it = children_.find(name); if (it != children_.end() && it->second.INo() == ino) { - delta_dentry_ops_.push_back(DentryOp{DentryOpType::DELETE, version, it->second}); + AddDeltaDentryOp(DentryOp{DentryOpType::DELETE, version, it->second}); children_.erase(it); } @@ -156,9 +169,9 @@ bool Partition::Merge(PartitionPtr& other_partition) { if (other_partition->BaseVersion() <= base_version_) return false; - DINGO_LOG(INFO) << fmt::format("[partition.{}] merge, self({},{},{},{}) other({},{}).", ino_, base_version_, - delta_version_, children_.size(), delta_dentry_ops_.size(), - other_partition->BaseVersion(), other_partition->children_.size()); + DINGO_LOG(DEBUG) << fmt::format("[partition.{}] merge, self({},{},{},{}) other({},{}).", ino_, base_version_, + delta_version_, children_.size(), delta_dentry_ops_.size(), + other_partition->BaseVersion(), other_partition->children_.size()); base_version_ = other_partition->BaseVersion(); children_.swap(other_partition->children_); @@ -185,6 +198,21 @@ bool Partition::Merge(PartitionPtr& other_partition) { return true; } +void Partition::AddDeltaDentryOp(DentryOp&& op) { + op.time_s = Helper::Timestamp(); + delta_dentry_ops_.push_back(std::move(op)); + + // clean expired ops + uint64_t now_s = Helper::Timestamp(); + for (auto it = delta_dentry_ops_.begin(); it != delta_dentry_ops_.end();) { + if (it->time_s + FLAGS_mds_partition_dentry_op_expire_interval_s < now_s) { + it = delta_dentry_ops_.erase(it); + } else { + ++it; + } + } +} + PartitionCache::PartitionCache(uint32_t fs_id) : fs_id_(fs_id), cache_(FLAGS_mds_partition_cache_max_count, @@ -198,6 +226,8 @@ PartitionPtr PartitionCache::PutIf(Ino ino, PartitionPtr partition) { new_partition = old_partition; }); + DINGO_LOG(DEBUG) << fmt::format("[cache.partition.{}.{}] putif, this({}).", fs_id_, ino, (void*)new_partition.get()); + return new_partition; } diff --git a/src/mds/filesystem/partition.h b/src/mds/filesystem/partition.h index da2399769..b750d9379 100644 --- a/src/mds/filesystem/partition.h +++ b/src/mds/filesystem/partition.h @@ -49,9 +49,10 @@ class Partition { InodeSPtr ParentInode(); void SetParentInode(InodeSPtr parent_inode); - void PutChild(const Dentry& dentry, uint64_t version = 0); - void DeleteChild(const std::string& name, uint64_t version = 0); - void DeleteChildIf(const std::string& name, Ino ino, uint64_t version = 0); + void PutChild(const Dentry& dentry, uint64_t version); + void PutChildForInode(const Dentry& dentry); + void DeleteChild(const std::string& name, uint64_t version); + void DeleteChildIf(const std::string& name, Ino ino, uint64_t version); bool HasChild(); bool GetChild(const std::string& name, Dentry& dentry); @@ -80,7 +81,10 @@ class Partition { DentryOpType op_type; uint64_t version; Dentry dentry; + uint64_t time_s; }; + void AddDeltaDentryOp(DentryOp&& op); + std::list delta_dentry_ops_; }; diff --git a/test/unit/mds/filesystem/test_dentry.cc b/test/unit/mds/filesystem/test_dentry.cc index 2a243a16b..3249f7ca1 100644 --- a/test/unit/mds/filesystem/test_dentry.cc +++ b/test/unit/mds/filesystem/test_dentry.cc @@ -12,11 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + +#include +#include +#include +#include +#include #include +#include +#include +#include #include "dingofs/mds.pb.h" -#include "fmt/core.h" +#include "fmt/format.h" #include "gtest/gtest.h" +#include "mds/common/helper.h" #include "mds/filesystem/dentry.h" namespace dingofs { @@ -42,6 +53,131 @@ TEST_F(DentryTest, Put) { ASSERT_EQ(nullptr, dentry.Inode()); } +class FileStore { + public: + FileStore() { files_.reserve(1000 * 1000); } + ~FileStore() = default; + + void AddFile(const std::string& file_name) { + std::lock_guard lg(mutex_); + + ++version_; + files_.push_back(file_name); + } + + void GetFiles(uint64_t& version, std::vector& result) { + std::lock_guard lg(mutex_); + + version = version_; + + for (auto& file : files_) { + result.push_back(file); + } + } + + private: + std::mutex mutex_; + uint64_t version_ = 0; + std::vector files_; +}; + +using FileStoreSPtr = std::shared_ptr; + +TEST_F(DentryTest, ReadDir) { + FileStoreSPtr file_store = std::make_shared(); + + const std::string sandbox = + "/home/dengzihui/mount-test/dengzh_hash_01-1/test4"; + + // create thread readdir + int readdir_thread_num = 10; + std::vector readdir_threads; + for (int i = 0; i < readdir_thread_num; ++i) { + std::thread t([sandbox, thread_no = i, file_store]() { + while (true) { + uint64_t version = 0; + std::vector filenames; + file_store->GetFiles(version, filenames); + + std::set read_filenames; + std::vector read_filename_vec; + uint64_t dentry_count = 0; + for (auto const& dir_entry : + std::filesystem::directory_iterator{sandbox}) { + // LOG(INFO) << fmt::format("[readdir.{}] path: {}.", thread_no, + // dir_entry.path().string()); + read_filename_vec.push_back(dir_entry.path().string()); + auto result = read_filenames.insert(dir_entry.path().string()); + if (!result.second) { + LOG(ERROR) << fmt::format("[readdir.{}] duplicate filename: {}.", + thread_no, dir_entry.path().string()); + } + ++dentry_count; + } + + LOG(INFO) << fmt::format( + "[readdir.{}] count: {} filenames_count({}) read_filename_vec({}).", + thread_no, dentry_count, filenames.size(), + read_filename_vec.size()); + + // check filen exist + for (auto& filename : filenames) { + if (read_filenames.find(std::string(filename)) == + read_filenames.end()) { + for (const auto& filename : read_filenames) { + LOG(INFO) << fmt::format("[readdir.{}] read_filename: {}.", + thread_no, filename); + } + + for (const auto& filename : read_filename_vec) { + LOG(INFO) << fmt::format("[readdir.{}] read_filename_vec: {}.", + thread_no, filename); + } + + LOG(FATAL) << fmt::format("[readdir.{}] filename not exist: {}.", + thread_no, filename); + } + } + + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + }); + + readdir_threads.push_back(std::move(t)); + } + + // create thread for create file + int createfile_thread_num = 3; + std::vector createfile_threads; + for (int i = 0; i < createfile_thread_num; ++i) { + std::thread t([sandbox, thread_no = i, file_store]() { + for (size_t j = 0; j < 10000; ++j) { + const std::string file_path = fmt::format( + "{}/file_{}_{}", sandbox, Helper::GenerateRandomString(32), j); + + std::ofstream ofs(file_path); + // ofs << "this is a test file." << '\n'; + ofs.close(); + + file_store->AddFile(file_path); + + // sleep for a while + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + }); + + createfile_threads.push_back(std::move(t)); + } + + for (auto& t : createfile_threads) { + t.join(); + } + + for (auto& t : readdir_threads) { + t.join(); + } +} + } // namespace unit_test } // namespace mds } // namespace dingofs diff --git a/test/unit/mds/filesystem/test_partition.cc b/test/unit/mds/filesystem/test_partition.cc index 69eb544d3..abc49a739 100644 --- a/test/unit/mds/filesystem/test_partition.cc +++ b/test/unit/mds/filesystem/test_partition.cc @@ -66,17 +66,23 @@ TEST_F(PartitionCacheTest, Put) { uint64_t parent_ino = 1; partition->PutChild(Dentry(kFsId, "dir01", parent_ino, 100000, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 1); partition->PutChild(Dentry(kFsId, "dir02", parent_ino, 100001, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 2); partition->PutChild(Dentry(kFsId, "dir03", parent_ino, 100002, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 3); partition->PutChild(Dentry(kFsId, "dir04", parent_ino, 100003, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 4); partition->PutChild( - Dentry(kFsId, "file01", parent_ino, 100004, pb::mds::FileType::FILE, 0)); + Dentry(kFsId, "file01", parent_ino, 100004, pb::mds::FileType::FILE, 0), + 5); partition->PutChild( - Dentry(kFsId, "file01", parent_ino, 100005, pb::mds::FileType::FILE, 0)); + Dentry(kFsId, "file01", parent_ino, 100005, pb::mds::FileType::FILE, 0), + 6); partition_cache.PutIf(inode->Ino(), partition); @@ -92,17 +98,23 @@ TEST_F(PartitionCacheTest, Delete) { uint64_t parent_ino = 1; partition->PutChild(Dentry(kFsId, "dir01", parent_ino, 100000, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 1); partition->PutChild(Dentry(kFsId, "dir02", parent_ino, 100001, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 2); partition->PutChild(Dentry(kFsId, "dir03", parent_ino, 100002, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 3); partition->PutChild(Dentry(kFsId, "dir04", parent_ino, 100003, - pb::mds::FileType::DIRECTORY, 0)); + pb::mds::FileType::DIRECTORY, 0), + 4); partition->PutChild( - Dentry(kFsId, "file01", parent_ino, 100004, pb::mds::FileType::FILE, 0)); + Dentry(kFsId, "file01", parent_ino, 100004, pb::mds::FileType::FILE, 0), + 5); partition->PutChild( - Dentry(kFsId, "file01", parent_ino, 100005, pb::mds::FileType::FILE, 0)); + Dentry(kFsId, "file01", parent_ino, 100005, pb::mds::FileType::FILE, 0), + 6); partition_cache.PutIf(inode->Ino(), partition); @@ -112,6 +124,34 @@ TEST_F(PartitionCacheTest, Delete) { ASSERT_TRUE(partition_cache.Get(inode->Ino()) == nullptr); } +TEST_F(PartitionCacheTest, PutIf) { + PartitionCache partition_cache(kFsId); + + InodeSPtr inode = + Inode::New(GenInode(kFsId, 1, pb::mds::FileType::DIRECTORY)); + auto partition = Partition::New(inode); + + { + auto resp_partition = partition_cache.PutIf(inode->Ino(), partition); + ASSERT_TRUE(resp_partition.get() == partition.get()); + } + + { + InodeSPtr inode = + Inode::New(GenInode(kFsId, 1, pb::mds::FileType::DIRECTORY)); + auto temp_partition = Partition::New(inode); + + Dentry dentry(kFsId, "file1", 1, 1000, pb::mds::FileType::FILE, 1212, + nullptr); + temp_partition->PutChild(dentry, 2); + + auto resp_partition = partition_cache.PutIf(inode->Ino(), temp_partition); + ASSERT_TRUE(resp_partition.get() == partition.get()); + Dentry temp_dentry; + ASSERT_TRUE(resp_partition->GetChild("file1", temp_dentry)); + } +} + } // namespace unit_test } // namespace mds } // namespace dingofs