From 40dbc29984d67a3f4946a0b30d53f3db19952bf0 Mon Sep 17 00:00:00 2001
From: wangyingbin
Date: Wed, 16 Sep 2020 15:15:45 +0800
Subject: [PATCH 0001/2866] rgw: do not return ERR_INVALID_ACCESS_KEY when the
 KMS key id is missing or the key is invalid

When KMS encryption is used and the key_id is empty, or the actual_key has
the wrong size, we should not return the "ERR_INVALID_ACCESS_KEY" error
code. Return "EINVAL" instead, which indicates a parameter error.

Signed-off-by: wangyingbin
---
 src/rgw/rgw_crypt.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rgw/rgw_crypt.cc b/src/rgw/rgw_crypt.cc
index 176a830430e09..3a040da942cca 100644
--- a/src/rgw/rgw_crypt.cc
+++ b/src/rgw/rgw_crypt.cc
@@ -782,7 +782,7 @@ int rgw_s3_prepare_encrypt(struct req_state* s,
       ldout(s->cct, 5) << "ERROR: not provide a valid key id" << dendl;
       s->err.message = "Server Side Encryption with KMS managed key requires "
                        "HTTP header x-amz-server-side-encryption-aws-kms-key-id";
-      return -ERR_INVALID_ACCESS_KEY;
+      return -EINVAL;
     }
     /* try to retrieve actual key */
     std::string key_selector = create_random_key_selector(s->cct);
@@ -797,7 +797,7 @@ int rgw_s3_prepare_encrypt(struct req_state* s,
       ldout(s->cct, 5) << "ERROR: key obtained from key_id:" << key_id
                        << " is not 256 bit size" << dendl;
       s->err.message = "KMS provided an invalid key for the given kms-keyid.";
-      return -ERR_INVALID_ACCESS_KEY;
+      return -EINVAL;
     }
     set_attr(attrs, RGW_ATTR_CRYPT_MODE, "SSE-KMS");
     set_attr(attrs, RGW_ATTR_CRYPT_KEYID, key_id);

From b8d7d356120dc3698def0b88deb5616448cfd3c0 Mon Sep 17 00:00:00 2001
From: Seena Fallah
Date: Wed, 14 Oct 2020 10:43:15 +0330
Subject: [PATCH 0002/2866] rgw: add infile arg to role-policy put

Allow passing the policy document via the --infile argument.

Signed-off-by: Seena Fallah
---
 doc/radosgw/role.rst |  4 ++++
 src/rgw/rgw_admin.cc | 14 ++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/doc/radosgw/role.rst b/doc/radosgw/role.rst
index 4511e2fe13311..66ef0ac9c1f84 100644
--- a/doc/radosgw/role.rst
+++ b/doc/radosgw/role.rst
@@ -203,6 +203,10 @@ For example::

   radosgw-admin role-policy put --role-name=S3Access1 --policy-name=Policy1 --policy-doc=\{\"Version\":\"2012-10-17\",\"Statement\":\[\{\"Effect\":\"Allow\",\"Action\":\[\"s3:*\"\],\"Resource\":\"arn:aws:s3:::example_bucket\"\}\]\}

+For passing ``policy-doc`` as a file::
+
+  radosgw-admin role-policy put --role-name=S3Access1 --policy-name=Policy1 --infile policy-document.json
+
 In the above example, we are attaching a policy 'Policy1' to role 'S3Access1', which allows all s3 actions on 'example_bucket'.

List Permission Policy Names attached to a Role diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 9f4ef12dbff0a..376f4b5489130 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -5834,12 +5834,22 @@ int main(int argc, const char **argv) return -EINVAL; } - if (perm_policy_doc.empty()) { + if (perm_policy_doc.empty() && infile.empty()) { cerr << "permission policy document is empty" << std::endl; return -EINVAL; } - bufferlist bl = bufferlist::static_from_string(perm_policy_doc); + bufferlist bl; + if (!infile.empty()) { + int ret = read_input(infile, bl); + if (ret < 0) { + cerr << "ERROR: failed to read input policy document: " << cpp_strerror(-ret) << std::endl; + return -ret; + } + perm_policy_doc = bl.to_str(); + } else { + bl = bufferlist::static_from_string(perm_policy_doc); + } try { const rgw::IAM::Policy p(g_ceph_context, tenant, bl); } catch (rgw::IAM::PolicyParseException& e) { From 3ae77c51a74f441d1e9a078fc7bf61103f783b5e Mon Sep 17 00:00:00 2001 From: Liu Lan Date: Thu, 10 Dec 2020 17:00:56 +0800 Subject: [PATCH 0003/2866] rgw: an empty tagset is allowed by S3 Signed-off-by: Liu Lan --- src/rgw/rgw_tag.cc | 3 +++ src/rgw/rgw_tag_s3.cc | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rgw/rgw_tag.cc b/src/rgw/rgw_tag.cc index f43a182486856..6f030c3577ccb 100644 --- a/src/rgw/rgw_tag.cc +++ b/src/rgw/rgw_tag.cc @@ -35,6 +35,9 @@ int RGWObjTags::check_and_add_tag(const string&key, const string& val){ } int RGWObjTags::set_from_string(const string& input){ + if (input.empty()) { + return 0; + } int ret=0; vector kvs; boost::split(kvs, input, boost::is_any_of("&")); diff --git a/src/rgw/rgw_tag_s3.cc b/src/rgw/rgw_tag_s3.cc index b03607f73a7e6..b54265f90bff5 100644 --- a/src/rgw/rgw_tag_s3.cc +++ b/src/rgw/rgw_tag_s3.cc @@ -30,7 +30,8 @@ void RGWObjTagEntry_S3::dump_xml(Formatter *f) const { void RGWObjTagSet_S3::decode_xml(XMLObj *obj) { vector entries; - RGWXMLDecoder::decode_xml("Tag", entries, obj, true); + bool mandatory{false}; + RGWXMLDecoder::decode_xml("Tag", entries, obj, mandatory); for (auto& entry : entries) { const std::string& key = entry.get_key(); From 7c7b6f4ed6bcb9be7456537204b7b374c7df7115 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Thu, 25 Feb 2021 13:16:01 +0100 Subject: [PATCH 0004/2866] os/bluestore: Set new compression blob size to 64K Signed-off-by: Adam Kupczyk --- src/common/options/global.yaml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in index deb15388ccbff..172caf2f6b288 100644 --- a/src/common/options/global.yaml.in +++ b/src/common/options/global.yaml.in @@ -5446,7 +5446,7 @@ options: state) media fmt_desc: Default value of ``bluestore compression min blob size`` for non-rotational (solid state) media. 
- default: 8_K + default: 64_K see_also: - bluestore_compression_min_blob_size flags: From 6380481f0538f0b0655f0a1c1c05db623447ec03 Mon Sep 17 00:00:00 2001 From: Paul Reece Date: Fri, 4 Jun 2021 14:38:39 -0400 Subject: [PATCH 0005/2866] rgw: allow CompletedMultipartUpload in XML Bit of a strange one here It seems some S3 libraries like nodejs and golang have been using CompletedMultipartUpload instead of CompleteMultipartUpload in their XML, which some things speaking the S3 protocol allow, while others do not See also: https://github.com/aws/aws-sdk-js-v3/issues/1814 I have no idea if the Ceph Project wants to allow this undocumented behavior or not, but if you do, this code works for me :) Signed-off-by: Paul Reece --- src/rgw/rgw_multi.cc | 1 + src/rgw/rgw_op.cc | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/rgw/rgw_multi.cc b/src/rgw/rgw_multi.cc index e3369f64b641f..82c5d0ed2cd97 100644 --- a/src/rgw/rgw_multi.cc +++ b/src/rgw/rgw_multi.cc @@ -57,6 +57,7 @@ bool RGWMultiCompleteUpload::xml_end(const char *el) { XMLObj *RGWMultiXMLParser::alloc_obj(const char *el) { XMLObj *obj = NULL; if (strcmp(el, "CompleteMultipartUpload") == 0 || + strcmp(el, "CompletedMultipartUpload") == 0 || strcmp(el, "MultipartUpload") == 0) { obj = new RGWMultiCompleteUpload(); } else if (strcmp(el, "Part") == 0) { diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index ad1b0a82c9b29..5e6f13aed0996 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -5887,11 +5887,16 @@ void RGWCompleteMultipart::execute(optional_yield y) } parts = static_cast(parser.find_first("CompleteMultipartUpload")); + if (!parts || parts->parts.empty()) { + parts = static_cast(parser.find_first("CompletedMultipartUpload")); + } + if (!parts || parts->parts.empty()) { op_ret = -ERR_MALFORMED_XML; return; } + if ((int)parts->parts.size() > s->cct->_conf->rgw_multipart_part_upload_limit) { op_ret = -ERANGE; From be0842f2dd5731f0f6eaab55471662dfd2865ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20H=C3=BCttner?= Date: Thu, 6 May 2021 12:18:00 -0400 Subject: [PATCH 0006/2866] rgw: RGWSwiftWebsiteHandler::is_web_dir checks empty subdir_name checking for empty name avoids later assertion in RGWObjectCtx::set_atomic Fixes: CVE-2021-3531 (cherry picked from commit f44a8ae8aa27ecef69528db9aec220f12492810e) Signed-off-by: Felix Huettner --- src/rgw/rgw_rest_swift.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index e2a2db57d4ae5..1623dd07f3cf6 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -2551,6 +2551,9 @@ bool RGWSwiftWebsiteHandler::is_web_dir() const return false; } else if (subdir_name.back() == '/') { subdir_name.pop_back(); + if (subdir_name.empty()) { + return false; + } } std::unique_ptr obj = s->bucket->get_object(rgw_obj_key(std::move(subdir_name))); From 399a45e113322f3b2f8be9af83044c3aee8d96b9 Mon Sep 17 00:00:00 2001 From: Sunny Kumar Date: Wed, 2 Jun 2021 16:14:45 +0100 Subject: [PATCH 0007/2866] librbd/object_map: rbd diff between two snapshots lists entire image content Problem: rbd diff between two snapshots lists entire image content with `whole-object` switch. Solution: While computing resize diff during object map do not mark state as new data unless diff state is valid. 
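In other words, the per-object classification ends up as below (a rough
sketch that mirrors the DiffRequest.cc hunk in this patch, not new logic):

    if (object_map_state == OBJECT_NONEXISTENT) {
      *diff_it = DIFF_STATE_HOLE;
    } else if (diff_from_start ||
               (m_object_diff_state_valid &&
                object_map_state != OBJECT_EXISTS_CLEAN)) {
      *diff_it = DIFF_STATE_DATA_UPDATED;  // report as changed data
    } else {
      *diff_it = DIFF_STATE_DATA;          // report as pre-existing data
    }

So an object is only reported as updated data when the diff starts from the
beginning of time, or when the previously accumulated diff state is valid and
the object is dirty; otherwise it is reported as plain existing data.
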
Fixes: https://tracker.ceph.com/issues/50787 Signed-off-by: Sunny Kumar --- src/librbd/object_map/DiffRequest.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/librbd/object_map/DiffRequest.cc b/src/librbd/object_map/DiffRequest.cc index ca341f78c2f28..606d48bbf33c4 100644 --- a/src/librbd/object_map/DiffRequest.cc +++ b/src/librbd/object_map/DiffRequest.cc @@ -222,7 +222,9 @@ void DiffRequest::handle_load_object_map(int r) { uint8_t object_map_state = *it; if (object_map_state == OBJECT_NONEXISTENT) { *diff_it = DIFF_STATE_HOLE; - } else if (diff_from_start || object_map_state != OBJECT_EXISTS_CLEAN) { + } else if (diff_from_start || + (m_object_diff_state_valid && + object_map_state != OBJECT_EXISTS_CLEAN)) { *diff_it = DIFF_STATE_DATA_UPDATED; } else { *diff_it = DIFF_STATE_DATA; From 8d62c4adf74d2aee02f12bc1955490175b55bfda Mon Sep 17 00:00:00 2001 From: Sunny Kumar Date: Tue, 15 Jun 2021 14:19:44 +0100 Subject: [PATCH 0008/2866] test/librbd: add test condition for whole-object switch with fast diff enable This patch adds a test condition to test fast diff between two snapshots with whole-object switch. `TestMockObjectMapDiffRequest.EndDelta` expects diff state `DIFF_STATE_DATA_UPDATED` which is an incorrect expectation when diff is calculated between two snapshots. Fixes: https://tracker.ceph.com/issues/50787 Signed-off-by: Sunny Kumar --- src/test/librbd/object_map/test_mock_DiffRequest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/librbd/object_map/test_mock_DiffRequest.cc b/src/test/librbd/object_map/test_mock_DiffRequest.cc index f90d488a555bb..c25ae4a95c5e6 100644 --- a/src/test/librbd/object_map/test_mock_DiffRequest.cc +++ b/src/test/librbd/object_map/test_mock_DiffRequest.cc @@ -212,7 +212,7 @@ TEST_F(TestMockObjectMapDiffRequest, IntermediateDelta) { BitVector<2> object_map_1; object_map_1.resize(object_count); - object_map_1[1] = OBJECT_EXISTS_CLEAN; + object_map_1[1] = OBJECT_EXISTS; object_map_1[2] = OBJECT_EXISTS_CLEAN; expect_load_map(mock_image_ctx, 1U, object_map_1, 0); @@ -281,7 +281,7 @@ TEST_F(TestMockObjectMapDiffRequest, EndDelta) { BitVector<2> expected_diff_state; expected_diff_state.resize(object_count); expected_diff_state[1] = DIFF_STATE_DATA; - expected_diff_state[2] = DIFF_STATE_DATA_UPDATED; + expected_diff_state[2] = DIFF_STATE_DATA; expected_diff_state[3] = DIFF_STATE_HOLE_UPDATED; ASSERT_EQ(expected_diff_state, m_object_diff_state); } From 7511f9f675ea4e43992605dc03109bc5f356a5e1 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 2 Jul 2021 14:13:50 +0800 Subject: [PATCH 0009/2866] rgw: update last_added_entry when count == num_entries RGWRados::cls_bucket_list_unordered() will produce one redundent entry every time is_truncated is true.The issue could be easily reproduced when a bucket is filled with amounts of incomplete multipart upload. To be more specific, the number of incomplete multipart upload objects should be greater than 1100. 
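Roughly, the fix in the listing loop looks like this (sketch only; the
surrounding guard is paraphrased, the added assignment matches the hunk
below):

    if (/* fewer than num_entries results collected so far */) {
      ent_list.emplace_back(std::move(dirent));
      ++count;
    } else {
      last_added_entry = dirent.key;  // where the next round must resume
      *is_truncated = true;
      goto check_updates;
    }

Without updating last_added_entry, the next call re-lists from the previous
marker and returns the boundary entry a second time.
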
Signed-off-by: Peng Zhang --- src/rgw/rgw_rados.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index b62e3a8a6098f..6ea6b025d3c86 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -8780,6 +8780,7 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp, ent_list.emplace_back(std::move(dirent)); ++count; } else { + last_added_entry = dirent.key; *is_truncated = true; goto check_updates; } From f241a330dcb5968f9ec1de1a382572258cb6daac Mon Sep 17 00:00:00 2001 From: Lei Zhang <1091517373@qq.com> Date: Wed, 14 Jul 2021 17:30:48 +0800 Subject: [PATCH 0010/2866] rgw: RGWPostObj::execute() may lost data. Signed-off-by: Lei Zhang <1091517373@qq.com> --- src/rgw/rgw_op.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 167ad3593e446..b0945742c6faf 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -4284,6 +4284,9 @@ void RGWPostObj::execute(optional_yield y) hash.Update((const unsigned char *)data.c_str(), data.length()); op_ret = filter->process(std::move(data), ofs); + if (op_ret < 0) { + return; + } ofs += len; From 34082b7ee48a33e566348395395858e1e0db3013 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Fri, 4 Jun 2021 18:29:37 +0200 Subject: [PATCH 0011/2866] rbd-mirror: fix mirror image removal Invoke ImageRemoveRequest instead of calling directly mirror_image_remove so that the MirrroringWatcher can pick up local image deletion. Fixes: https://tracker.ceph.com/issues/51031 Signed-off-by: Arthur Outhenin-Chalandre --- .../test_mock_TrashMoveRequest.cc | 59 +++++++++++++++---- .../image_deleter/TrashMoveRequest.cc | 12 ++-- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/src/test/rbd_mirror/image_deleter/test_mock_TrashMoveRequest.cc b/src/test/rbd_mirror/image_deleter/test_mock_TrashMoveRequest.cc index c3702a9cc33bf..e1a52c87602c8 100644 --- a/src/test/rbd_mirror/image_deleter/test_mock_TrashMoveRequest.cc +++ b/src/test/rbd_mirror/image_deleter/test_mock_TrashMoveRequest.cc @@ -9,6 +9,7 @@ #include "librbd/TrashWatcher.h" #include "librbd/journal/ResetRequest.h" #include "librbd/mirror/GetInfoRequest.h" +#include "librbd/mirror/ImageRemoveRequest.h" #include "librbd/trash/MoveRequest.h" #include "tools/rbd_mirror/Threads.h" #include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" @@ -131,6 +132,37 @@ struct GetInfoRequest { GetInfoRequest* GetInfoRequest::s_instance = nullptr; +template<> +struct ImageRemoveRequest { + static ImageRemoveRequest* s_instance; + std::string global_image_id; + std::string image_id; + Context* on_finish; + + static ImageRemoveRequest *create(librados::IoCtx& io_ctx, + const std::string& global_image_id, + const std::string& image_id, + Context* on_finish) { + ceph_assert(s_instance != nullptr); + s_instance->global_image_id = global_image_id; + s_instance->image_id = image_id; + s_instance->on_finish = on_finish; + return s_instance; + } + + ImageRemoveRequest() { + ceph_assert(s_instance == nullptr); + s_instance = this; + } + ~ImageRemoveRequest() { + s_instance = nullptr; + } + + MOCK_METHOD0(send, void()); +}; + +ImageRemoveRequest* ImageRemoveRequest::s_instance = nullptr; + } // namespace mirror namespace trash { @@ -185,6 +217,7 @@ class TestMockImageDeleterTrashMoveRequest : public TestMockFixture { typedef TrashMoveRequest MockTrashMoveRequest; typedef librbd::journal::ResetRequest MockJournalResetRequest; typedef librbd::mirror::GetInfoRequest MockGetMirrorInfoRequest; + typedef 
librbd::mirror::ImageRemoveRequest MockImageRemoveRequest; typedef librbd::trash::MoveRequest MockLibrbdTrashMoveRequest; typedef librbd::TrashWatcher MockTrashWatcher; @@ -273,11 +306,12 @@ class TestMockImageDeleterTrashMoveRequest : public TestMockFixture { .WillOnce(Return(r)); } - void expect_mirror_image_remove(librados::IoCtx &ioctx, int r) { - EXPECT_CALL(get_mock_io_ctx(ioctx), - exec(StrEq("rbd_mirroring"), _, StrEq("rbd"), - StrEq("mirror_image_remove"), _, _, _, _)) - .WillOnce(Return(r)); + void expect_mirror_image_remove_request( + MockImageRemoveRequest& mock_image_remove_request, int r) { + EXPECT_CALL(mock_image_remove_request, send()) + .WillOnce(Invoke([this, &mock_image_remove_request, r]() { + m_threads->work_queue->queue(mock_image_remove_request.on_finish, r); + })); } void expect_journal_reset(MockJournalResetRequest& mock_journal_reset_request, @@ -355,7 +389,8 @@ TEST_F(TestMockImageDeleterTrashMoveRequest, SuccessJournal) { MockLibrbdTrashMoveRequest mock_librbd_trash_move_request; expect_trash_move(mock_librbd_trash_move_request, m_image_name, "image id", {}, 0); - expect_mirror_image_remove(m_local_io_ctx, 0); + MockImageRemoveRequest mock_image_remove_request; + expect_mirror_image_remove_request(mock_image_remove_request, 0); expect_close(mock_image_ctx, 0); @@ -396,7 +431,8 @@ TEST_F(TestMockImageDeleterTrashMoveRequest, SuccessSnapshot) { MockLibrbdTrashMoveRequest mock_librbd_trash_move_request; expect_trash_move(mock_librbd_trash_move_request, m_image_name, "image id", {}, 0); - expect_mirror_image_remove(m_local_io_ctx, 0); + MockImageRemoveRequest mock_image_remove_request; + expect_mirror_image_remove_request(mock_image_remove_request, 0); expect_close(mock_image_ctx, 0); @@ -742,7 +778,8 @@ TEST_F(TestMockImageDeleterTrashMoveRequest, RemoveMirrorImageError) { MockLibrbdTrashMoveRequest mock_librbd_trash_move_request; expect_trash_move(mock_librbd_trash_move_request, m_image_name, "image id", {}, 0); - expect_mirror_image_remove(m_local_io_ctx, -EINVAL); + MockImageRemoveRequest mock_image_remove_request; + expect_mirror_image_remove_request(mock_image_remove_request, -EINVAL); expect_close(mock_image_ctx, 0); @@ -791,7 +828,8 @@ TEST_F(TestMockImageDeleterTrashMoveRequest, CloseImageError) { MockLibrbdTrashMoveRequest mock_librbd_trash_move_request; expect_trash_move(mock_librbd_trash_move_request, m_image_name, "image id", {}, 0); - expect_mirror_image_remove(m_local_io_ctx, 0); + MockImageRemoveRequest mock_image_remove_request; + expect_mirror_image_remove_request(mock_image_remove_request, 0); expect_close(mock_image_ctx, -EINVAL); @@ -842,7 +880,8 @@ TEST_F(TestMockImageDeleterTrashMoveRequest, DelayedDelation) { expect_trash_move(mock_librbd_trash_move_request, m_image_name, "image id", 600, 0); - expect_mirror_image_remove(m_local_io_ctx, 0); + MockImageRemoveRequest mock_image_remove_request; + expect_mirror_image_remove_request(mock_image_remove_request, 0); expect_close(mock_image_ctx, 0); MockTrashWatcher mock_trash_watcher; diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc index 7f718cb9c66e2..234bb69129666 100644 --- a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc @@ -15,6 +15,7 @@ #include "librbd/Utils.h" #include "librbd/asio/ContextWQ.h" #include "librbd/journal/ResetRequest.h" +#include "librbd/mirror/ImageRemoveRequest.h" #include "librbd/mirror/GetInfoRequest.h" #include 
"librbd/trash/MoveRequest.h" #include "tools/rbd_mirror/image_deleter/Types.h" @@ -315,15 +316,12 @@ template void TrashMoveRequest::remove_mirror_image() { dout(10) << dendl; - librados::ObjectWriteOperation op; - librbd::cls_client::mirror_image_remove(&op, m_image_id); - - auto aio_comp = create_rados_callback< + auto ctx = create_context_callback< TrashMoveRequest, &TrashMoveRequest::handle_remove_mirror_image>(this); - int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); - ceph_assert(r == 0); - aio_comp->release(); + auto req = librbd::mirror::ImageRemoveRequest::create( + m_io_ctx, m_global_image_id, m_image_id, ctx); + req->send(); } template From 0e147f779d73d0688c2b89428db6012ed1560f20 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Mon, 7 Jun 2021 12:53:48 +0200 Subject: [PATCH 0012/2866] cls/rbd: add mirror_image_status_remove on client Signed-off-by: Arthur Outhenin-Chalandre --- src/cls/rbd/cls_rbd.cc | 1 - src/cls/rbd/cls_rbd_client.cc | 14 ++++++++++++++ src/cls/rbd/cls_rbd_client.h | 4 ++++ src/test/cls_rbd/test_cls_rbd.cc | 5 +++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/cls/rbd/cls_rbd.cc b/src/cls/rbd/cls_rbd.cc index e3e05d85ed500..ffc19267cf400 100644 --- a/src/cls/rbd/cls_rbd.cc +++ b/src/cls/rbd/cls_rbd.cc @@ -6366,7 +6366,6 @@ int mirror_image_status_set(cls_method_context_t hctx, bufferlist *in, * Output: * @returns 0 on success, negative error code on failure * - * NOTE: deprecated - remove this method after Octopus is unsupported */ int mirror_image_status_remove(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { diff --git a/src/cls/rbd/cls_rbd_client.cc b/src/cls/rbd/cls_rbd_client.cc index fee3ac8923cc3..2f1f37eaa9f72 100644 --- a/src/cls/rbd/cls_rbd_client.cc +++ b/src/cls/rbd/cls_rbd_client.cc @@ -2310,6 +2310,20 @@ int mirror_image_status_get_summary_finish( return 0; } +int mirror_image_status_remove(librados::IoCtx *ioctx, + const std::string &global_image_id) { + librados::ObjectWriteOperation op; + mirror_image_status_remove(&op, global_image_id); + return ioctx->operate(RBD_MIRRORING, &op); +} + +void mirror_image_status_remove(librados::ObjectWriteOperation *op, + const std::string &global_image_id) { + bufferlist bl; + encode(global_image_id, bl); + op->exec("rbd", "mirror_image_status_remove", bl); +} + int mirror_image_status_remove_down(librados::IoCtx *ioctx) { librados::ObjectWriteOperation op; mirror_image_status_remove_down(&op); diff --git a/src/cls/rbd/cls_rbd_client.h b/src/cls/rbd/cls_rbd_client.h index ef2b05fd8449e..38098805e98c4 100644 --- a/src/cls/rbd/cls_rbd_client.h +++ b/src/cls/rbd/cls_rbd_client.h @@ -478,6 +478,10 @@ void mirror_image_status_get_summary_start( int mirror_image_status_get_summary_finish( ceph::buffer::list::const_iterator *iter, std::map *states); +int mirror_image_status_remove(librados::IoCtx *ioctx, + const std::string &global_image_id); +void mirror_image_status_remove(librados::ObjectWriteOperation *op, + const std::string &global_image_id); int mirror_image_status_remove_down(librados::IoCtx *ioctx); void mirror_image_status_remove_down(librados::ObjectWriteOperation *op); diff --git a/src/test/cls_rbd/test_cls_rbd.cc b/src/test/cls_rbd/test_cls_rbd.cc index d01e04704ff64..5b33597455643 100644 --- a/src/test/cls_rbd/test_cls_rbd.cc +++ b/src/test/cls_rbd/test_cls_rbd.cc @@ -1889,6 +1889,11 @@ TEST_F(TestClsRbd, mirror_image_status) { ASSERT_EQ(1U, states.size()); ASSERT_EQ(3, states[cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN]); + // Test remove of 
status + ASSERT_EQ(0, mirror_image_status_set(&ioctx, "uuid1", status1)); + ASSERT_EQ(0, mirror_image_status_remove(&ioctx, "uuid1")); + ASSERT_EQ(-ENOENT, mirror_image_instance_get(&ioctx, "uuid1", &read_instance)); + // Test statuses are not down after watcher is started ASSERT_EQ(0, mirror_image_status_set(&ioctx, "uuid1", status1)); From a538c5d279c90397d375668baddd65776d2462b0 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Mon, 7 Jun 2021 14:58:03 +0200 Subject: [PATCH 0013/2866] rbd-mirror: add mirror status removal on ImageReplayer shutdown In a scenario where you have rbd-mirror daemons on both clusters. The rbd-mirror daemon on the primary site will not properly cleanup his status on image removal. This commit add a path for direct removal at the shut_down of the ImageReplayer to properly cleanup the metadata. Signed-off-by: Arthur Outhenin-Chalandre --- .../rbd_mirror/test_mock_ImageReplayer.cc | 5 +- .../test_mock_MirrorStatusUpdater.cc | 138 +++++++++++++++++- src/tools/rbd_mirror/ImageReplayer.cc | 65 +++++++-- src/tools/rbd_mirror/ImageReplayer.h | 5 + src/tools/rbd_mirror/MirrorStatusUpdater.cc | 38 ++++- src/tools/rbd_mirror/MirrorStatusUpdater.h | 5 +- 6 files changed, 228 insertions(+), 28 deletions(-) diff --git a/src/test/rbd_mirror/test_mock_ImageReplayer.cc b/src/test/rbd_mirror/test_mock_ImageReplayer.cc index 20693e3223b7b..57c2639a00ced 100644 --- a/src/test/rbd_mirror/test_mock_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_mock_ImageReplayer.cc @@ -63,7 +63,10 @@ struct MirrorStatusUpdater { MOCK_METHOD3(set_mirror_image_status, void(const std::string&, const cls::rbd::MirrorImageSiteStatus&, bool)); - MOCK_METHOD2(remove_mirror_image_status, void(const std::string&, Context*)); + MOCK_METHOD2(remove_refresh_mirror_image_status, void(const std::string&, + Context*)); + MOCK_METHOD3(remove_mirror_image_status, void(const std::string&, bool, + Context*)); }; template <> diff --git a/src/test/rbd_mirror/test_mock_MirrorStatusUpdater.cc b/src/test/rbd_mirror/test_mock_MirrorStatusUpdater.cc index 057159ceb4b88..ac8009df1379e 100644 --- a/src/test/rbd_mirror/test_mock_MirrorStatusUpdater.cc +++ b/src/test/rbd_mirror/test_mock_MirrorStatusUpdater.cc @@ -186,6 +186,38 @@ class TestMockMirrorStatusUpdater : public TestMockFixture { } } + void expect_mirror_status_remove(const std::string& global_image_id, int r) { + EXPECT_CALL(*m_mock_local_io_ctx, + exec(RBD_MIRRORING, _, StrEq("rbd"), + StrEq("mirror_image_status_remove"), _, _, _, _)) + .WillOnce(WithArg<4>(Invoke( + [r, global_image_id](bufferlist& in_bl) { + auto bl_it = in_bl.cbegin(); + std::string decode_global_image_id; + decode(decode_global_image_id, bl_it); + EXPECT_EQ(global_image_id, decode_global_image_id); + + return r; + }))); + } + + void expect_mirror_status_removes(const std::set& mirror_images, + int r) { + EXPECT_CALL(*m_mock_local_io_ctx, aio_operate(_, _, _, _, _)) + .WillOnce(Invoke([this](auto&&... 
args) { + int r = m_mock_local_io_ctx->do_aio_operate(decltype(args)(args)...); + m_mock_local_io_ctx->aio_flush(); + return r; + })); + + for (auto global_image_id : mirror_images) { + expect_mirror_status_remove(global_image_id, r); + if (r < 0) { + break; + } + } + } + void fire_timer_event(Context** timer_event, Context** update_task) { expect_timer_add_event(timer_event); @@ -385,6 +417,78 @@ TEST_F(TestMockMirrorStatusUpdater, OverwriteStatus) { *mock_mirror_status_watcher); } +TEST_F(TestMockMirrorStatusUpdater, RemoveStatus) { + MockMirrorStatusUpdater mock_mirror_status_updater(m_local_io_ctx, + m_mock_threads, ""); + MockMirrorStatusWatcher* mock_mirror_status_watcher = + new MockMirrorStatusWatcher(); + + InSequence seq; + + Context* timer_event = nullptr; + init_mirror_status_updater(mock_mirror_status_updater, + *mock_mirror_status_watcher, &timer_event); + + C_SaferCond ctx; + mock_mirror_status_updater.set_mirror_image_status("1", {}, false); + expect_work_queue(false); + mock_mirror_status_updater.remove_mirror_image_status("1", false, &ctx); + ASSERT_EQ(0, ctx.wait()); + + Context* update_task = nullptr; + fire_timer_event(&timer_event, &update_task); + + C_SaferCond remove_flush_ctx; + EXPECT_CALL(*m_mock_local_io_ctx, aio_operate(_, _, _, _, _)) + .WillOnce(Invoke([this, &remove_flush_ctx](auto&&... args) { + int r = m_mock_local_io_ctx->do_aio_operate(decltype(args)(args)...); + m_mock_local_io_ctx->aio_flush(); + remove_flush_ctx.complete(r); + return r; + })); + expect_mirror_status_remove("1", 0); + update_task->complete(0); + ASSERT_EQ(0, remove_flush_ctx.wait()); + + shut_down_mirror_status_updater(mock_mirror_status_updater, + *mock_mirror_status_watcher); +} + +TEST_F(TestMockMirrorStatusUpdater, OverwriteRemoveStatus) { + MockMirrorStatusUpdater mock_mirror_status_updater(m_local_io_ctx, + m_mock_threads, ""); + MockMirrorStatusWatcher* mock_mirror_status_watcher = + new MockMirrorStatusWatcher(); + + InSequence seq; + + Context* timer_event = nullptr; + init_mirror_status_updater(mock_mirror_status_updater, + *mock_mirror_status_watcher, &timer_event); + + C_SaferCond ctx; + mock_mirror_status_updater.set_mirror_image_status("1", {}, false); + expect_work_queue(false); + mock_mirror_status_updater.remove_mirror_image_status("1", false, &ctx); + ASSERT_EQ(0, ctx.wait()); + mock_mirror_status_updater.set_mirror_image_status( + "1", {"", cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING, "description"}, + false); + + + Context* update_task = nullptr; + fire_timer_event(&timer_event, &update_task); + + expect_mirror_status_update( + {{"1", cls::rbd::MirrorImageSiteStatus{ + "", cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING, "description"}}}, + "", 0); + update_task->complete(0); + + shut_down_mirror_status_updater(mock_mirror_status_updater, + *mock_mirror_status_watcher); +} + TEST_F(TestMockMirrorStatusUpdater, OverwriteStatusInFlight) { MockMirrorStatusUpdater mock_mirror_status_updater(m_local_io_ctx, m_mock_threads, ""); @@ -447,7 +551,32 @@ TEST_F(TestMockMirrorStatusUpdater, ImmediateUpdate) { *mock_mirror_status_watcher); } -TEST_F(TestMockMirrorStatusUpdater, RemoveIdleStatus) { +TEST_F(TestMockMirrorStatusUpdater, RemoveImmediateUpdate) { + MockMirrorStatusUpdater mock_mirror_status_updater(m_local_io_ctx, + m_mock_threads, ""); + MockMirrorStatusWatcher* mock_mirror_status_watcher = + new MockMirrorStatusWatcher(); + + InSequence seq; + + Context* timer_event = nullptr; + init_mirror_status_updater(mock_mirror_status_updater, + *mock_mirror_status_watcher, 
&timer_event); + + mock_mirror_status_updater.set_mirror_image_status("1", {}, false); + + C_SaferCond ctx; + expect_work_queue(true); + expect_work_queue(true); + expect_mirror_status_removes({"1"}, 0); + mock_mirror_status_updater.remove_mirror_image_status("1", true, &ctx); + ASSERT_EQ(0, ctx.wait()); + + shut_down_mirror_status_updater(mock_mirror_status_updater, + *mock_mirror_status_watcher); +} + +TEST_F(TestMockMirrorStatusUpdater, RemoveRefreshIdleStatus) { MockMirrorStatusUpdater mock_mirror_status_updater(m_local_io_ctx, m_mock_threads, ""); MockMirrorStatusWatcher* mock_mirror_status_watcher = @@ -463,14 +592,14 @@ TEST_F(TestMockMirrorStatusUpdater, RemoveIdleStatus) { C_SaferCond ctx; expect_work_queue(true); - mock_mirror_status_updater.remove_mirror_image_status("1", &ctx); + mock_mirror_status_updater.remove_refresh_mirror_image_status("1", &ctx); ASSERT_EQ(0, ctx.wait()); shut_down_mirror_status_updater(mock_mirror_status_updater, *mock_mirror_status_watcher); } -TEST_F(TestMockMirrorStatusUpdater, RemoveInFlightStatus) { +TEST_F(TestMockMirrorStatusUpdater, RemoveRefreshInFlightStatus) { MockMirrorStatusUpdater mock_mirror_status_updater(m_local_io_ctx, m_mock_threads, ""); MockMirrorStatusWatcher* mock_mirror_status_watcher = @@ -491,7 +620,8 @@ TEST_F(TestMockMirrorStatusUpdater, RemoveInFlightStatus) { EXPECT_CALL(*m_mock_local_io_ctx, aio_operate(_, _, _, _, _)) .WillOnce(Invoke( [this, &mock_mirror_status_updater, &on_removed](auto&&... args) { - mock_mirror_status_updater.remove_mirror_image_status("1", &on_removed); + mock_mirror_status_updater.remove_refresh_mirror_image_status( + "1", &on_removed); int r = m_mock_local_io_ctx->do_aio_operate(decltype(args)(args)...); m_mock_local_io_ctx->aio_flush(); diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index f5cd65fe821ec..9546d09fce1a2 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -312,6 +312,7 @@ void ImageReplayer::start(Context *on_finish, bool manual, bool restart) m_manual_stop = false; m_delete_requested = false; m_restart_requested = false; + m_status_removed = false; if (on_finish != nullptr) { ceph_assert(m_on_start_finish == nullptr); @@ -930,6 +931,7 @@ void ImageReplayer::handle_shut_down(int r) { dout(0) << "remote image no longer exists: scheduling deletion" << dendl; unregister_asok_hook = true; std::swap(delete_requested, m_delete_requested); + m_delete_in_progress = true; } std::swap(resync_requested, m_resync_requested); @@ -965,23 +967,12 @@ void ImageReplayer::handle_shut_down(int r) { return; } - if (m_local_status_updater->exists(m_global_image_id)) { - dout(15) << "removing local mirror image status" << dendl; - auto ctx = new LambdaContext([this, r](int) { - handle_shut_down(r); - }); - m_local_status_updater->remove_mirror_image_status(m_global_image_id, ctx); - return; - } - - if (m_remote_image_peer.mirror_status_updater != nullptr && - m_remote_image_peer.mirror_status_updater->exists(m_global_image_id)) { - dout(15) << "removing remote mirror image status" << dendl; + if (!m_status_removed) { auto ctx = new LambdaContext([this, r](int) { - handle_shut_down(r); - }); - m_remote_image_peer.mirror_status_updater->remove_mirror_image_status( - m_global_image_id, ctx); + m_status_removed = true; + handle_shut_down(r); + }); + remove_image_status(m_delete_in_progress, ctx); return; } @@ -1137,6 +1128,48 @@ void ImageReplayer::reregister_admin_socket_hook() { register_admin_socket_hook(); } +template 
+void ImageReplayer::remove_image_status(bool force, Context *on_finish) +{ + auto ctx = new LambdaContext([this, force, on_finish](int) { + remove_image_status_remote(force, on_finish); + }); + + if (m_local_status_updater->exists(m_global_image_id)) { + dout(15) << "removing local mirror image status" << dendl; + if (force) { + m_local_status_updater->remove_mirror_image_status( + m_global_image_id, true, ctx); + } else { + m_local_status_updater->remove_refresh_mirror_image_status( + m_global_image_id, ctx); + } + return; + } + + ctx->complete(0); +} + +template +void ImageReplayer::remove_image_status_remote(bool force, Context *on_finish) +{ + if (m_remote_image_peer.mirror_status_updater != nullptr && + m_remote_image_peer.mirror_status_updater->exists(m_global_image_id)) { + dout(15) << "removing remote mirror image status" << dendl; + if (force) { + m_remote_image_peer.mirror_status_updater->remove_mirror_image_status( + m_global_image_id, true, on_finish); + } else { + m_remote_image_peer.mirror_status_updater->remove_refresh_mirror_image_status( + m_global_image_id, on_finish); + } + return; + } + if (on_finish) { + on_finish->complete(0); + } +} + template std::ostream &operator<<(std::ostream &os, const ImageReplayer &replayer) { diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h index 47f221278706b..feb5ae546a0e5 100644 --- a/src/tools/rbd_mirror/ImageReplayer.h +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -204,10 +204,13 @@ class ImageReplayer { BootstrapProgressContext m_progress_cxt; bool m_finished = false; + bool m_delete_in_progress = false; bool m_delete_requested = false; bool m_resync_requested = false; bool m_restart_requested = false; + bool m_status_removed = false; + image_replayer::StateBuilder* m_state_builder = nullptr; image_replayer::Replayer* m_replayer = nullptr; ReplayerListener* m_replayer_listener = nullptr; @@ -258,6 +261,8 @@ class ImageReplayer { void register_admin_socket_hook(); void unregister_admin_socket_hook(); void reregister_admin_socket_hook(); + void remove_image_status(bool force, Context *on_finish); + void remove_image_status_remote(bool force, Context *on_finish); }; diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.cc b/src/tools/rbd_mirror/MirrorStatusUpdater.cc index 0530db601f3bc..257cb1df27cf7 100644 --- a/src/tools/rbd_mirror/MirrorStatusUpdater.cc +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.cc @@ -189,18 +189,33 @@ void MirrorStatusUpdater::set_mirror_image_status( } } +template +void MirrorStatusUpdater::remove_refresh_mirror_image_status( + const std::string& global_image_id, + Context* on_finish) { + if (try_remove_mirror_image_status(global_image_id, false, false, + on_finish)) { + m_threads->work_queue->queue(on_finish, 0); + } +} + template void MirrorStatusUpdater::remove_mirror_image_status( - const std::string& global_image_id, Context* on_finish) { - if (try_remove_mirror_image_status(global_image_id, on_finish)) { + const std::string& global_image_id, bool immediate_update, + Context* on_finish) { + if (try_remove_mirror_image_status(global_image_id, true, immediate_update, + on_finish)) { m_threads->work_queue->queue(on_finish, 0); } } template bool MirrorStatusUpdater::try_remove_mirror_image_status( - const std::string& global_image_id, Context* on_finish) { - dout(15) << "global_image_id=" << global_image_id << dendl; + const std::string& global_image_id, bool queue_update, + bool immediate_update, Context* on_finish) { + dout(15) << "global_image_id=" << global_image_id 
<< ", " + << "queue_update=" << queue_update << ", " + << "immediate_update=" << immediate_update << dendl; std::unique_lock locker(m_lock); if ((m_update_in_flight && @@ -209,8 +224,10 @@ bool MirrorStatusUpdater::try_remove_mirror_image_status( m_update_global_image_ids.count(global_image_id) > 0)) { // if update is scheduled/in-progress, wait for it to complete on_finish = new LambdaContext( - [this, global_image_id, on_finish](int r) { - if (try_remove_mirror_image_status(global_image_id, on_finish)) { + [this, global_image_id, queue_update, immediate_update, + on_finish](int r) { + if (try_remove_mirror_image_status(global_image_id, queue_update, + immediate_update, on_finish)) { on_finish->complete(0); } }); @@ -219,6 +236,13 @@ bool MirrorStatusUpdater::try_remove_mirror_image_status( } m_global_image_status.erase(global_image_id); + if (queue_update) { + m_update_global_image_ids.insert(global_image_id); + if (immediate_update) { + queue_update_task(std::move(locker)); + } + } + return true; } @@ -314,6 +338,8 @@ void MirrorStatusUpdater::update_task(int r) { auto status_it = global_image_status.find(global_image_id); if (status_it == global_image_status.end()) { + librbd::cls_client::mirror_image_status_remove(&op, global_image_id); + ++op_count; continue; } diff --git a/src/tools/rbd_mirror/MirrorStatusUpdater.h b/src/tools/rbd_mirror/MirrorStatusUpdater.h index 60ae68ce2c7cd..783b818fc56e8 100644 --- a/src/tools/rbd_mirror/MirrorStatusUpdater.h +++ b/src/tools/rbd_mirror/MirrorStatusUpdater.h @@ -44,7 +44,9 @@ class MirrorStatusUpdater { const cls::rbd::MirrorImageSiteStatus& mirror_image_site_status, bool immediate_update); void remove_mirror_image_status(const std::string& global_image_id, - Context* on_finish); + bool immediate_update, Context* on_finish); + void remove_refresh_mirror_image_status(const std::string& global_image_id, + Context* on_finish); private: /** @@ -90,6 +92,7 @@ class MirrorStatusUpdater { GlobalImageIds m_updating_global_image_ids; bool try_remove_mirror_image_status(const std::string& global_image_id, + bool queue_update, bool immediate_update, Context* on_finish); void init_mirror_status_watcher(Context* on_finish); From dd425ed5aad1c60304b5097016e2ac0a0a7db883 Mon Sep 17 00:00:00 2001 From: "cao.leilc" Date: Fri, 13 Aug 2021 10:00:48 +0800 Subject: [PATCH 0014/2866] rgw : add return when access key exist in subuser create Fixes: https://tracker.ceph.com/issues/51559 Signed-off-by: caolei --- src/rgw/rgw_user.cc | 10 ++++++++-- src/rgw/rgw_user.h | 6 ++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc index 9d45170a54c7a..a627bdd9c02c6 100644 --- a/src/rgw/rgw_user.cc +++ b/src/rgw/rgw_user.cc @@ -636,8 +636,9 @@ int RGWAccessKeyPool::check_op(RGWUserAdminOpState& op_state, // don't check for secret key because we may be doing a removal - check_existing_key(op_state); - + if (check_existing_key(op_state)) { + op_state.set_access_key_exist(); + } return 0; } @@ -1180,6 +1181,11 @@ int RGWSubUserPool::add(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_s return ret; } + if (op_state.get_access_key_exist()) { + set_err_msg(err_msg, "cannot create existing key"); + return -ERR_KEY_EXIST; + } + if (key_type == KEY_TYPE_S3 && op_state.get_access_key().empty()) { op_state.set_gen_access(); } diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h index 542efc2092561..b40abc6f19b4d 100644 --- a/src/rgw/rgw_user.h +++ b/src/rgw/rgw_user.h @@ -121,6 +121,7 @@ struct RGWUserAdminOpState { 
std::string id; // access key std::string key; // secret key int32_t key_type{-1}; + bool access_key_exist = false; std::set mfa_ids; @@ -254,6 +255,10 @@ struct RGWUserAdminOpState { type_specified = true; } + void set_access_key_exist() { + access_key_exist = true; + } + void set_suspension(__u8 is_suspended) { suspended = is_suspended; suspension_op = true; @@ -372,6 +377,7 @@ struct RGWUserAdminOpState { void set_generate_subuser(bool flag) { gen_subuser = flag; } __u8 get_suspension_status() { return suspended; } int32_t get_key_type() {return key_type; } + bool get_access_key_exist() {return access_key_exist; } uint32_t get_subuser_perm() { return perm_mask; } int32_t get_max_buckets() { return max_buckets; } uint32_t get_op_mask() { return op_mask; } From 04b8721dd4b3f02abcb9fce7fcfbcbefb44fbe85 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Sat, 15 Aug 2020 07:51:59 +0800 Subject: [PATCH 0015/2866] client: minor cleanup to Client related code Remove the extra while spaces to clean up the code style. Signed-off-by: Xiubo Li --- src/client/Client.cc | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index ddf42266a5c78..dad50d2cb0df3 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -3585,7 +3585,7 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, if (flush) follows = in->snaprealm->get_snap_context().seq; - + auto m = make_message(op, in->ino, 0, @@ -3604,14 +3604,14 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, m->head.uid = in->uid; m->head.gid = in->gid; m->head.mode = in->mode; - + m->head.nlink = in->nlink; - + if (flush & CEPH_CAP_XATTR_EXCL) { encode(in->xattrs, m->xattrbl); m->head.xattr_version = in->xattr_version; } - + m->size = in->size; m->max_size = in->max_size; m->truncate_seq = in->truncate_seq; @@ -3761,11 +3761,7 @@ void Client::check_caps(Inode *in, unsigned flags) used &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO); } - - for (auto &p : in->caps) { - mds_rank_t mds = p.first; - Cap &cap = p.second; - + for (auto &[mds, cap] : in->caps) { MetaSession *session = &mds_sessions.at(mds); cap_used = used; @@ -3773,7 +3769,7 @@ void Client::check_caps(Inode *in, unsigned flags) cap_used &= ~in->auth_cap->issued; revoking = cap.implemented & ~cap.issued; - + ldout(cct, 10) << " cap mds." << mds << " issued " << ccap_string(cap.issued) << " implemented " << ccap_string(cap.implemented) From 60338473403de8c7dcbbdce24808821dd3260c6f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 14 Aug 2020 11:38:03 +0800 Subject: [PATCH 0016/2866] client: minor cleanup to MetaRequest class To make the code more readable and there is no need to care about the members order when initlizeing them. 
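For illustration (abridged; the complete change is in the MetaRequest.h hunk
below), the defaults move from the constructor's initializer list onto the
member declarations:

    // before: every member listed, in declaration order, in the constructor
    uint64_t tid;
    int num_fwd;
    explicit MetaRequest(int op) : tid(0), /* ... */ num_fwd(0) { /* ... */ }

    // after: in-class default member initializers
    uint64_t tid = 0;
    int num_fwd = 0;
    explicit MetaRequest(int op) : item(this), unsafe_item(this),
      unsafe_dir_item(this), unsafe_target_item(this) { /* ... */ }
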
Signed-off-by: Xiubo Li --- src/client/MetaRequest.h | 67 ++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h index fa97bf0f31626..630f5ee15bc87 100644 --- a/src/client/MetaRequest.h +++ b/src/client/MetaRequest.h @@ -21,73 +21,60 @@ class dir_result_t; struct MetaRequest { private: InodeRef _inode, _old_inode, _other_inode; - Dentry *_dentry; //associated with path - Dentry *_old_dentry; //associated with path2 - int abort_rc; + Dentry *_dentry = NULL; //associated with path + Dentry *_old_dentry = NULL; //associated with path2 + int abort_rc = 0; public: - uint64_t tid; + uint64_t tid = 0; utime_t op_stamp; ceph_mds_request_head head; filepath path, path2; std::string alternate_name; bufferlist data; - int inode_drop; //the inode caps this operation will drop - int inode_unless; //unless we have these caps already - int old_inode_drop, old_inode_unless; - int dentry_drop, dentry_unless; - int old_dentry_drop, old_dentry_unless; - int other_inode_drop, other_inode_unless; + int inode_drop = 0; //the inode caps this operation will drop + int inode_unless = 0; //unless we have these caps already + int old_inode_drop = 0, old_inode_unless = 0; + int dentry_drop = 0, dentry_unless = 0; + int old_dentry_drop = 0, old_dentry_unless = 0; + int other_inode_drop = 0, other_inode_unless = 0; std::vector cap_releases; - int regetattr_mask; // getattr mask if i need to re-stat after a traceless reply + int regetattr_mask = 0; // getattr mask if i need to re-stat after a traceless reply utime_t sent_stamp; - mds_rank_t mds; // who i am asking - mds_rank_t resend_mds; // someone wants you to (re)send the request here - bool send_to_auth; // must send to auth mds - __u32 sent_on_mseq; // mseq at last submission of this request - int num_fwd; // # of times i've been forwarded - int retry_attempt; + mds_rank_t mds = -1; // who i am asking + mds_rank_t resend_mds = -1; // someone wants you to (re)send the request here + bool send_to_auth = false; // must send to auth mds + __u32 sent_on_mseq = 0; // mseq at last submission of this request + int num_fwd = 0; // # of times i've been forwarded + int retry_attempt = 0; std::atomic ref = { 1 }; - ceph::cref_t reply; // the reply - bool kick; - bool success; - + ceph::cref_t reply = NULL; // the reply + bool kick = false; + bool success = false; + // readdir result - dir_result_t *dirp; + dir_result_t *dirp = NULL; //possible responses - bool got_unsafe; + bool got_unsafe = false; xlist::item item; xlist::item unsafe_item; xlist::item unsafe_dir_item; xlist::item unsafe_target_item; - ceph::condition_variable *caller_cond; // who to take up - ceph::condition_variable *dispatch_cond; // who to kick back + ceph::condition_variable *caller_cond = NULL; // who to take up + ceph::condition_variable *dispatch_cond = NULL; // who to kick back std::list waitfor_safe; InodeRef target; UserPerm perms; explicit MetaRequest(int op) : - _dentry(NULL), _old_dentry(NULL), abort_rc(0), - tid(0), - inode_drop(0), inode_unless(0), - old_inode_drop(0), old_inode_unless(0), - dentry_drop(0), dentry_unless(0), - old_dentry_drop(0), old_dentry_unless(0), - other_inode_drop(0), other_inode_unless(0), - regetattr_mask(0), - mds(-1), resend_mds(-1), send_to_auth(false), sent_on_mseq(0), - num_fwd(0), retry_attempt(0), - reply(0), - kick(false), success(false), dirp(NULL), - got_unsafe(false), item(this), unsafe_item(this), - unsafe_dir_item(this), unsafe_target_item(this), - 
caller_cond(0), dispatch_cond(0) { + item(this), unsafe_item(this), unsafe_dir_item(this), + unsafe_target_item(this) { memset(&head, 0, sizeof(head)); head.op = op; } From 5ce296c23a6ef2225345c9114cac3976f2898949 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 9 Feb 2021 11:07:44 +0800 Subject: [PATCH 0017/2866] client: minor cleanup to Fh related code To make the code more readable and there is no need to care about the members order when initlizeing them. Signed-off-by: Xiubo Li --- src/client/Fh.cc | 4 ++-- src/client/Fh.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/client/Fh.cc b/src/client/Fh.cc index 62bd261404e21..e72390df714c6 100644 --- a/src/client/Fh.cc +++ b/src/client/Fh.cc @@ -19,8 +19,8 @@ #include "Fh.h" Fh::Fh(InodeRef in, int flags, int cmode, uint64_t _gen, const UserPerm &perms) : - inode(in), _ref(1), pos(0), mds(0), mode(cmode), gen(_gen), flags(flags), - pos_locked(false), actor_perms(perms), readahead() + inode(in), mode(cmode), gen(_gen), flags(flags), actor_perms(perms), + readahead() { inode->add_fh(this); } diff --git a/src/client/Fh.h b/src/client/Fh.h index c3355ba6c5266..3448f5b6ac88e 100644 --- a/src/client/Fh.h +++ b/src/client/Fh.h @@ -13,14 +13,14 @@ class Inode; struct Fh { InodeRef inode; - int _ref; - loff_t pos; - int mds; // have to talk to mds we opened with (for now) + int _ref = 1; + loff_t pos = 0; + int mds = 0; // have to talk to mds we opened with (for now) int mode; // the mode i opened the file with uint64_t gen; int flags; - bool pos_locked; // pos is currently in use + bool pos_locked = false; // pos is currently in use std::list pos_waiters; // waiters for pos UserPerm actor_perms; // perms I opened the file with From 730dd8a87a3dbeba348dd419869ac110c57ed47a Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 17 Aug 2020 22:10:42 +0800 Subject: [PATCH 0018/2866] client: switch to use shared_ptr for mds_sessions To break the `client_lock`, when we need to unlock it and lock it later, during the gap we must make sure that the `session` won't be released. Signed-off-by: Xiubo Li --- src/client/Client.cc | 198 +++++++++++++++++++-------------------- src/client/Client.h | 8 +- src/client/MetaSession.h | 1 + 3 files changed, 103 insertions(+), 104 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index dad50d2cb0df3..00f38421f4b03 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1652,7 +1652,7 @@ void Client::dump_mds_sessions(Formatter *f, bool cap_dump) f->open_array_section("sessions"); for (const auto &p : mds_sessions) { f->open_object_section("session"); - p.second.dump(f, cap_dump); + p.second->dump(f, cap_dump); f->close_section(); } f->close_section(); @@ -1808,7 +1808,7 @@ int Client::make_request(MetaRequest *request, if (use_mds >= 0) request->resend_mds = use_mds; - MetaSession *session = NULL; + MetaSessionRef session = NULL; while (1) { if (request->aborted()) break; @@ -1859,11 +1859,11 @@ int Client::make_request(MetaRequest *request, if (!have_open_session(mds)) continue; } else { - session = &mds_sessions.at(mds); + session = mds_sessions.at(mds); } // send request. 
- send_request(request, session); + send_request(request, session.get()); // wait for signal ldout(cct, 20) << "awaiting reply|forward|kick on " << &caller_cond << dendl; @@ -1905,7 +1905,7 @@ int Client::make_request(MetaRequest *request, request->dispatch_cond = 0; if (r >= 0 && ptarget) - r = verify_reply_trace(r, session, request, reply, ptarget, pcreated, perms); + r = verify_reply_trace(r, session.get(), request, reply, ptarget, pcreated, perms); if (pdirbl) *pdirbl = reply->get_extra_bl(); @@ -2069,24 +2069,24 @@ bool Client::have_open_session(mds_rank_t mds) { const auto &it = mds_sessions.find(mds); return it != mds_sessions.end() && - (it->second.state == MetaSession::STATE_OPEN || - it->second.state == MetaSession::STATE_STALE); + (it->second->state == MetaSession::STATE_OPEN || + it->second->state == MetaSession::STATE_STALE); } -MetaSession *Client::_get_mds_session(mds_rank_t mds, Connection *con) +MetaSessionRef Client::_get_mds_session(mds_rank_t mds, Connection *con) { const auto &it = mds_sessions.find(mds); - if (it == mds_sessions.end() || it->second.con != con) { + if (it == mds_sessions.end() || it->second->con != con) { return NULL; } else { - return &it->second; + return it->second; } } -MetaSession *Client::_get_or_open_mds_session(mds_rank_t mds) +MetaSessionRef Client::_get_or_open_mds_session(mds_rank_t mds) { auto it = mds_sessions.find(mds); - return it == mds_sessions.end() ? _open_mds_session(mds) : &it->second; + return it == mds_sessions.end() ? _open_mds_session(mds) : it->second; } /** @@ -2160,15 +2160,15 @@ void Client::update_metadata(std::string const &k, std::string const &v) metadata[k] = v; } -MetaSession *Client::_open_mds_session(mds_rank_t mds) +MetaSessionRef Client::_open_mds_session(mds_rank_t mds) { ldout(cct, 10) << __func__ << " mds." << mds << dendl; auto addrs = mdsmap->get_addrs(mds); auto em = mds_sessions.emplace(std::piecewise_construct, std::forward_as_tuple(mds), - std::forward_as_tuple(mds, messenger->connect_to_mds(addrs), addrs)); + std::forward_as_tuple(new MetaSession(mds, messenger->connect_to_mds(addrs), addrs))); ceph_assert(em.second); /* not already present */ - MetaSession *session = &em.first->second; + auto session = em.first->second; auto m = make_message(CEPH_SESSION_REQUEST_OPEN); m->metadata = metadata; @@ -2208,7 +2208,7 @@ void Client::handle_client_session(const MConstRef& m) ldout(cct, 10) << __func__ << " " << *m << " from mds." << from << dendl; std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(from, m->get_connection().get()); + auto session = _get_mds_session(from, m->get_connection().get()); if (!session) { ldout(cct, 10) << " discarding session message from sessionless mds " << m->get_source_inst() << dendl; return; @@ -2222,13 +2222,13 @@ void Client::handle_client_session(const MConstRef& m) if (!missing_features.empty()) { lderr(cct) << "mds." 
<< from << " lacks required features '" << missing_features << "', closing session " << dendl; - _close_mds_session(session); - _closed_mds_session(session, -CEPHFS_EPERM, true); + _close_mds_session(session.get()); + _closed_mds_session(session.get(), -CEPHFS_EPERM, true); break; } session->mds_features = std::move(m->supported_features); - renew_caps(session); + renew_caps(session.get()); session->state = MetaSession::STATE_OPEN; if (is_unmounting()) mount_cond.notify_all(); @@ -2239,7 +2239,7 @@ void Client::handle_client_session(const MConstRef& m) } case CEPH_SESSION_CLOSE: - _closed_mds_session(session); + _closed_mds_session(session.get()); break; case CEPH_SESSION_RENEWCAPS: @@ -2248,7 +2248,7 @@ void Client::handle_client_session(const MConstRef& m) session->cap_ttl = session->last_cap_renew_request + mdsmap->get_session_timeout(); if (was_stale) - wake_up_session_caps(session, false); + wake_up_session_caps(session.get(), false); } break; @@ -2257,7 +2257,7 @@ void Client::handle_client_session(const MConstRef& m) session->cap_gen++; session->cap_ttl = ceph_clock_now(); session->cap_ttl -= 1; - renew_caps(session); + renew_caps(session.get()); break; case CEPH_SESSION_RECALL_STATE: @@ -2273,7 +2273,7 @@ void Client::handle_client_session(const MConstRef& m) * by tick(). */ renew_and_flush_cap_releases(); - trim_caps(session, m->get_max_caps()); + trim_caps(session.get(), m->get_max_caps()); break; case CEPH_SESSION_FLUSHMSG: @@ -2285,7 +2285,7 @@ void Client::handle_client_session(const MConstRef& m) break; case CEPH_SESSION_FORCE_RO: - force_session_readonly(session); + force_session_readonly(session.get()); break; case CEPH_SESSION_REJECT: @@ -2298,7 +2298,7 @@ void Client::handle_client_session(const MConstRef& m) error_str = "unknown error"; lderr(cct) << "mds." 
<< from << " rejected us (" << error_str << ")" << dendl; - _closed_mds_session(session, -CEPHFS_EPERM, true); + _closed_mds_session(session.get(), -CEPHFS_EPERM, true); } break; @@ -2312,7 +2312,7 @@ bool Client::_any_stale_sessions() const ceph_assert(ceph_mutex_is_locked_by_me(client_lock)); for (const auto &p : mds_sessions) { - if (p.second.state == MetaSession::STATE_STALE) { + if (p.second->state == MetaSession::STATE_STALE) { return true; } } @@ -2325,14 +2325,13 @@ void Client::_kick_stale_sessions() ldout(cct, 1) << __func__ << dendl; for (auto it = mds_sessions.begin(); it != mds_sessions.end(); ) { - MetaSession &s = it->second; - if (s.state == MetaSession::STATE_REJECTED) { - mds_sessions.erase(it++); + auto s = it->second; + if (s->state == MetaSession::STATE_REJECTED) { + mds_sessions.erase(it->first); continue; } - ++it; - if (s.state == MetaSession::STATE_STALE) - _closed_mds_session(&s); + if (s->state == MetaSession::STATE_STALE) + _closed_mds_session(s.get()); } } @@ -2431,7 +2430,7 @@ void Client::handle_client_request_forward(const MConstRefget_source().num()); std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(mds, fwd->get_connection().get()); + auto session = _get_mds_session(mds, fwd->get_connection().get()); if (!session) { return; } @@ -2479,7 +2478,7 @@ void Client::handle_client_reply(const MConstRef& reply) mds_rank_t mds_num = mds_rank_t(reply->get_source().num()); std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(mds_num, reply->get_connection().get()); + auto session = _get_mds_session(mds_num, reply->get_connection().get()); if (!session) { return; } @@ -2525,7 +2524,7 @@ void Client::handle_client_reply(const MConstRef& reply) ceph_assert(!request->reply); request->reply = reply; - insert_trace(request, session); + insert_trace(request, session.get()); // Handle unsafe reply if (!is_safe) { @@ -2867,10 +2866,9 @@ void Client::handle_mds_map(const MConstRef& m) _mdsmap.swap(mdsmap); // reset session - for (auto p = mds_sessions.begin(); p != mds_sessions.end(); ) { - mds_rank_t mds = p->first; - MetaSession *session = &p->second; - ++p; + for (auto &p : mds_sessions) { + mds_rank_t mds = p.first; + auto session = p.second; int oldstate = _mdsmap->get_state(mds); int newstate = mdsmap->get_state(mds); @@ -2889,33 +2887,33 @@ void Client::handle_mds_map(const MConstRef& m) // When new MDS starts to take over, notify kernel to trim unused entries // in its dcache/icache. Hopefully, the kernel will release some unused // inodes before the new MDS enters reconnect state. - trim_cache_for_reconnect(session); + trim_cache_for_reconnect(session.get()); } else if (oldstate == newstate) continue; // no change session->mds_state = newstate; if (newstate == MDSMap::STATE_RECONNECT) { session->con = messenger->connect_to_mds(session->addrs); - send_reconnect(session); + send_reconnect(session.get()); } else if (newstate > MDSMap::STATE_RECONNECT) { if (oldstate < MDSMap::STATE_RECONNECT) { ldout(cct, 1) << "we may miss the MDSMap::RECONNECT, close mds session ... 
" << dendl; - _closed_mds_session(session); + _closed_mds_session(session.get()); continue; } if (newstate >= MDSMap::STATE_ACTIVE) { if (oldstate < MDSMap::STATE_ACTIVE) { // kick new requests - kick_requests(session); - kick_flushing_caps(session); + kick_requests(session.get()); + kick_flushing_caps(session.get()); signal_context_list(session->waiting_for_open); - wake_up_session_caps(session, true); + wake_up_session_caps(session.get(), true); } connect_mds_targets(mds); } } else if (newstate == MDSMap::STATE_NULL && mds >= mdsmap->get_max_mds()) { - _closed_mds_session(session); + _closed_mds_session(session.get()); } } @@ -3072,9 +3070,9 @@ void Client::wait_unsafe_requests() { list last_unsafe_reqs; for (const auto &p : mds_sessions) { - const MetaSession &s = p.second; - if (!s.unsafe_requests.empty()) { - MetaRequest *req = s.unsafe_requests.back(); + const auto s = p.second; + if (!s->unsafe_requests.empty()) { + MetaRequest *req = s->unsafe_requests.back(); req->get(); last_unsafe_reqs.push_back(req); } @@ -3154,12 +3152,12 @@ void Client::handle_lease(const MConstRef& m) mds_rank_t mds = mds_rank_t(m->get_source().num()); std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); + auto session = _get_mds_session(mds, m->get_connection().get()); if (!session) { return; } - got_mds_push(session); + got_mds_push(session.get()); ceph_seq_t seq = m->get_seq(); @@ -3762,7 +3760,7 @@ void Client::check_caps(Inode *in, unsigned flags) } for (auto &[mds, cap] : in->caps) { - MetaSession *session = &mds_sessions.at(mds); + auto session = mds_sessions.at(mds); cap_used = used; if (in->auth_cap && &cap != in->auth_cap) @@ -3817,7 +3815,7 @@ void Client::check_caps(Inode *in, unsigned flags) if (in->flags & I_KICK_FLUSH) { ldout(cct, 20) << " reflushing caps (check_caps) on " << *in << " to mds." << mds << dendl; - kick_flushing_caps(in, session); + kick_flushing_caps(in, session.get()); } if (!in->cap_snaps.empty() && in->cap_snaps.rbegin()->second.flush_tid == 0) @@ -3836,7 +3834,7 @@ void Client::check_caps(Inode *in, unsigned flags) flush_tid = 0; } - send_cap(in, session, &cap, msg_flags, cap_used, wanted, retain, + send_cap(in, session.get(), &cap, msg_flags, cap_used, wanted, retain, flushing, flush_tid); } } @@ -4704,7 +4702,7 @@ void Client::wait_sync_caps(ceph_tid_t want) ldout(cct, 10) << __func__ << " want " << want << " (last is " << last_flush_tid << ", " << num_flushing_caps << " total flushing)" << dendl; for (auto &p : mds_sessions) { - MetaSession *s = &p.second; + auto s = p.second; if (s->flushing_caps_tids.empty()) continue; ceph_tid_t oldest_tid = *s->flushing_caps_tids.begin(); @@ -4967,12 +4965,12 @@ void Client::handle_snap(const MConstRef& m) mds_rank_t mds = mds_rank_t(m->get_source().num()); std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); + auto session = _get_mds_session(mds, m->get_connection().get()); if (!session) { return; } - got_mds_push(session); + got_mds_push(session.get()); map to_move; SnapRealm *realm = 0; @@ -5039,12 +5037,12 @@ void Client::handle_quota(const MConstRef& m) mds_rank_t mds = mds_rank_t(m->get_source().num()); std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); + auto session = _get_mds_session(mds, m->get_connection().get()); if (!session) { return; } - got_mds_push(session); + got_mds_push(session.get()); ldout(cct, 10) << __func__ << " " << *m << " from mds." 
<< mds << dendl; @@ -5065,7 +5063,7 @@ void Client::handle_caps(const MConstRef& m) mds_rank_t mds = mds_rank_t(m->get_source().num()); std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); + auto session = _get_mds_session(mds, m->get_connection().get()); if (!session) { return; } @@ -5080,7 +5078,7 @@ void Client::handle_caps(const MConstRef& m) set_cap_epoch_barrier(m->osd_epoch_barrier); } - got_mds_push(session); + got_mds_push(session.get()); Inode *in; vinodeno_t vino(m->get_ino(), CEPH_NOSNAP); @@ -5105,20 +5103,20 @@ void Client::handle_caps(const MConstRef& m) } switch (m->get_op()) { - case CEPH_CAP_OP_EXPORT: return handle_cap_export(session, in, m); - case CEPH_CAP_OP_FLUSHSNAP_ACK: return handle_cap_flushsnap_ack(session, in, m); - case CEPH_CAP_OP_IMPORT: /* no return */ handle_cap_import(session, in, m); + case CEPH_CAP_OP_EXPORT: return handle_cap_export(session.get(), in, m); + case CEPH_CAP_OP_FLUSHSNAP_ACK: return handle_cap_flushsnap_ack(session.get(), in, m); + case CEPH_CAP_OP_IMPORT: /* no return */ handle_cap_import(session.get(), in, m); } if (auto it = in->caps.find(mds); it != in->caps.end()) { Cap &cap = in->caps.at(mds); switch (m->get_op()) { - case CEPH_CAP_OP_TRUNC: return handle_cap_trunc(session, in, m); + case CEPH_CAP_OP_TRUNC: return handle_cap_trunc(session.get(), in, m); case CEPH_CAP_OP_IMPORT: case CEPH_CAP_OP_REVOKE: - case CEPH_CAP_OP_GRANT: return handle_cap_grant(session, in, &cap, m); - case CEPH_CAP_OP_FLUSH_ACK: return handle_cap_flush_ack(session, in, &cap, m); + case CEPH_CAP_OP_GRANT: return handle_cap_grant(session.get(), in, &cap, m); + case CEPH_CAP_OP_FLUSH_ACK: return handle_cap_flush_ack(session.get(), in, &cap, m); } } else { ldout(cct, 5) << __func__ << " don't have " << *in << " cap on mds." << mds << dendl; @@ -5182,7 +5180,7 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef< if (cap.cap_id == m->get_cap_id()) { if (m->peer.cap_id) { const auto peer_mds = mds_rank_t(m->peer.mds); - MetaSession *tsession = _get_or_open_mds_session(peer_mds); + auto tsession = _get_or_open_mds_session(peer_mds); auto it = in->caps.find(peer_mds); if (it != in->caps.end()) { Cap &tcap = it->second; @@ -5196,10 +5194,10 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef< if (&cap == in->auth_cap) in->auth_cap = &tcap; if (in->auth_cap == &tcap && in->flushing_cap_item.is_on_list()) - adjust_session_flushing_caps(in, session, tsession); + adjust_session_flushing_caps(in, session, tsession.get()); } } else { - add_update_cap(in, tsession, m->peer.cap_id, cap.issued, 0, + add_update_cap(in, tsession.get(), m->peer.cap_id, cap.issued, 0, m->peer.seq - 1, m->peer.mseq, (uint64_t)-1, &cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0, cap.latest_perms); @@ -6253,7 +6251,7 @@ int Client::mount(const std::string &mount_root, const UserPerm& perms, void Client::_close_sessions() { for (auto it = mds_sessions.begin(); it != mds_sessions.end(); ) { - if (it->second.state == MetaSession::STATE_REJECTED) + if (it->second->state == MetaSession::STATE_REJECTED) mds_sessions.erase(it++); else ++it; @@ -6262,8 +6260,8 @@ void Client::_close_sessions() while (!mds_sessions.empty()) { // send session closes! 
for (auto &p : mds_sessions) { - if (p.second.state != MetaSession::STATE_CLOSING) { - _close_mds_session(&p.second); + if (p.second->state != MetaSession::STATE_CLOSING) { + _close_mds_session(p.second.get()); mds_ranks_closing.insert(p.first); } } @@ -6280,7 +6278,7 @@ void Client::_close_sessions() while (!mds_ranks_closing.empty()) { auto session = mds_sessions.at(*mds_ranks_closing.begin()); // this prunes entry from mds_sessions and mds_ranks_closing - _closed_mds_session(&session, -CEPHFS_ETIMEDOUT); + _closed_mds_session(session.get(), -CEPHFS_ETIMEDOUT); } } @@ -6305,7 +6303,7 @@ void Client::flush_mdlog_sync(Inode *in) for (auto &rank : anchor) { auto session = &mds_sessions.at(rank); - flush_mdlog(session); + flush_mdlog(session->get()); } } @@ -6314,7 +6312,7 @@ void Client::flush_mdlog_sync() if (mds_requests.empty()) return; for (auto &p : mds_sessions) { - flush_mdlog(&p.second); + flush_mdlog(p.second.get()); } } @@ -6353,8 +6351,8 @@ void Client::_abort_mds_sessions(int err) // Force-close all sessions while(!mds_sessions.empty()) { - auto& session = mds_sessions.begin()->second; - _closed_mds_session(&session, err); + auto session = mds_sessions.begin()->second; + _closed_mds_session(session.get(), err); } } @@ -6518,16 +6516,16 @@ void Client::flush_cap_releases() // send any cap releases for (auto &p : mds_sessions) { - auto &session = p.second; - if (session.release && mdsmap->is_clientreplay_or_active_or_stopping( + auto session = p.second; + if (session->release && mdsmap->is_clientreplay_or_active_or_stopping( p.first)) { - nr_caps += session.release->caps.size(); + nr_caps += session->release->caps.size(); if (cct->_conf->client_inject_release_failure) { ldout(cct, 20) << __func__ << " injecting failure to send cap release message" << dendl; } else { - session.con->send_message2(std::move(session.release)); + session->con->send_message2(std::move(session->release)); } - session.release.reset(); + session->release.reset(); } } @@ -6570,7 +6568,7 @@ void Client::tick() } signal_cond_list(waiting_for_mdsmap); for (auto &p : mds_sessions) { - signal_context_list(p.second.waiting_for_open); + signal_context_list(p.second->waiting_for_open); } } } @@ -6719,7 +6717,7 @@ void Client::renew_caps() for (auto &p : mds_sessions) { ldout(cct, 15) << "renew_caps requesting from mds." 
<< p.first << dendl; if (mdsmap->get_state(p.first) >= MDSMap::STATE_REJOIN) - renew_caps(&p.second); + renew_caps(p.second.get()); } } @@ -6764,13 +6762,13 @@ bool Client::_dentry_valid(const Dentry *dn) utime_t now = ceph_clock_now(); if (dn->lease_mds >= 0 && dn->lease_ttl > now && mds_sessions.count(dn->lease_mds)) { - MetaSession &s = mds_sessions.at(dn->lease_mds); - if (s.cap_ttl > now && s.cap_gen == dn->lease_gen) { + auto s = mds_sessions.at(dn->lease_mds); + if (s->cap_ttl > now && s->cap_gen == dn->lease_gen) { dlease_hit(); return true; } - ldout(cct, 20) << " bad lease, cap_ttl " << s.cap_ttl << ", cap_gen " << s.cap_gen + ldout(cct, 20) << " bad lease, cap_ttl " << s->cap_ttl << ", cap_gen " << s->cap_gen << " vs lease_gen " << dn->lease_gen << dendl; } @@ -14995,11 +14993,11 @@ void Client::ms_handle_remote_reset(Connection *con) { // kludge to figure out which mds this is; fixme with a Connection* state mds_rank_t mds = MDS_RANK_NONE; - MetaSession *s = NULL; + MetaSessionRef s = NULL; for (auto &p : mds_sessions) { if (mdsmap->have_inst(p.first) && mdsmap->get_addrs(p.first) == con->get_peer_addrs()) { mds = p.first; - s = &p.second; + s = p.second; } } if (mds >= 0) { @@ -15007,7 +15005,7 @@ void Client::ms_handle_remote_reset(Connection *con) switch (s->state) { case MetaSession::STATE_CLOSING: ldout(cct, 1) << "reset from mds we were closing; we'll call that closed" << dendl; - _closed_mds_session(s); + _closed_mds_session(s.get()); break; case MetaSession::STATE_OPENING: @@ -15015,8 +15013,8 @@ void Client::ms_handle_remote_reset(Connection *con) ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl; list waiters; waiters.swap(s->waiting_for_open); - _closed_mds_session(s); - MetaSession *news = _get_or_open_mds_session(mds); + _closed_mds_session(s.get()); + auto news = _get_or_open_mds_session(mds); news->waiting_for_open.swap(waiters); } break; @@ -15026,7 +15024,7 @@ void Client::ms_handle_remote_reset(Connection *con) objecter->maybe_request_map(); /* to check if we are blocklisted */ if (cct->_conf.get_val("client_reconnect_stale")) { ldout(cct, 1) << "reset from mds we were open; close mds session for reconnect" << dendl; - _closed_mds_session(s); + _closed_mds_session(s.get()); } else { ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl; s->state = MetaSession::STATE_STALE; @@ -15414,7 +15412,7 @@ int Client::start_reclaim(const std::string& uuid, unsigned flags, continue; } - MetaSession *session; + MetaSessionRef session; if (!have_open_session(mds)) { session = _get_or_open_mds_session(mds); if (session->state == MetaSession::STATE_REJECTED) @@ -15428,7 +15426,7 @@ int Client::start_reclaim(const std::string& uuid, unsigned flags, continue; } - session = &mds_sessions.at(mds); + session = mds_sessions.at(mds); if (!session->mds_features.test(CEPHFS_FEATURE_RECLAIM_CLIENT)) return -CEPHFS_EOPNOTSUPP; @@ -15482,14 +15480,14 @@ void Client::finish_reclaim() auto it = metadata.find("reclaiming_uuid"); if (it == metadata.end()) { for (auto &p : mds_sessions) - p.second.reclaim_state = MetaSession::RECLAIM_NULL; + p.second->reclaim_state = MetaSession::RECLAIM_NULL; return; } for (auto &p : mds_sessions) { - p.second.reclaim_state = MetaSession::RECLAIM_NULL; + p.second->reclaim_state = MetaSession::RECLAIM_NULL; auto m = make_message("", MClientReclaim::FLAG_FINISH); - p.second.con->send_message2(std::move(m)); + p.second->con->send_message2(std::move(m)); } metadata["uuid"] = it->second; @@ -15502,7 +15500,7 @@ void 
Client::handle_client_reclaim_reply(const MConstRef& r ldout(cct, 10) << __func__ << " " << *reply << " from mds." << from << dendl; std::scoped_lock cl(client_lock); - MetaSession *session = _get_mds_session(from, reply->get_connection().get()); + auto session = _get_mds_session(from, reply->get_connection().get()); if (!session) { ldout(cct, 10) << " discarding reclaim reply from sessionless mds." << from << dendl; return; diff --git a/src/client/Client.h b/src/client/Client.h index bbf1619fedfdd..b6a76faca5083 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -901,9 +901,9 @@ class Client : public Dispatcher, public md_config_obs_t { void get_session_metadata(std::map *meta) const; bool have_open_session(mds_rank_t mds); void got_mds_push(MetaSession *s); - MetaSession *_get_mds_session(mds_rank_t mds, Connection *con); ///< return session for mds *and* con; null otherwise - MetaSession *_get_or_open_mds_session(mds_rank_t mds); - MetaSession *_open_mds_session(mds_rank_t mds); + MetaSessionRef _get_mds_session(mds_rank_t mds, Connection *con); ///< return session for mds *and* con; null otherwise + MetaSessionRef _get_or_open_mds_session(mds_rank_t mds); + MetaSessionRef _open_mds_session(mds_rank_t mds); void _close_mds_session(MetaSession *s); void _closed_mds_session(MetaSession *s, int err=0, bool rejected=false); bool _any_stale_sessions() const; @@ -1476,7 +1476,7 @@ class Client : public Dispatcher, public md_config_obs_t { epoch_t cap_epoch_barrier = 0; // mds sessions - map mds_sessions; // mds -> push seq + map mds_sessions; // mds -> push seq std::set mds_ranks_closing; // mds ranks currently tearing down sessions std::list waiting_for_mdsmap; diff --git a/src/client/MetaSession.h b/src/client/MetaSession.h index 775912b53a1a2..cfeb26a471c90 100644 --- a/src/client/MetaSession.h +++ b/src/client/MetaSession.h @@ -68,4 +68,5 @@ struct MetaSession { ceph_seq_t mseq, epoch_t osd_barrier); }; +using MetaSessionRef = std::shared_ptr; #endif From d4688848fa4710d50db5488eaefd05defcb78263 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 10 Feb 2021 14:17:11 +0800 Subject: [PATCH 0019/2866] client: adjust the Fh class members order Some members, once initialized in the Fh constructor, won't change for the whole lifetime of the Fh, so putting them under the client_lock makes no sense. This could also help simplify the inode lock code in some cases. Signed-off-by: Xiubo Li --- src/client/Fh.cc | 2 +- src/client/Fh.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/client/Fh.cc b/src/client/Fh.cc index e72390df714c6..e7dd10b366641 100644 --- a/src/client/Fh.cc +++ b/src/client/Fh.cc @@ -19,7 +19,7 @@ #include "Fh.h" Fh::Fh(InodeRef in, int flags, int cmode, uint64_t _gen, const UserPerm &perms) : - inode(in), mode(cmode), gen(_gen), flags(flags), actor_perms(perms), + inode(in), flags(flags), gen(_gen), actor_perms(perms), mode(cmode), readahead() { inode->add_fh(this); diff --git a/src/client/Fh.h b/src/client/Fh.h index 3448f5b6ac88e..4657fac8457dd 100644 --- a/src/client/Fh.h +++ b/src/client/Fh.h @@ -13,18 +13,22 @@ class Inode; struct Fh { InodeRef inode; + int flags; + uint64_t gen; + UserPerm actor_perms; // perms I opened the file with + + // the members above once initialized in the constructor + // they won't change, and putting them under the client_lock + // makes no sense.
+ int _ref = 1; loff_t pos = 0; int mds = 0; // have to talk to mds we opened with (for now) int mode; // the mode i opened the file with - uint64_t gen; - int flags; bool pos_locked = false; // pos is currently in use std::list pos_waiters; // waiters for pos - UserPerm actor_perms; // perms I opened the file with - Readahead readahead; // file lock From 3d48ecfc65c2e8df635fa404e774028e366b21e3 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 10 Feb 2021 14:19:35 +0800 Subject: [PATCH 0020/2866] client: remove useless mds member in Fh class Signed-off-by: Xiubo Li --- src/client/Fh.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/client/Fh.h b/src/client/Fh.h index 4657fac8457dd..434716fee2bac 100644 --- a/src/client/Fh.h +++ b/src/client/Fh.h @@ -23,7 +23,6 @@ struct Fh { int _ref = 1; loff_t pos = 0; - int mds = 0; // have to talk to mds we opened with (for now) int mode; // the mode i opened the file with bool pos_locked = false; // pos is currently in use From 7a3198ef6052676fa98bc2d7dd2805ca056007b9 Mon Sep 17 00:00:00 2001 From: wzbxqt Date: Fri, 6 Aug 2021 23:09:05 +0800 Subject: [PATCH 0021/2866] rgw,doc: fix command problem Signed-off-by: wangzhong --- doc/radosgw/STS.rst | 10 +++++----- doc/radosgw/role.rst | 8 ++++---- src/rgw/rgw_admin.cc | 4 ++-- src/test/cli/radosgw-admin/help.t | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/radosgw/STS.rst b/doc/radosgw/STS.rst index d843ea5d37c1a..6cfb28dd4f1ca 100644 --- a/doc/radosgw/STS.rst +++ b/doc/radosgw/STS.rst @@ -67,7 +67,7 @@ The trust between the IDP and the role is created by adding a Condition to the r allows access only to applications with the app id given in the trust policy document. The Condition is of the form:: - "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Principal\":{\"Federated\":[\"arn:aws:iam:::oidc-provider/\"]},\"Action\":[\"sts:AssumeRoleWithWebIdentity\"],\"Condition\":{\"StringEquals\":{\" :app_id\":\"\"\}\}\}\]\}" + '''{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Federated":["arn:aws:iam:::oidc-provider/"]},"Action":["sts:AssumeRoleWithWebIdentity"],"Condition":{"StringEquals":{" :app_id":""}}}]}''' The app_id in the condition above must match the 'aud' field of the incoming token. @@ -106,7 +106,7 @@ according to the permission policy attached to the role. region_name='' ) - policy_document = "{\"Version\":\"2012-10-17\",\"Statement\":{\"Effect\":\"Allow\",\"Principal\":{\"AWS\":[\"arn:aws:iam:::user/TESTER1\"]},\"Action\":[\"sts:AssumeRole\"]}]}" + policy_document = '''{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"AWS":["arn:aws:iam:::user/TESTER1"]},"Action":["sts:AssumeRole"]}]}''' role_response = iam_client.create_role( AssumeRolePolicyDocument=policy_document, @@ -114,7 +114,7 @@ according to the permission policy attached to the role. RoleName='S3Access', ) - role_policy = "{\"Version\":\"2012-10-17\",\"Statement\":{\"Effect\":\"Allow\",\"Action\":\"s3:*\",\"Resource\":\"arn:aws:s3:::*\"}}" + role_policy = '''{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":"s3:*","Resource":"arn:aws:s3:::*"}}''' response = iam_client.put_role_policy( RoleName='S3Access', @@ -171,14 +171,14 @@ according to permission policy of the role. 
] ) - policy_document = "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Principal\":{\"Federated\":[\"arn:aws:iam:::oidc-provider/localhost:8080/auth/realms/demo\"]},\"Action\":[\"sts:AssumeRoleWithWebIdentity\"],\"Condition\":{\"StringEquals\":{\"localhost:8080/auth/realms/demo:app_id\":\"customer-portal\"}}}]}" + policy_document = '''{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Federated":["arn:aws:iam:::oidc-provider/localhost:8080/auth/realms/demo"]},"Action":["sts:AssumeRoleWithWebIdentity"],"Condition":{"StringEquals":{"localhost:8080/auth/realms/demo:app_id":"customer-portal"}}}]}''' role_response = iam_client.create_role( AssumeRolePolicyDocument=policy_document, Path='/', RoleName='S3Access', ) - role_policy = "{\"Version\":\"2012-10-17\",\"Statement\":{\"Effect\":\"Allow\",\"Action\":\"s3:*\",\"Resource\":\"arn:aws:s3:::*\"}}" + role_policy = '''{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":"s3:*","Resource":"arn:aws:s3:::*"}}''' response = iam_client.put_role_policy( RoleName='S3Access', diff --git a/doc/radosgw/role.rst b/doc/radosgw/role.rst index a774624315a32..2954fd0022aaa 100644 --- a/doc/radosgw/role.rst +++ b/doc/radosgw/role.rst @@ -53,7 +53,7 @@ Delete a Role To delete a role, execute the following:: - radosgw-admin role rm --role-name={role-name} + radosgw-admin role delete --role-name={role-name} Request Parameters ~~~~~~~~~~~~~~~~~~ @@ -65,7 +65,7 @@ Request Parameters For example:: - radosgw-admin role rm --role-name=S3Access1 + radosgw-admin role delete --role-name=S3Access1 Note: A role can be deleted only when it doesn't have any permission policy attached to it. @@ -267,7 +267,7 @@ Delete Policy attached to a Role To delete permission policy attached to a role, execute the following:: - radosgw-admin role policy rm --role-name={role-name} --policy-name={policy-name} + radosgw-admin role policy delete --role-name={role-name} --policy-name={policy-name} Request Parameters ~~~~~~~~~~~~~~~~~~ @@ -284,7 +284,7 @@ Request Parameters For example:: - radosgw-admin role-policy get --role-name=S3Access1 --policy-name=Policy1 + radosgw-admin role-policy delete --role-name=S3Access1 --policy-name=Policy1 REST APIs for Manipulating a Role diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index b202e9d9c31db..b246aa3e4544f 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -250,14 +250,14 @@ void usage() cout << " orphans list-jobs deprecated -- list the current job-ids for orphans search\n"; cout << " * the three 'orphans' sub-commands are now deprecated; consider using the `rgw-orphan-list` tool\n"; cout << " role create create a AWS role for use with STS\n"; - cout << " role rm remove a role\n"; + cout << " role delete remove a role\n"; cout << " role get get a role\n"; cout << " role list list roles with specified path prefix\n"; cout << " role modify modify the assume role policy of an existing role\n"; cout << " role-policy put add/update permission policy to role\n"; cout << " role-policy list list policies attached to a role\n"; cout << " role-policy get get the specified inline policy document embedded with the given role\n"; - cout << " role-policy rm remove policy attached to a role\n"; + cout << " role-policy delete remove policy attached to a role\n"; cout << " reshard add schedule a resharding of a bucket\n"; cout << " reshard list list all bucket resharding or scheduled to be resharded\n"; cout << " reshard status read bucket resharding status\n"; diff --git 
a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t index c63c63cb55e2c..e4afb655db54d 100644 --- a/src/test/cli/radosgw-admin/help.t +++ b/src/test/cli/radosgw-admin/help.t @@ -144,14 +144,14 @@ orphans list-jobs deprecated -- list the current job-ids for orphans search * the three 'orphans' sub-commands are now deprecated; consider using the `rgw-orphan-list` tool role create create a AWS role for use with STS - role rm remove a role + role delete remove a role role get get a role role list list roles with specified path prefix role modify modify the assume role policy of an existing role role-policy put add/update permission policy to role role-policy list list policies attached to a role role-policy get get the specified inline policy document embedded with the given role - role-policy rm remove policy attached to a role + role-policy delete remove policy attached to a role reshard add schedule a resharding of a bucket reshard list list all bucket resharding or scheduled to be resharded reshard status read bucket resharding status From 0c1c7fb886fcaaff5f00937cf62cf69feb8d4deb Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Fri, 25 Jun 2021 10:15:23 +0200 Subject: [PATCH 0022/2866] rbd-mirror: remove mirror image at shut_down when there are no images Some cases make the ImageReplayer restart endlessly when neither the local nor the remote image exists. If both images are absent but the local image id still exists, the ImageReplayer shutdown will request a mirror image removal. Signed-off-by: Arthur Outhenin-Chalandre --- .../rbd_mirror/test_mock_ImageReplayer.cc | 67 +++++++++++++++---- .../image_deleter/TrashMoveRequest.cc | 11 +++ .../image_replayer/BootstrapRequest.cc | 8 +-- .../rbd_mirror/image_replayer/StateBuilder.h | 4 +- 4 files changed, 71 insertions(+), 19 deletions(-) diff --git a/src/test/rbd_mirror/test_mock_ImageReplayer.cc b/src/test/rbd_mirror/test_mock_ImageReplayer.cc index 57c2639a00ced..e75fa0ac4907c 100644 --- a/src/test/rbd_mirror/test_mock_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_mock_ImageReplayer.cc @@ -283,10 +283,11 @@ class TestMockImageReplayer : public TestMockFixture { void expect_send(MockBootstrapRequest& mock_bootstrap_request, MockStateBuilder& mock_state_builder, librbd::MockTestImageCtx& mock_local_image_ctx, - bool do_resync, int r) { + bool do_resync, bool set_local_image, int r) { EXPECT_CALL(mock_bootstrap_request, send()) .WillOnce(Invoke([this, &mock_bootstrap_request, &mock_state_builder, - &mock_local_image_ctx, do_resync, r]() { + &mock_local_image_ctx, set_local_image, do_resync, + r]() { if (r == 0 || r == -ENOLINK) { mock_state_builder.local_image_id = mock_local_image_ctx.id; mock_state_builder.remote_image_id = m_remote_image_ctx->id; @@ -296,9 +297,15 @@ class TestMockImageReplayer : public TestMockFixture { mock_state_builder.local_image_ctx = &mock_local_image_ctx; *mock_bootstrap_request.do_resync = do_resync; } - if (r < 0) { + if (r < 0 && r != -ENOENT) { mock_state_builder.remote_image_id = ""; } + if (r == -ENOENT) { + *mock_bootstrap_request.state_builder = &mock_state_builder; + } + if (set_local_image) { + mock_state_builder.local_image_id = mock_local_image_ctx.id; + } mock_bootstrap_request.on_finish->complete(r); })); } @@ -405,7 +412,7 @@ TEST_F(TestMockImageReplayer, StartStop) { MockBootstrapRequest mock_bootstrap_request; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, 0); + false, false, 0);
expect_create_replayer(mock_state_builder, mock_replayer); expect_init(mock_replayer, 0); @@ -447,8 +454,42 @@ TEST_F(TestMockImageReplayer, LocalImagePrimary) { MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, -ENOMSG); + false, false, -ENOMSG); + + expect_mirror_image_status_exists(false); + + create_image_replayer(mock_threads); + + C_SaferCond start_ctx; + m_image_replayer->start(&start_ctx); + ASSERT_EQ(0, start_ctx.wait()); +} + +TEST_F(TestMockImageReplayer, MetadataCleanup) { + // START + + create_local_image(); + librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx); + + MockThreads mock_threads(m_threads); + expect_work_queue_repeatedly(mock_threads); + expect_add_event_after_repeatedly(mock_threads); + + MockImageDeleter mock_image_deleter; + MockBootstrapRequest mock_bootstrap_request; + MockReplayer mock_replayer; + + expect_get_replay_status(mock_replayer); + expect_set_mirror_image_status_repeatedly(); + + InSequence seq; + + MockStateBuilder mock_state_builder; + expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, + false, true, -ENOLINK); + expect_close(mock_state_builder, 0); + expect_trash_move(mock_image_deleter, "global image id", false, 0); expect_mirror_image_status_exists(false); create_image_replayer(mock_threads); @@ -475,7 +516,7 @@ TEST_F(TestMockImageReplayer, BootstrapRemoteDeleted) { MockBootstrapRequest mock_bootstrap_request; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, -ENOLINK); + false, false, -ENOLINK); expect_close(mock_state_builder, 0); @@ -506,7 +547,7 @@ TEST_F(TestMockImageReplayer, BootstrapResyncRequested) { MockBootstrapRequest mock_bootstrap_request; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - true, 0); + true, false, 0); expect_close(mock_state_builder, 0); @@ -536,7 +577,7 @@ TEST_F(TestMockImageReplayer, BootstrapError) { InSequence seq; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, -EINVAL); + false, false, -EINVAL); expect_mirror_image_status_exists(false); @@ -599,7 +640,7 @@ TEST_F(TestMockImageReplayer, StopError) { InSequence seq; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, 0); + false, false, 0); expect_create_replayer(mock_state_builder, mock_replayer); expect_init(mock_replayer, 0); @@ -638,7 +679,7 @@ TEST_F(TestMockImageReplayer, ReplayerError) { InSequence seq; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, 0); + false, false, 0); expect_create_replayer(mock_state_builder, mock_replayer); expect_init(mock_replayer, -EINVAL); @@ -675,7 +716,7 @@ TEST_F(TestMockImageReplayer, ReplayerResync) { InSequence seq; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, 0); + false, false, 0); expect_create_replayer(mock_state_builder, mock_replayer); expect_init(mock_replayer, 0); @@ -718,7 +759,7 @@ TEST_F(TestMockImageReplayer, ReplayerInterrupted) { InSequence seq; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, 0); + false, false, 0); expect_create_replayer(mock_state_builder, mock_replayer); 
expect_init(mock_replayer, 0); @@ -766,7 +807,7 @@ TEST_F(TestMockImageReplayer, ReplayerRenamed) { InSequence seq; MockStateBuilder mock_state_builder; expect_send(mock_bootstrap_request, mock_state_builder, mock_local_image_ctx, - false, 0); + false, false, 0); expect_create_replayer(mock_state_builder, mock_replayer); expect_init(mock_replayer, 0); diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc index 234bb69129666..e53923ef38730 100644 --- a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc @@ -182,6 +182,13 @@ template void TrashMoveRequest::handle_open_image(int r) { dout(10) << "r=" << r << dendl; + if (r == -ENOENT) { + dout(5) << "mirror image does not exist, removing orphaned metadata" << dendl; + m_image_ctx = nullptr; + remove_mirror_image(); + return; + } + if (r < 0) { derr << "failed to open image: " << cpp_strerror(r) << dendl; m_image_ctx = nullptr; @@ -343,6 +350,10 @@ template void TrashMoveRequest::close_image() { dout(10) << dendl; + if (m_image_ctx == nullptr) { + handle_close_image(0); + return; + } Context *ctx = create_context_callback< TrashMoveRequest, &TrashMoveRequest::handle_close_image>(this); m_image_ctx->state->close(ctx); diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc index 1671bb9f6a65b..a34f39c15b34a 100644 --- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -194,10 +194,10 @@ void BootstrapRequest::handle_prepare_remote_image(int r) { // TODO need to support multiple remote images if (state_builder != nullptr && state_builder->remote_image_id.empty() && - !state_builder->local_image_id.empty() && - state_builder->is_linked()) { - // local image exists and is non-primary and linked to the missing - // remote image + (state_builder->local_image_id.empty() || + state_builder->is_linked())) { + // neither image exists, or the local image exists and is non-primary + // and linked to the missing remote image finish(-ENOLINK); } else { finish(-ENOENT); diff --git a/src/tools/rbd_mirror/image_replayer/StateBuilder.h b/src/tools/rbd_mirror/image_replayer/StateBuilder.h index d055c84e02068..807fe7f9162e4 100644 --- a/src/tools/rbd_mirror/image_replayer/StateBuilder.h +++ b/src/tools/rbd_mirror/image_replayer/StateBuilder.h @@ -81,13 +81,13 @@ class StateBuilder { std::string global_image_id; - std::string local_image_id; + std::string local_image_id{}; librbd::mirror::PromotionState local_promotion_state = librbd::mirror::PROMOTION_STATE_PRIMARY; ImageCtxT* local_image_ctx = nullptr; std::string remote_mirror_uuid; - std::string remote_image_id; + std::string remote_image_id{}; librbd::mirror::PromotionState remote_promotion_state = librbd::mirror::PROMOTION_STATE_NON_PRIMARY; ImageCtxT* remote_image_ctx = nullptr; From 4db66da51211504ba0a2353180ae084ba1ab3fcf Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Fri, 11 Jun 2021 09:29:59 +0200 Subject: [PATCH 0023/2866] qa/rbd-mirror: add OMAP cleanup checks This makes sure that all images are deleted in the existing qa scripts and checks that all rbd-mirror metadata in OMAP are correctly deleted.
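As a rough, hand-run illustration of what the new helpers check (the cluster/pool variables below are the placeholders these scripts already use, not new requirements), the same rados commands wrapped by the helpers can be run directly:

    # expected to report no leftover per-image keys once all images are removed
    rados --cluster ${CLUSTER1} -p ${POOL} listomapkeys rbd_mirroring | grep -E 'status_global|image_'
    rados --cluster ${CLUSTER1} -p ${POOL} listomapkeys rbd_mirror_leader | grep image_map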
Signed-off-by: Arthur Outhenin-Chalandre --- qa/workunits/rbd/rbd_mirror_helpers.sh | 70 +++++++++++++++++++++++++ qa/workunits/rbd/rbd_mirror_journal.sh | 49 +++++++++++++++++ qa/workunits/rbd/rbd_mirror_snapshot.sh | 48 +++++++++++++++++ qa/workunits/rbd/rbd_mirror_stress.sh | 5 ++ 4 files changed, 172 insertions(+) diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh index 82299715e231e..ca715d854c176 100755 --- a/qa/workunits/rbd/rbd_mirror_helpers.sh +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -549,6 +549,8 @@ status() echo "${cluster} ${image_pool} ${image_ns} rbd_mirroring omap vals" rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirroring + echo "${cluster} ${image_pool} ${image_ns} rbd_mirror_leader omap vals" + rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirror_leader echo done done @@ -1096,6 +1098,20 @@ unprotect_snapshot() rbd --cluster ${cluster} snap unprotect ${pool}/${image}@${snap} } +unprotect_snapshot_retry() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + for s in 0 1 2 4 8 16 32; do + sleep ${s} + unprotect_snapshot ${cluster} ${pool} ${image} ${snap} && return 0 + done + return 1 +} + wait_for_snap_present() { local cluster=$1 @@ -1292,6 +1308,8 @@ enable_mirror() local mode=${4:-${MIRROR_IMAGE_MODE}} rbd --cluster=${cluster} mirror image enable ${pool}/${image} ${mode} + # Display image info including the global image id for debugging purpose + rbd --cluster=${cluster} info ${pool}/${image} } test_image_present() @@ -1389,6 +1407,58 @@ get_clone_format() }' } +list_omap_keys() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + + rados --cluster ${cluster} -p ${pool} listomapkeys ${obj_name} +} + +count_omap_keys_with_filter() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + local filter=$4 + + list_omap_keys ${cluster} ${pool} ${obj_name} | grep -c ${filter} +} + +wait_for_omap_keys() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + local filter=$4 + + for s in 0 1 2 2 4 4 8 8 8 16 16 32; do + sleep $s + + set +e + test "$(count_omap_keys_with_filter ${cluster} ${pool} ${obj_name} ${filter})" = 0 + error_code=$? 
+ set -e + + if [ $error_code -eq 0 ]; then + return 0 + fi + done + + return 1 +} + +wait_for_image_in_omap() +{ + local cluster=$1 + local pool=$2 + + wait_for_omap_keys ${cluster} ${pool} rbd_mirroring status_global + wait_for_omap_keys ${cluster} ${pool} rbd_mirroring image_ + wait_for_omap_keys ${cluster} ${pool} rbd_mirror_leader image_map +} + # # Main # diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh index 84fd2424f4ba8..75c60123639ba 100755 --- a/qa/workunits/rbd/rbd_mirror_journal.sh +++ b/qa/workunits/rbd/rbd_mirror_journal.sh @@ -119,6 +119,8 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then all_admin_daemons ${CLUSTER1} rbd mirror status fi +remove_image_retry ${CLUSTER2} ${POOL} ${image1} + testlog "TEST: test image rename" new_name="${image}_RENAMED" rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} @@ -138,6 +140,18 @@ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' trash_restore ${CLUSTER2} ${POOL} ${image_id} wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + testlog "TEST: failover and failback" start_mirrors ${CLUSTER2} @@ -216,6 +230,8 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopp wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} +remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} testlog "TEST: cloned images" testlog " - default" @@ -240,6 +256,7 @@ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' 'primary_position' compare_images ${POOL} ${clone_image} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} testlog " - clone v1" clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image}1 @@ -249,6 +266,10 @@ clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 +unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} testlog " - clone v2" parent_snap=snap_v2 @@ -277,6 +298,7 @@ test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} 
${parent_snap} +remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} testlog "TEST: data pool" dp_image=test_data_pool @@ -295,6 +317,7 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'prim compare_images ${POOL} ${dp_image}@snap1 compare_images ${POOL} ${dp_image}@snap2 compare_images ${POOL} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} testlog "TEST: disable mirroring / delete non-primary image" image2=test2 @@ -379,7 +402,18 @@ for i in `seq 1 20`; do done wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' +for i in ${image2} ${image4}; do + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + testlog "TEST: disable mirror while daemon is stopped" +# TODO: workaround for the daemon to ack the deletion, to remove when +# image_map cleanup is fixed +for i in ${image2} ${image4}; do + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' +done stop_mirrors ${CLUSTER1} stop_mirrors ${CLUSTER2} set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' @@ -388,6 +422,11 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then test_image_present ${CLUSTER1} ${POOL} ${image} 'present' fi start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} # TODO: remove start/stop of cluster2 deamons when + # image_map cleanup at startup is resolved +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} +stop_mirrors ${CLUSTER2} wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' enable_journaling ${CLUSTER2} ${POOL} ${image} @@ -415,6 +454,7 @@ remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' +remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} testlog " - data pool" dp_image=test_data_pool @@ -428,6 +468,7 @@ write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' 'primary_position' compare_images ${POOL}/${NS1} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} testlog "TEST: simple image resync" request_resync_image ${CLUSTER1} ${POOL} ${image} image_id @@ -460,6 +501,7 @@ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' compare_images ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} testlog "TEST: client disconnect" image=laggy @@ -531,6 +573,7 @@ disconnect_image ${CLUSTER2} ${POOL} ${image} test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' +remove_image_retry ${CLUSTER2} ${POOL} ${image} testlog "TEST: split-brain" image=split-brain @@ -544,6 +587,12 @@ demote_image ${CLUSTER1} ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' request_resync_image ${CLUSTER1} ${POOL} ${image} image_id wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' 
+remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed" +start_mirrors ${CLUSTER2} +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then # teuthology will trash the daemon diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh index 6452739026638..f83b166984e1f 100755 --- a/qa/workunits/rbd/rbd_mirror_snapshot.sh +++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh @@ -122,6 +122,8 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then all_admin_daemons ${CLUSTER1} rbd mirror status fi +remove_image_retry ${CLUSTER2} ${POOL} ${image1} + testlog "TEST: test image rename" new_name="${image}_RENAMED" rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} @@ -144,6 +146,18 @@ trash_restore ${CLUSTER2} ${POOL} ${image_id} enable_mirror ${CLUSTER2} ${POOL} ${image} snapshot wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + testlog "TEST: failover and failback" start_mirrors ${CLUSTER2} @@ -222,6 +236,8 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopp wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} +remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} testlog "TEST: cloned images" testlog " - default" @@ -246,6 +262,7 @@ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' compare_images ${POOL} ${clone_image} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} testlog " - clone v1" clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \ @@ -256,6 +273,10 @@ clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \ test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 +unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} testlog " - clone v2" parent_snap=snap_v2 @@ -288,6 +309,7 @@ mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} testlog "TEST: data pool" 
dp_image=test_data_pool @@ -306,6 +328,7 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' compare_images ${POOL} ${dp_image}@snap1 compare_images ${POOL} ${dp_image}@snap2 compare_images ${POOL} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} testlog "TEST: disable mirroring / delete non-primary image" image2=test2 @@ -354,7 +377,18 @@ done mirror_image_snapshot ${CLUSTER2} ${POOL} ${image2} wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' +for i in ${image2} ${image4}; do + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + testlog "TEST: disable mirror while daemon is stopped" +# TODO: workaround for the daemon to ack the deletion, to remove when +# image_map cleanup is fixed +for i in ${image2} ${image4}; do + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' +done stop_mirrors ${CLUSTER1} stop_mirrors ${CLUSTER2} disable_mirror ${CLUSTER2} ${POOL} ${image} @@ -362,6 +396,11 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then test_image_present ${CLUSTER1} ${POOL} ${image} 'present' fi start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} # TODO: remove start/stop of cluster2 deamons when + # image_map cleanup at startup is resolved +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} +stop_mirrors ${CLUSTER2} wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' enable_mirror ${CLUSTER2} ${POOL} ${image} wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' @@ -387,6 +426,7 @@ remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' +remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} testlog " - data pool" dp_image=test_data_pool @@ -400,6 +440,7 @@ write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' compare_images ${POOL}/${NS1} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} testlog "TEST: simple image resync" request_resync_image ${CLUSTER1} ${POOL} ${image} image_id @@ -432,6 +473,7 @@ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' compare_images ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} testlog "TEST: split-brain" image=split-brain @@ -445,6 +487,12 @@ demote_image ${CLUSTER1} ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' request_resync_image ${CLUSTER1} ${POOL} ${image} image_id wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed" +start_mirrors ${CLUSTER2} +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then # teuthology will trash the daemon diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh index a17ad75e1b58e..cb79aba7ebc9c 100755 --- a/qa/workunits/rbd/rbd_mirror_stress.sh +++ 
b/qa/workunits/rbd/rbd_mirror_stress.sh @@ -214,3 +214,8 @@ do purge_snapshots ${CLUSTER2} ${POOL} ${image} remove_image_retry ${CLUSTER2} ${POOL} ${image} done + +testlog "TEST: check if removed images' OMAP are removed" + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} From e135403c736295b63fe1c8a861af40de302b8b57 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Tue, 13 Jul 2021 14:19:49 +0200 Subject: [PATCH 0024/2866] rbd-mirror: add image_map cleanup in LoadRequest In the LoadRequest of the ImageMap class, add an initial cleanup to remove stale entries. To clean up, the LoadRequest queries the mirror image list and removes all image_map entries that are not in that list. Signed-off-by: Arthur Outhenin-Chalandre --- qa/workunits/rbd/rbd_mirror_journal.sh | 10 --- qa/workunits/rbd/rbd_mirror_snapshot.sh | 10 --- src/tools/rbd_mirror/image_map/LoadRequest.cc | 78 ++++++++++++++++++- src/tools/rbd_mirror/image_map/LoadRequest.h | 13 ++++ 4 files changed, 90 insertions(+), 21 deletions(-) diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh index 75c60123639ba..56a8b13a9a777 100755 --- a/qa/workunits/rbd/rbd_mirror_journal.sh +++ b/qa/workunits/rbd/rbd_mirror_journal.sh @@ -409,11 +409,6 @@ for i in ${image2} ${image4}; do done testlog "TEST: disable mirror while daemon is stopped" -# TODO: workaround for the daemon to ack the deletion, to remove when -# image_map cleanup is fixed -for i in ${image2} ${image4}; do - wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' -done stop_mirrors ${CLUSTER1} stop_mirrors ${CLUSTER2} set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' @@ -422,11 +417,6 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then test_image_present ${CLUSTER1} ${POOL} ${image} 'present' fi start_mirrors ${CLUSTER1} -start_mirrors ${CLUSTER2} # TODO: remove start/stop of cluster2 deamons when - # image_map cleanup at startup is resolved -wait_for_image_in_omap ${CLUSTER1} ${POOL} -wait_for_image_in_omap ${CLUSTER2} ${POOL} -stop_mirrors ${CLUSTER2} wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' enable_journaling ${CLUSTER2} ${POOL} ${image} diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh index f83b166984e1f..0060440fb8d0d 100755 --- a/qa/workunits/rbd/rbd_mirror_snapshot.sh +++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh @@ -384,11 +384,6 @@ for i in ${image2} ${image4}; do done testlog "TEST: disable mirror while daemon is stopped" -# TODO: workaround for the daemon to ack the deletion, to remove when -# image_map cleanup is fixed -for i in ${image2} ${image4}; do - wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' -done stop_mirrors ${CLUSTER1} stop_mirrors ${CLUSTER2} disable_mirror ${CLUSTER2} ${POOL} ${image} @@ -396,11 +391,6 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then test_image_present ${CLUSTER1} ${POOL} ${image} 'present' fi start_mirrors ${CLUSTER1} -start_mirrors ${CLUSTER2} # TODO: remove start/stop of cluster2 deamons when - # image_map cleanup at startup is resolved -wait_for_image_in_omap ${CLUSTER1} ${POOL} -wait_for_image_in_omap ${CLUSTER2} ${POOL} -stop_mirrors ${CLUSTER2} wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' enable_mirror ${CLUSTER2} ${POOL} ${image} wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc index
7387b476cc0ff..46564a1607a2b 100644 --- a/src/tools/rbd_mirror/image_map/LoadRequest.cc +++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc @@ -8,6 +8,7 @@ #include "include/rbd_types.h" #include "cls/rbd/cls_rbd_client.h" +#include "UpdateRequest.h" #include "LoadRequest.h" #define dout_context g_ceph_context @@ -23,6 +24,7 @@ namespace image_map { static const uint32_t MAX_RETURN = 1024; using librbd::util::create_rados_callback; +using librbd::util::create_context_callback; template LoadRequest::LoadRequest(librados::IoCtx &ioctx, @@ -80,7 +82,81 @@ void LoadRequest::handle_image_map_list(int r) { return; } - finish(0); + mirror_image_list(); +} + +template +void LoadRequest::mirror_image_list() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + LoadRequest, + &LoadRequest::handle_mirror_image_list>(this); + int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template +void LoadRequest::handle_mirror_image_list(int r) { + dout(20) << ": r=" << r << dendl; + + std::map ids; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_list_finish(&it, &ids); + } + + if (r < 0 && r != -ENOENT) { + derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + for (auto &id : ids) { + m_global_image_ids.emplace(id.second); + } + + if (ids.size() == MAX_RETURN) { + m_start_after = ids.rbegin()->first; + mirror_image_list(); + return; + } + + cleanup_image_map(); +} + +template +void LoadRequest::cleanup_image_map() { + dout(20) << dendl; + + std::set map_removals; + + auto it = m_image_mapping->begin(); + while (it != m_image_mapping->end()) { + if (m_global_image_ids.count(it->first) > 0) { + ++it; + continue; + } + map_removals.emplace(it->first); + it = m_image_mapping->erase(it); + } + + if (map_removals.size() == 0) { + finish(0); + return; + } + + auto ctx = create_context_callback< + LoadRequest, + &LoadRequest::finish>(this); + image_map::UpdateRequest *req = image_map::UpdateRequest::create( + m_ioctx, {}, std::move(map_removals), ctx); + req->send(); } template diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h index 7657e1108a068..9b1be96857aeb 100644 --- a/src/tools/rbd_mirror/image_map/LoadRequest.h +++ b/src/tools/rbd_mirror/image_map/LoadRequest.h @@ -36,6 +36,12 @@ class LoadRequest { * IMAGE_MAP_LIST. . . . . . . * | * v + * MIRROR_IMAGE_LIST + * | + * v + * CLEANUP_IMAGE_MAP + * | + * v * * * @endverbatim @@ -48,12 +54,19 @@ class LoadRequest { std::map *m_image_mapping; Context *m_on_finish; + std::set m_global_image_ids; + bufferlist m_out_bl; std::string m_start_after; void image_map_list(); void handle_image_map_list(int r); + void mirror_image_list(); + void handle_mirror_image_list(int r); + + void cleanup_image_map(); + void finish(int r); }; From 416e25794af0252ec45b35c897c8cf2e91aca383 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Thu, 22 Jul 2021 16:37:47 +0200 Subject: [PATCH 0025/2866] cls/rbd: prevent image_status when mirror image is not created This prevents image_status_set from succeeding when there is no mirror image yet. This resolves some stale entries that were not removed in rbd-mirror and prevents adding entries that would not be visible from the rbd cli.
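For context, such a stale entry shows up as a status_global_* key on the rbd_mirroring object even though the rbd cli reports no mirrored image; a hand-run check (pool name is a placeholder) mirroring what the qa helpers already do is:

    rados -p <pool> listomapkeys rbd_mirroring | grep status_global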
Signed-off-by: Arthur Outhenin-Chalandre --- src/cls/rbd/cls_rbd.cc | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cls/rbd/cls_rbd.cc b/src/cls/rbd/cls_rbd.cc index ffc19267cf400..f9b30619c145b 100644 --- a/src/cls/rbd/cls_rbd.cc +++ b/src/cls/rbd/cls_rbd.cc @@ -5059,7 +5059,22 @@ int image_status_set(cls_method_context_t hctx, const string &global_image_id, ondisk_status.up = false; ondisk_status.last_update = ceph_clock_now(); - int r = cls_get_request_origin(hctx, &ondisk_status.origin); + std::string global_id_key = global_key(global_image_id); + std::string image_id; + int r = read_key(hctx, global_id_key, &image_id); + if (r < 0) { + return 0; + } + cls::rbd::MirrorImage mirror_image; + r = image_get(hctx, image_id, &mirror_image); + if (r < 0) { + return 0; + } + if (mirror_image.state != cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + return 0; + } + + r = cls_get_request_origin(hctx, &ondisk_status.origin); ceph_assert(r == 0); bufferlist bl; From 35398a5e17dc5a536ccd63417c937f2efe742654 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Thu, 22 Jul 2021 18:53:16 +0200 Subject: [PATCH 0026/2866] rbd-mirror: remove image_mapped condition to remove image_map In some split-brain scenarios the image is removed while image_mapped is false. This prevents the removal of the image_map entry in OMAP, so the entry is not removed until the daemon is restarted. Signed-off-by: Arthur Outhenin-Chalandre --- src/tools/rbd_mirror/ImageMap.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc index 4493e073b98bc..d352fcb2c1403 100644 --- a/src/tools/rbd_mirror/ImageMap.cc +++ b/src/tools/rbd_mirror/ImageMap.cc @@ -416,7 +416,7 @@ void ImageMap::update_images_removed( to_remove.emplace_back(global_image_id, info.instance_id); } - if (image_mapped && image_removed) { + if (image_removed) { // local and peer images have been deleted if (m_policy->remove_image(global_image_id)) { schedule_action(global_image_id); From ff60aec2d9efa1842383ba0a5c3bd6b5a29389c6 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Wed, 28 Jul 2021 14:14:47 +0200 Subject: [PATCH 0027/2866] rbd-mirror: fix bootstrap sequence while the image is removed When the image was being removed, PrepareRemoteImageRequest returned the same error whether the image was disabled or non-primary, which prevented BootstrapRequest from applying the correct error handling. This commit fixes this behavior by treating the remote image as already deleted if the image is in the disabling state.
Signed-off-by: Arthur Outhenin-Chalandre --- src/test/rbd_mirror/test_ImageReplayer.cc | 2 +- .../rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/rbd_mirror/test_ImageReplayer.cc b/src/test/rbd_mirror/test_ImageReplayer.cc index cb21fc7239619..76a6919d5adbb 100644 --- a/src/test/rbd_mirror/test_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_ImageReplayer.cc @@ -664,7 +664,7 @@ TYPED_TEST(TestImageReplayer, BootstrapMirrorDisabling) this->create_replayer(); C_SaferCond cond; this->m_replayer->start(&cond); - ASSERT_EQ(-EREMOTEIO, cond.wait()); + ASSERT_EQ(-ENOENT, cond.wait()); ASSERT_TRUE(this->m_replayer->is_stopped()); } diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc index 54a9dbf1650d5..08d3dd7806fc3 100644 --- a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc @@ -112,7 +112,7 @@ void PrepareRemoteImageRequest::handle_get_mirror_info(int r) { return; } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { dout(5) << "remote image mirroring is being disabled" << dendl; - finish(-EREMOTEIO); + finish(-ENOENT); return; } else if (m_promotion_state != librbd::mirror::PROMOTION_STATE_PRIMARY && (state_builder == nullptr || From 965bc4150eafc8e3bbe69f63beea9c7fbb20ceb6 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Thu, 29 Jul 2021 11:54:45 +0200 Subject: [PATCH 0028/2866] rbd-mirror: handle disabling/creating image in PrepareLocalImageRequest Signed-off-by: Arthur Outhenin-Chalandre --- .../test_mock_PrepareLocalImageRequest.cc | 100 ++++++++++++++++++ src/tools/rbd_mirror/ImageReplayer.cc | 3 + .../PrepareLocalImageRequest.cc | 32 +++++- .../image_replayer/PrepareLocalImageRequest.h | 7 ++ 4 files changed, 139 insertions(+), 3 deletions(-) diff --git a/src/test/rbd_mirror/image_replayer/test_mock_PrepareLocalImageRequest.cc b/src/test/rbd_mirror/image_replayer/test_mock_PrepareLocalImageRequest.cc index 0b0a87e8de52d..54c3b24efac96 100644 --- a/src/test/rbd_mirror/image_replayer/test_mock_PrepareLocalImageRequest.cc +++ b/src/test/rbd_mirror/image_replayer/test_mock_PrepareLocalImageRequest.cc @@ -5,6 +5,7 @@ #include "cls/rbd/cls_rbd_types.h" #include "librbd/journal/TypeTraits.h" #include "librbd/mirror/GetInfoRequest.h" +#include "tools/rbd_mirror/ImageDeleter.h" #include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" #include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" #include "tools/rbd_mirror/image_replayer/StateBuilder.h" @@ -70,6 +71,28 @@ GetInfoRequest* GetInfoRequest +struct ImageDeleter { + static ImageDeleter* s_instance; + + static void trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, bool resync, + librbd::asio::ContextWQ* work_queue, + Context* on_finish) { + ceph_assert(s_instance != nullptr); + s_instance->trash_move(global_image_id, resync, on_finish); + } + + MOCK_METHOD3(trash_move, void(const std::string&, bool, Context*)); + + ImageDeleter() { + s_instance = this; + } +}; + +ImageDeleter* ImageDeleter::s_instance = nullptr; + namespace image_replayer { template <> @@ -176,6 +199,7 @@ using ::testing::WithArgs; class TestMockImageReplayerPrepareLocalImageRequest : public TestMockFixture { public: + typedef ImageDeleter MockImageDeleter; typedef PrepareLocalImageRequest MockPrepareLocalImageRequest; 
typedef GetMirrorImageIdRequest MockGetMirrorImageIdRequest; typedef StateBuilder MockStateBuilder; @@ -222,6 +246,17 @@ class TestMockImageReplayerPrepareLocalImageRequest : public TestMockFixture { mock_get_mirror_info_request.on_finish, r); })); } + + void expect_trash_move(MockImageDeleter& mock_image_deleter, + const std::string& global_image_id, + bool ignore_orphan, int r) { + EXPECT_CALL(mock_image_deleter, + trash_move(global_image_id, ignore_orphan, _)) + .WillOnce(WithArg<2>(Invoke([this, r](Context* ctx) { + m_threads->work_queue->queue(ctx, r); + }))); + } + }; TEST_F(TestMockImageReplayerPrepareLocalImageRequest, SuccessJournal) { @@ -400,6 +435,71 @@ TEST_F(TestMockImageReplayerPrepareLocalImageRequest, MirrorImageInfoError) { ASSERT_EQ(-EINVAL, ctx.wait()); } +TEST_F(TestMockImageReplayerPrepareLocalImageRequest, ImageCreating) { + InSequence seq; + MockGetMirrorImageIdRequest mock_get_mirror_image_id_request; + expect_get_mirror_image_id(mock_get_mirror_image_id_request, "local image id", + 0); + expect_dir_get_name(m_local_io_ctx, "local image name", 0); + + MockGetMirrorInfoRequest mock_get_mirror_info_request; + expect_get_mirror_info(mock_get_mirror_info_request, + {cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, + "global image id", + cls::rbd::MIRROR_IMAGE_STATE_CREATING}, + librbd::mirror::PROMOTION_STATE_NON_PRIMARY, + "remote mirror uuid", 0); + + MockImageDeleter mock_image_deleter; + expect_trash_move(mock_image_deleter, "global image id", false, 0); + + MockSnapshotStateBuilder mock_journal_state_builder; + MockStateBuilder* mock_state_builder = nullptr; + std::string local_image_name; + C_SaferCond ctx; + auto req = MockPrepareLocalImageRequest::create(m_local_io_ctx, + "global image id", + &local_image_name, + &mock_state_builder, + m_threads->work_queue, + &ctx); + req->send(); + + ASSERT_EQ(-ENOENT, ctx.wait()); + ASSERT_TRUE(mock_state_builder == nullptr); +} + +TEST_F(TestMockImageReplayerPrepareLocalImageRequest, ImageDisabling) { + InSequence seq; + MockGetMirrorImageIdRequest mock_get_mirror_image_id_request; + expect_get_mirror_image_id(mock_get_mirror_image_id_request, "local image id", + 0); + expect_dir_get_name(m_local_io_ctx, "local image name", 0); + + MockGetMirrorInfoRequest mock_get_mirror_info_request; + expect_get_mirror_info(mock_get_mirror_info_request, + {cls::rbd::MIRROR_IMAGE_MODE_SNAPSHOT, + "global image id", + cls::rbd::MIRROR_IMAGE_STATE_DISABLING}, + librbd::mirror::PROMOTION_STATE_NON_PRIMARY, + "remote mirror uuid", 0); + + MockSnapshotStateBuilder mock_journal_state_builder; + MockStateBuilder* mock_state_builder = nullptr; + std::string local_image_name; + C_SaferCond ctx; + auto req = MockPrepareLocalImageRequest::create(m_local_io_ctx, + "global image id", + &local_image_name, + &mock_state_builder, + m_threads->work_queue, + &ctx); + req->send(); + + ASSERT_EQ(-ERESTART, ctx.wait()); + ASSERT_TRUE(mock_state_builder == nullptr); +} + } // namespace image_replayer } // namespace mirror } // namespace rbd diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 9546d09fce1a2..f04dc68d68fa4 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -397,6 +397,9 @@ void ImageReplayer::handle_bootstrap(int r) { m_delete_requested = true; on_start_fail(0, "remote image no longer exists"); return; + } else if (r == -ERESTART) { + on_start_fail(r, "image in transient state, try again"); + return; } else if (r < 0) { on_start_fail(r, "error bootstrapping replay"); 
return; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc index a64117425d93f..b1fef7254760a 100644 --- a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc @@ -10,6 +10,7 @@ #include "librbd/Journal.h" #include "librbd/Utils.h" #include "librbd/mirror/GetInfoRequest.h" +#include "tools/rbd_mirror/ImageDeleter.h" #include "tools/rbd_mirror/Threads.h" #include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" #include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" @@ -124,9 +125,16 @@ void PrepareLocalImageRequest::handle_get_mirror_info(int r) { return; } - // TODO save current mirror state to determine if we should - // delete a partially formed image - // (e.g. MIRROR_IMAGE_STATE_CREATING/DELETING) + if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_CREATING) { + dout(5) << "local image is still in creating state, issuing a removal" + << dendl; + move_to_trash(); + return; + } else if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << "local image mirroring is in disabling state" << dendl; + finish(-ERESTART); + return; + } switch (m_mirror_image.mode) { case cls::rbd::MIRROR_IMAGE_MODE_JOURNAL: @@ -156,6 +164,24 @@ void PrepareLocalImageRequest::handle_get_mirror_info(int r) { finish(0); } +template +void PrepareLocalImageRequest::move_to_trash() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest, + &PrepareLocalImageRequest::handle_move_to_trash>(this); + ImageDeleter::trash_move(m_io_ctx, m_global_image_id, + false, m_work_queue, ctx); +} + +template +void PrepareLocalImageRequest::handle_move_to_trash(int r) { + dout(10) << ": r=" << r << dendl; + + finish(-ENOENT); +} + template void PrepareLocalImageRequest::finish(int r) { dout(10) << "r=" << r << dendl; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h index 8285ca060b3e3..6372169ff0550 100644 --- a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h @@ -67,6 +67,10 @@ class PrepareLocalImageRequest { * v * GET_MIRROR_INFO * | + * | (if the image mirror state is CREATING) + * v + * TRASH_MOVE + * | * v * * @@ -95,6 +99,9 @@ class PrepareLocalImageRequest { void get_mirror_info(); void handle_get_mirror_info(int r); + void move_to_trash(); + void handle_move_to_trash(int r); + void finish(int r); }; From b664a95889b35d0d3afbd8428e3af4976d7f81eb Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Fri, 6 Aug 2021 15:54:38 +0200 Subject: [PATCH 0029/2866] rbd-mirror: remove image_map next_state if sets to the same state In some cases, set_state is called with DISSOCIATING, then ASSOCIATING and DISSOCIATING again. In this case the state DISSOCIATING is processed to remove the image and then schedule the next action which is associating. To fix this case, this commit removes the next_state if the state is sets to the same state. 
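The scenario is easier to follow in a stripped-down sketch (std::optional and a two-value enum stand in for the real Policy/StateTransition types; the names are illustrative only):

    #include <iostream>
    #include <optional>

    enum class State { ASSOCIATING, DISSOCIATING };

    struct ImageState {
      std::optional<State> state;       // transition currently being processed
      std::optional<State> next_state;  // transition queued behind it
    };

    bool set_state_sketch(ImageState& s, State requested) {
      if (s.state == requested) {
        s.next_state.reset();   // same request again: drop any stale queued transition
        return false;           // nothing new to schedule
      }
      if (!s.state) {
        s.state = requested;    // idle: start processing immediately
      } else {
        s.next_state = requested;
      }
      return true;
    }

    int main() {
      ImageState s;
      set_state_sketch(s, State::DISSOCIATING);  // removal starts being processed
      set_state_sketch(s, State::ASSOCIATING);   // queued behind the removal
      set_state_sketch(s, State::DISSOCIATING);  // equals the in-flight state: queue is cleared
      std::cout << std::boolalpha << s.next_state.has_value() << "\n";  // false
    }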
Signed-off-by: Arthur Outhenin-Chalandre --- src/tools/rbd_mirror/image_map/Policy.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc index ccf188742a107..62fbd12dc0bc8 100644 --- a/src/tools/rbd_mirror/image_map/Policy.cc +++ b/src/tools/rbd_mirror/image_map/Policy.cc @@ -380,6 +380,7 @@ bool Policy::can_shuffle_image(const std::string &global_image_id) { bool Policy::set_state(ImageState* image_state, StateTransition::State state, bool ignore_current_state) { if (!ignore_current_state && image_state->state == state) { + image_state->next_state = boost::none; return false; } else if (StateTransition::is_idle(image_state->state)) { image_state->state = state; From 6a4e328161e4d692bf45605d62ca8fab2fee7670 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Mon, 23 Aug 2021 17:54:03 +0300 Subject: [PATCH 0030/2866] os/bluestore: dump alloc unit size on bluefs allocation failure. Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueFS.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index c991d66c3f7fb..914da608f91f2 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3017,6 +3017,7 @@ int BlueFS::_allocate_without_fallback(uint8_t id, uint64_t len, << ", allocator type " << alloc[id]->get_type() << ", capacity 0x" << alloc[id]->get_capacity() << ", block size 0x" << alloc[id]->get_block_size() + << ", alloc size 0x" << alloc_size[id] << ", free 0x" << alloc[id]->get_free() << ", fragmentation " << alloc[id]->get_fragmentation() << ", allocated 0x" << (alloc_len > 0 ? alloc_len : 0) @@ -3060,6 +3061,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, << ", allocator type " << alloc[id]->get_type() << ", capacity 0x" << alloc[id]->get_capacity() << ", block size 0x" << alloc[id]->get_block_size() + << ", alloc size 0x" << alloc_size[id] << ", free 0x" << alloc[id]->get_free() << ", fragmentation " << alloc[id]->get_fragmentation() << ", allocated 0x" << (alloc_len > 0 ? alloc_len : 0) From b9f462f97848630d1cd85c943c42f161894b2efe Mon Sep 17 00:00:00 2001 From: Mumuni Mohammed <43080869+Kalkulus1@users.noreply.github.com> Date: Sun, 22 Aug 2021 16:25:20 +0000 Subject: [PATCH 0031/2866] doc: adds a way to allow users set_contents_from_filename for rgw Signed-off-by: Mumuni Mohammed --- doc/radosgw/s3/python.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/radosgw/s3/python.rst b/doc/radosgw/s3/python.rst index 9f9367e0fcb03..6fb314378aced 100644 --- a/doc/radosgw/s3/python.rst +++ b/doc/radosgw/s3/python.rst @@ -106,6 +106,17 @@ This creates a file ``hello.txt`` with the string ``"Hello World!"`` key = bucket.new_key('hello.txt') key.set_contents_from_string('Hello World!') + + +Uploading an Object or a File +----------------------------- + +This creates a file ``logo.png`` with the contents from the file ``"logo.png"`` + +.. 
code-block:: python + + key = bucket.new_key('logo.png') + key.set_contents_from_filename('logo.png') Change an Object's ACL From 8ed8e8cd485c01dd9c5eb94f62dd5d1868881d3a Mon Sep 17 00:00:00 2001 From: Mark Kogan Date: Mon, 30 Aug 2021 19:01:53 +0300 Subject: [PATCH 0032/2866] rgw: d3n: fix d3n cached objects on disk lru eviction entering infinite loop fixes: https://tracker.ceph.com/issues/52457 Signed-off-by: Mark Kogan --- src/common/options/rgw.yaml.in | 2 +- src/rgw/rgw_d3n_cacherequest.h | 2 +- src/rgw/rgw_d3n_datacache.cc | 43 ++++++++++++++++++++-------------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index c7498481d1981..f8d935854b6a2 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -3134,7 +3134,7 @@ options: type: size level: advanced desc: datacache maximum size on disk in bytes - default: 1048576 + default: 1_G services: - rgw with_legacy: true diff --git a/src/rgw/rgw_d3n_cacherequest.h b/src/rgw/rgw_d3n_cacherequest.h index 41e026bcd3453..d4acc4c0b1f8f 100644 --- a/src/rgw/rgw_d3n_cacherequest.h +++ b/src/rgw/rgw_d3n_cacherequest.h @@ -26,7 +26,7 @@ struct D3nGetObjData { struct D3nL1CacheRequest { ~D3nL1CacheRequest() { - lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "(): Read From Cache, comlete" << dendl; + lsubdout(g_ceph_context, rgw_datacache, 30) << "D3nDataCache: " << __func__ << "(): Read From Cache, complete" << dendl; } // unique_ptr with custom deleter for struct aiocb diff --git a/src/rgw/rgw_d3n_datacache.cc b/src/rgw/rgw_d3n_datacache.cc index e2ad497dae977..3ca7e11ea2ad7 100644 --- a/src/rgw/rgw_d3n_datacache.cc +++ b/src/rgw/rgw_d3n_datacache.cc @@ -161,9 +161,8 @@ void D3nDataCache::d3n_libaio_write_completion_cb(D3nCacheAioWriteRequest* c) { // update cache_map entries for new chunk in cache const std::lock_guard l(d3n_cache_lock); auto it = d3n_outstanding_write_list.find(c->oid); - if (it != d3n_outstanding_write_list.end()) { + if (it != d3n_outstanding_write_list.end()) d3n_outstanding_write_list.erase(it); - } chunk_info = new D3nChunkDataInfo; chunk_info->oid = c->oid; chunk_info->set_ctx(cct); @@ -211,10 +210,15 @@ int D3nDataCache::d3n_libaio_create_write_request(bufferlist& bl, unsigned int l void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) { - int r = 0; + size_t sr = 0; uint64_t freed_size = 0, _free_data_cache_size = 0, _outstanding_write_size = 0; - ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): oid=" << oid << dendl; + ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): oid=" << oid << ", len=" << len << dendl; + if (len > cct->_conf->rgw_d3n_l1_datacache_size) { + ldout(cct, 2) << "D3nDataCache: Warning: object oid=" << oid << ", length=" << len << + " is larger than the datacache size " << cct->_conf->rgw_d3n_l1_datacache_size << ", not writing to cache" << dendl; + return; + } { const std::lock_guard l(d3n_cache_lock); std::unordered_map::iterator iter = d3n_cache_map.find(oid); @@ -235,27 +239,33 @@ void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) _outstanding_write_size = outstanding_write_size; } ldout(cct, 20) << "D3nDataCache: Before eviction _free_data_cache_size:" << _free_data_cache_size << ", _outstanding_write_size:" << _outstanding_write_size << ", freed_size:" << freed_size << dendl; - while (len >= (_free_data_cache_size - _outstanding_write_size + freed_size)) { - ldout(cct, 20) << "D3nDataCache: enter eviction, 
r=" << r << dendl; + while (len > (_free_data_cache_size - _outstanding_write_size + freed_size)) { + ldout(cct, 20) << "D3nDataCache: enter eviction" << dendl; if (eviction_policy == _eviction_policy::LRU) { - r = lru_eviction(); + sr = lru_eviction(); } else if (eviction_policy == _eviction_policy::RANDOM) { - r = random_eviction(); + sr = random_eviction(); } else { ldout(cct, 0) << "D3nDataCache: Warning: unknown cache eviction policy, defaulting to lru eviction" << dendl; - r = lru_eviction(); + sr = lru_eviction(); } - if (r < 0) + if (sr == 0) { + ldout(cct, 2) << "D3nDataCache: Warning: eviction was not able to free disk space, not writing to cache" << dendl; + auto it = d3n_outstanding_write_list.find(oid); + if (it != d3n_outstanding_write_list.end()) + d3n_outstanding_write_list.erase(it); return; - freed_size += r; + } + ldout(cct, 20) << "D3nDataCache: completed eviction of " << sr << " bytes" << dendl; + freed_size += sr; } + int r = 0; r = d3n_libaio_create_write_request(bl, len, oid); if (r < 0) { const std::lock_guard l(d3n_cache_lock); auto it = d3n_outstanding_write_list.find(oid); - if (it != d3n_outstanding_write_list.end()) { + if (it != d3n_outstanding_write_list.end()) d3n_outstanding_write_list.erase(it); - } ldout(cct, 1) << "D3nDataCache: create_aio_write_request fail, r=" << r << dendl; return; } @@ -324,7 +334,7 @@ size_t D3nDataCache::random_eviction() } location = cache_location + del_oid; - remove(location.c_str()); + ::remove(location.c_str()); return freed_size; } @@ -356,13 +366,12 @@ size_t D3nDataCache::lru_eviction() del_oid = del_entry->oid; ldout(cct, 20) << "D3nDataCache: lru_eviction: oid to remove: " << del_oid << dendl; std::unordered_map::iterator iter = d3n_cache_map.find(del_oid); - if (iter != d3n_cache_map.end()) { + if (iter != d3n_cache_map.end()) d3n_cache_map.erase(iter); // oid - } } freed_size = del_entry->size; delete del_entry; location = cache_location + del_oid; - remove(location.c_str()); + ::remove(location.c_str()); return freed_size; } From 0facfac91fd8f71e5a8b869d818e7c2b07b93516 Mon Sep 17 00:00:00 2001 From: Daniel Pivonka Date: Thu, 12 Aug 2021 14:25:23 -0400 Subject: [PATCH 0033/2866] mgr/cephadm: update host when adding host that already exists update addr and labels of existing host this prevents labels being removed if a host is readded or addr being set to 127.0.... 
if readding host active mgr is on Fixes: https://tracker.ceph.com/issues/51667 Signed-off-by: Daniel Pivonka --- src/pybind/mgr/cephadm/inventory.py | 23 +++++++++++++++++++++-- src/pybind/mgr/cephadm/module.py | 8 ++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index cd398221123e5..d4db7761c8a9e 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -93,8 +93,16 @@ def assert_host(self, host: str) -> None: raise OrchestratorError('host %s does not exist' % host) def add_host(self, spec: HostSpec) -> None: - self._inventory[spec.hostname] = spec.to_json() - self.save() + if spec.hostname in self._inventory: + # addr + if self.get_addr(spec.hostname) != spec.addr: + self.set_addr(spec.hostname, spec.addr) + # labels + for label in spec.labels: + self.add_label(spec.hostname, label) + else: + self._inventory[spec.hostname] = spec.to_json() + self.save() def rm_host(self, host: str) -> None: self.assert_host(host) @@ -602,6 +610,17 @@ def prime_empty_host(self, host): self.registry_login_queue.add(host) self.last_client_files[host] = {} + def refresh_all_host_info(self, host): + # type: (str) -> None + + self.last_host_check.pop(host, None) + self.daemon_refresh_queue.append(host) + self.registry_login_queue.add(host) + self.device_refresh_queue.append(host) + self.last_facts_update.pop(host, None) + self.osdspec_previews_refresh_queue.append(host) + self.last_autotune.pop(host, None) + def invalidate_host_daemons(self, host): # type: (str) -> None self.daemon_refresh_queue.append(host) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index e56987c93c55f..c6fddc271b795 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1398,6 +1398,11 @@ def _schedulable_hosts(self) -> List[HostSpec]: ] def _check_valid_addr(self, host: str, addr: str) -> str: + # make sure mgr is not resolving own ip + if addr in self.get_mgr_id(): + raise OrchestratorError( + "Can not automatically resolve ip address of host where active mgr is running. Please explicitly provide the address.") + # make sure hostname is resolvable before trying to make a connection try: ip_addr = utils.resolve_ip(addr) @@ -1445,6 +1450,9 @@ def _add_host(self, spec): if spec.addr == spec.hostname and ip_addr: spec.addr = ip_addr + if spec.hostname in self.inventory and self.inventory.get_addr(spec.hostname) != spec.addr: + self.cache.refresh_all_host_info(spec.hostname) + # prime crush map? if spec.location: self.check_mon_command({ From bf8e5fe4f066589ddfb1afb345ef2cc3079feb31 Mon Sep 17 00:00:00 2001 From: Joseph Sawaya Date: Mon, 30 Aug 2021 12:00:58 -0400 Subject: [PATCH 0034/2866] mgr/rook: fix orch ls to display OSD info This commit adds OSDs to the describe_service method in the RookOrchestrator class to display the information about running OSDs when a user does `ceph orch ls`. 
Signed-off-by: Joseph Sawaya --- src/pybind/mgr/rook/module.py | 14 +++++++++++++- src/pybind/mgr/rook/rook_cluster.py | 4 ++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index bf6f673b302d9..0b6badcff08b9 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -348,7 +348,19 @@ def describe_service(self, size=active, last_refresh=now, ) - + if service_type == 'osd' or service_type is None: + # OSDs + all_osds = self.rook_cluster.get_osds() + svc = 'osd' + spec[svc] = orchestrator.ServiceDescription( + spec=DriveGroupSpec( + service_type='osd', + placement=PlacementSpec(count=len(all_osds), hosts=[osd.metadata.labels['topology-location-host'] for osd in all_osds]), + ), + size=len(all_osds), + last_refresh=now, + running= sum(osd.status.phase == 'Running' for osd in all_osds) + ) for dd in self._list_daemons(): if dd.service_name() not in spec: continue diff --git a/src/pybind/mgr/rook/rook_cluster.py b/src/pybind/mgr/rook/rook_cluster.py index 5a13d39294877..2fa86c9138a8f 100644 --- a/src/pybind/mgr/rook/rook_cluster.py +++ b/src/pybind/mgr/rook/rook_cluster.py @@ -703,6 +703,10 @@ def get_discovered_devices(self, nodenames: Optional[List[str]] = None) -> Dict[ self.fetcher.fetch() return self.fetcher.devices() + def get_osds(self) -> List: + osd_pods: KubernetesResource = KubernetesResource(self.coreV1_api.list_namespaced_pod, namespace='rook-ceph', label_selector='app=rook-ceph-osd') + return list(osd_pods.items) + def get_nfs_conf_url(self, nfs_cluster: str, instance: str) -> Optional[str]: # # Fetch cephnfs object for "nfs_cluster" and then return a rados:// From 82c9fff0f8d5e4fc41066979738a4eb6b38324e4 Mon Sep 17 00:00:00 2001 From: Mark Kogan Date: Wed, 1 Sep 2021 12:25:41 +0300 Subject: [PATCH 0035/2866] rgw: d3n: d3n on disk lru eviction:addressing comments Signed-off-by: Mark Kogan --- src/rgw/rgw_d3n_datacache.cc | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/src/rgw/rgw_d3n_datacache.cc b/src/rgw/rgw_d3n_datacache.cc index 3ca7e11ea2ad7..28c6a2cb2da3d 100644 --- a/src/rgw/rgw_d3n_datacache.cc +++ b/src/rgw/rgw_d3n_datacache.cc @@ -160,9 +160,7 @@ void D3nDataCache::d3n_libaio_write_completion_cb(D3nCacheAioWriteRequest* c) { // update cache_map entries for new chunk in cache const std::lock_guard l(d3n_cache_lock); - auto it = d3n_outstanding_write_list.find(c->oid); - if (it != d3n_outstanding_write_list.end()) - d3n_outstanding_write_list.erase(it); + d3n_outstanding_write_list.erase(c->oid); chunk_info = new D3nChunkDataInfo; chunk_info->oid = c->oid; chunk_info->set_ctx(cct); @@ -214,11 +212,6 @@ void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) uint64_t freed_size = 0, _free_data_cache_size = 0, _outstanding_write_size = 0; ldout(cct, 10) << "D3nDataCache::" << __func__ << "(): oid=" << oid << ", len=" << len << dendl; - if (len > cct->_conf->rgw_d3n_l1_datacache_size) { - ldout(cct, 2) << "D3nDataCache: Warning: object oid=" << oid << ", length=" << len << - " is larger than the datacache size " << cct->_conf->rgw_d3n_l1_datacache_size << ", not writing to cache" << dendl; - return; - } { const std::lock_guard l(d3n_cache_lock); std::unordered_map::iterator iter = d3n_cache_map.find(oid); @@ -251,9 +244,7 @@ void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) } if (sr == 0) { ldout(cct, 2) << "D3nDataCache: Warning: eviction was not able to free disk space, not writing to cache" 
<< dendl; - auto it = d3n_outstanding_write_list.find(oid); - if (it != d3n_outstanding_write_list.end()) - d3n_outstanding_write_list.erase(it); + d3n_outstanding_write_list.erase(oid); return; } ldout(cct, 20) << "D3nDataCache: completed eviction of " << sr << " bytes" << dendl; @@ -263,9 +254,7 @@ void D3nDataCache::put(bufferlist& bl, unsigned int len, std::string& oid) r = d3n_libaio_create_write_request(bl, len, oid); if (r < 0) { const std::lock_guard l(d3n_cache_lock); - auto it = d3n_outstanding_write_list.find(oid); - if (it != d3n_outstanding_write_list.end()) - d3n_outstanding_write_list.erase(it); + d3n_outstanding_write_list.erase(oid); ldout(cct, 1) << "D3nDataCache: create_aio_write_request fail, r=" << r << dendl; return; } @@ -365,9 +354,7 @@ size_t D3nDataCache::lru_eviction() } del_oid = del_entry->oid; ldout(cct, 20) << "D3nDataCache: lru_eviction: oid to remove: " << del_oid << dendl; - std::unordered_map::iterator iter = d3n_cache_map.find(del_oid); - if (iter != d3n_cache_map.end()) - d3n_cache_map.erase(iter); // oid + d3n_cache_map.erase(del_oid); // oid } freed_size = del_entry->size; delete del_entry; From 870b0292352f62e7ab714a00b71262df2ec8cae8 Mon Sep 17 00:00:00 2001 From: wangyunqing Date: Tue, 30 Jun 2020 20:43:12 +0800 Subject: [PATCH 0036/2866] rgw: add try catch for user policies decode Signed-off-by: wangyunqing --- src/rgw/rgw_rest_user_policy.cc | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/rgw/rgw_rest_user_policy.cc b/src/rgw/rgw_rest_user_policy.cc index a552f3b2b2a2e..86e30a30411c1 100644 --- a/src/rgw/rgw_rest_user_policy.cc +++ b/src/rgw/rgw_rest_user_policy.cc @@ -156,6 +156,9 @@ void RGWPutUserPolicy::execute(optional_yield y) if (op_ret < 0) { op_ret = -ERR_INTERNAL_ERROR; } + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode user policies" << dendl; + op_ret = -EIO; } catch (rgw::IAM::PolicyParseException& e) { ldpp_dout(this, 20) << "failed to parse policy: " << e.what() << dendl; op_ret = -ERR_MALFORMED_DOC; @@ -213,7 +216,13 @@ void RGWGetUserPolicy::execute(optional_yield y) map policies; if (auto it = user->get_attrs().find(RGW_ATTR_USER_POLICY); it != user->get_attrs().end()) { bufferlist bl = it->second; - decode(policies, bl); + try { + decode(policies, bl); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode user policies" << dendl; + op_ret = -EIO; + return; + } if (auto it = policies.find(policy_name); it != policies.end()) { policy = policies[policy_name]; dump(s->formatter); @@ -276,7 +285,13 @@ void RGWListUserPolicies::execute(optional_yield y) s->formatter->close_section(); s->formatter->open_object_section("ListUserPoliciesResult"); bufferlist bl = it->second; - decode(policies, bl); + try { + decode(policies, bl); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode user policies" << dendl; + op_ret = -EIO; + return; + } for (const auto& p : policies) { s->formatter->open_object_section("PolicyNames"); s->formatter->dump_string("member", p.first); @@ -348,7 +363,13 @@ void RGWDeleteUserPolicy::execute(optional_yield y) map policies; if (auto it = user->get_attrs().find(RGW_ATTR_USER_POLICY); it != user->get_attrs().end()) { bufferlist out_bl = it->second; - decode(policies, out_bl); + try { + decode(policies, out_bl); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode user policies" << dendl; + op_ret = -EIO; + return; + } if (auto p = 
policies.find(policy_name); p != policies.end()) { bufferlist in_bl; From 7e961d3ebd6cc39b38691bf6b4b7074db175c37b Mon Sep 17 00:00:00 2001 From: wangyunqing Date: Mon, 23 Nov 2020 11:19:55 +0800 Subject: [PATCH 0037/2866] rgw : check the object size when copy obj Fixes: https://tracker.ceph.com/issues/48322 Signed-off-by: wangyunqing --- src/rgw/rgw_op.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 64f4fc0e64030..def5c1dba1ff9 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -5414,6 +5414,10 @@ void RGWCopyObj::execute(optional_yield y) obj_size = astate->size; if (!s->system_request) { // no quota enforcement for system requests + if (astate->accounted_size > static_cast(s->cct->_conf->rgw_max_put_size)) { + op_ret = -ERR_TOO_LARGE; + return; + } // enforce quota against the destination bucket owner op_ret = dest_bucket->check_quota(this, user_quota, bucket_quota, astate->accounted_size, y); From 465f7e8fa34b37cf09e4d280da5239086924b942 Mon Sep 17 00:00:00 2001 From: jianglong01 Date: Tue, 7 Sep 2021 15:49:04 +0800 Subject: [PATCH 0038/2866] mgr/cephadm: the display of 'orch osd rm status' is incorrect After exec 'ceph orch osd rm --replace', 'orch osd rm status' has an wrong display. wrong display: OSD_ID HOST STATE PG_COUNT REPLACE FORCE DRAIN_STARTED_AT 15 cephqa08.cpp.zzbm.qianxin-inc.cn draining 0 True True 2021-09-07 07:54:05.776906 correct display: OSD_ID HOST STATE PG_COUNT REPLACE FORCE DRAIN_STARTED_AT 15 cephqa08.cpp.zzbm.qianxin-inc.cn draining 0 True False 2021-09-07 07:35:34.731417 Signed-off-by: jianglong01 --- src/pybind/mgr/orchestrator/module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index d75271fbff6fc..234f8c50f66af 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -858,7 +858,7 @@ def _osd_rm_status(self, format: Format = Format.plain) -> HandleCommandResult: table.right_padding_width = 2 for osd in sorted(report, key=lambda o: o.osd_id): table.add_row([osd.osd_id, osd.hostname, osd.drain_status_human(), - osd.get_pg_count(), osd.replace, osd.replace, osd.drain_started_at]) + osd.get_pg_count(), osd.replace, osd.force, osd.drain_started_at]) out = table.get_string() return HandleCommandResult(stdout=out) From f83e0ac544a77440b538b0be303b3859125915dd Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Sat, 5 Jun 2021 08:55:14 +0200 Subject: [PATCH 0039/2866] os/bluestore/bluefs: Refactor flush_and_sync_log This refactor prepares flush_and_sync_log and compact_log_async for fine-grain locks in BlueFS. There is no new logic introduced, but refactor is accompanied by some new comments. Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 162 +++++++++++++++++++++++++------------ src/os/bluestore/BlueFS.h | 13 ++- 2 files changed, 120 insertions(+), 55 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index f34996be3e000..5a2c50359fcc4 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2309,6 +2309,7 @@ void BlueFS::_compact_log_async(std::unique_lock& l) // 1. allocate new log space and jump to it. 
old_log_jump_to = log_file->fnode.get_allocated(); + uint64_t runway = log_file->fnode.get_allocated() - log_writer->get_effective_write_pos(); dout(10) << __func__ << " old_log_jump_to 0x" << std::hex << old_log_jump_to << " need 0x" << (old_log_jump_to + cct->_conf->bluefs_max_log_runway) << std::dec << dendl; int r = _allocate(vselector->select_prefer_bdev(log_file->vselector_hint), @@ -2324,9 +2325,12 @@ void BlueFS::_compact_log_async(std::unique_lock& l) log_t.op_file_update(log_file->fnode); log_t.op_jump(log_seq, old_log_jump_to); - flush_bdev(); // FIXME? + // we need to flush all bdev because we will be streaming all dirty files to log + // TODO - think - if _flush_and_sync_log_jump will not add dirty files nor release pending allocations + // then flush_bdev() will not be necessary + flush_bdev(); - _flush_and_sync_log(l, 0, old_log_jump_to); + _flush_and_sync_log_jump(old_log_jump_to, runway); // 2. prepare compacted log bluefs_transaction_t t; @@ -2455,38 +2459,17 @@ void BlueFS::_pad_bl(bufferlist& bl) } } - -int BlueFS::_flush_and_sync_log(std::unique_lock& l, - uint64_t want_seq, - uint64_t jump_to) +// Adds to log_t file modifications mentioned in `dirty_files`. +// Note: some bluefs ops may have already been stored in log_t transaction. +uint64_t BlueFS::_consume_dirty() { - while (log_flushing) { - dout(10) << __func__ << " want_seq " << want_seq - << " log is currently flushing, waiting" << dendl; - ceph_assert(!jump_to); - log_cond.wait(l); - } - if (want_seq && want_seq <= log_seq_stable) { - dout(10) << __func__ << " want_seq " << want_seq << " <= log_seq_stable " - << log_seq_stable << ", done" << dendl; - ceph_assert(!jump_to); - return 0; - } - if (log_t.empty() && dirty_files.empty()) { - dout(10) << __func__ << " want_seq " << want_seq - << " " << log_t << " not dirty, dirty_files empty, no-op" << dendl; - ceph_assert(!jump_to); - return 0; - } - - vector> to_release(pending_release.size()); - to_release.swap(pending_release); - + //acquire new seq + // this will became log_seq_stable once we write uint64_t seq = log_t.seq = ++log_seq; - ceph_assert(want_seq == 0 || want_seq <= seq); log_t.uuid = super.uuid; // log dirty files + // we just incremented log_seq. It is now illegal to add to dirty_files[log_seq] auto lsi = dirty_files.find(seq); if (lsi != dirty_files.end()) { dout(20) << __func__ << " " << lsi->second.size() << " dirty_files" << dendl; @@ -2495,21 +2478,37 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l, log_t.op_file_update(f.fnode); } } + return seq; +} - dout(10) << __func__ << " " << log_t << dendl; - ceph_assert(!log_t.empty()); - +// Extends log if its free space is smaller then bluefs_min_log_runway. +// Returns space available *BEFORE* adding new space. Signed for additional <0 detection. +int64_t BlueFS::_maybe_extend_log() +{ // allocate some more space (before we run out)? // BTW: this triggers `flush()` in the `page_aligned_appender` of `log_writer`. int64_t runway = log_writer->file->fnode.get_allocated() - log_writer->get_effective_write_pos(); - bool just_expanded_log = false; if (runway < (int64_t)cct->_conf->bluefs_min_log_runway) { dout(10) << __func__ << " allocating more log runway (0x" << std::hex << runway << std::dec << " remaining)" << dendl; - while (new_log_writer) { - dout(10) << __func__ << " waiting for async compaction" << dendl; - log_cond.wait(l); + /* + * Usually, when we are low on space in log, we just allocate new extent, + * put update op(log) to log and we are fine. 
+ * Problem - it interferes with log compaction: + * New log produced in compaction will include - as last op - jump into some offset (anchor) of current log. + * It is assumed that log region (anchor - end) will contain all changes made by bluefs since + * full state capture into new log. + * Putting log update into (anchor - end) region is illegal, because any update there must be compatible with + * both logs, but old log is different then new log. + * + * Possible solutions: + * - stall extending log until we finish compacting and switch log (CURRENT) + * - re-run compaction with more runway for old log + * - add OP_FILE_ADDEXT that adds extent; will be compatible with both logs + */ + if (new_log_writer) { + return -EWOULDBLOCK; } vselector->sub_usage(log_writer->file->vselector_hint, log_writer->file->fnode); int r = _allocate( @@ -2519,8 +2518,13 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l, ceph_assert(r == 0); vselector->add_usage(log_writer->file->vselector_hint, log_writer->file->fnode); log_t.op_file_update(log_writer->file->fnode); - just_expanded_log = true; } + return runway; +} + +void BlueFS::_flush_and_sync_log_core(int64_t runway) +{ + dout(10) << __func__ << " " << log_t << dendl; bufferlist bl; bl.reserve(super.block_size); @@ -2532,38 +2536,29 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l, logger->inc(l_bluefs_logged_bytes, bl.length()); - if (just_expanded_log) { + if (true) { ceph_assert(bl.length() <= runway); // if we write this, we will have an unrecoverable data loss + // transaction will not fit extents before growth -> data loss on _replay } log_writer->append(bl); log_t.clear(); log_t.seq = 0; // just so debug output is less confusing - log_flushing = true; int r = _flush(log_writer, true); ceph_assert(r == 0); +} - if (jump_to) { - dout(10) << __func__ << " jumping log offset from 0x" << std::hex - << log_writer->pos << " -> 0x" << jump_to << std::dec << dendl; - log_writer->pos = jump_to; - vselector->sub_usage(log_writer->file->vselector_hint, log_writer->file->fnode.size); - log_writer->file->fnode.size = jump_to; - vselector->add_usage(log_writer->file->vselector_hint, log_writer->file->fnode.size); - } - - _flush_bdev_safely(log_writer); - - log_flushing = false; - log_cond.notify_all(); - +// Clears dirty_files up to (including) seq_stable. +void BlueFS::_clear_dirty_set_stable(uint64_t seq) +{ // clean dirty files if (seq > log_seq_stable) { log_seq_stable = seq; dout(20) << __func__ << " log_seq_stable " << log_seq_stable << dendl; + // undirty all files that were already streamed to log auto p = dirty_files.begin(); while (p != dirty_files.end()) { if (p->first > log_seq_stable) { @@ -2589,7 +2584,10 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l, << " already >= out seq " << seq << ", we lost a race against another log flush, done" << dendl; } +} +void BlueFS::_release_pending_allocations(vector>& to_release) +{ for (unsigned i = 0; i < to_release.size(); ++i) { if (!to_release[i].empty()) { /* OK, now we have the guarantee alloc[i] won't be null. 
*/ @@ -2609,9 +2607,67 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l, } } } +} + +int BlueFS::_flush_and_sync_log(std::unique_lock& l, + uint64_t want_seq) +{ + if (want_seq && want_seq <= log_seq_stable) { + dout(10) << __func__ << " want_seq " << want_seq << " <= log_seq_stable " + << log_seq_stable << ", done" << dendl; + return 0; + } + int64_t available_runway; + do { + available_runway = _maybe_extend_log(); + if (available_runway == -EWOULDBLOCK) { + while (new_log_writer) { + dout(10) << __func__ << " waiting for async compaction" << dendl; + log_cond.wait(l); + } + } + } while (available_runway == -EWOULDBLOCK); + + ceph_assert(want_seq == 0 || want_seq <= log_seq + 1); // illegal to request seq that was not created yet + + uint64_t seq = _consume_dirty(); + vector> to_release(pending_release.size()); + to_release.swap(pending_release); + + _flush_and_sync_log_core(available_runway); + _flush_bdev_safely(log_writer); + + _clear_dirty_set_stable(seq); + _release_pending_allocations(to_release); _update_logger_stats(); + return 0; +} + +// Flushes log and immediately adjusts log_writer pos. +int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, + int64_t available_runway) +{ + ceph_assert(jump_to); + uint64_t seq = _consume_dirty(); + vector> to_release(pending_release.size()); + to_release.swap(pending_release); + + _flush_and_sync_log_core(available_runway); + + dout(10) << __func__ << " jumping log offset from 0x" << std::hex + << log_writer->pos << " -> 0x" << jump_to << std::dec << dendl; + log_writer->pos = jump_to; + vselector->sub_usage(log_writer->file->vselector_hint, log_writer->file->fnode.size); + log_writer->file->fnode.size = jump_to; + vselector->add_usage(log_writer->file->vselector_hint, log_writer->file->fnode.size); + _flush_bdev_safely(log_writer); + + _clear_dirty_set_stable(seq); + _release_pending_allocations(to_release); + + _update_logger_stats(); return 0; } diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index b285592b41df4..d8cbdbe4f661e 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -403,9 +403,18 @@ class BlueFS { void wait_for_aio(FileWriter *h); // safe to call without a lock #endif + int64_t _maybe_extend_log(); + void _extend_log(); + uint64_t _consume_dirty(); + void _clear_dirty_set_stable(uint64_t seq_stable); + void _release_pending_allocations(std::vector>& to_release); + + void _flush_and_sync_log_core(int64_t available_runway); + int _flush_and_sync_log_jump(uint64_t jump_to, + int64_t available_runway); int _flush_and_sync_log(std::unique_lock& l, - uint64_t want_seq = 0, - uint64_t jump_to = 0); + uint64_t want_seq = 0); + uint64_t _estimate_log_size(); bool _should_compact_log(); From 6b6a76a961db949c222de72f8fd5203ffac450d0 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Sat, 5 Jun 2021 20:50:46 +0200 Subject: [PATCH 0040/2866] os/bluestore/bluefs: Refactor _flush This refactor prepares _flush for fine-grain locks in BlueFS. Introduced _flush_special, a flush dedicated to bluefs special files (ino=1) and (ino=0). Function _flush no longer accepts these special files. 
Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 42 ++++++++++++++++++++++++-------------- src/os/bluestore/BlueFS.h | 2 ++ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 5a2c50359fcc4..1d6ec6fd80c37 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2230,7 +2230,7 @@ void BlueFS::_rewrite_log_and_layout_sync(bool allocate_with_fallback, log_writer = _create_writer(log_file); log_writer->append(bl); - r = _flush(log_writer, true); + r = _flush_special(log_writer); ceph_assert(r == 0); #ifdef HAVE_LIBAIO if (!cct->_conf->bluefs_sync_write) { @@ -2367,7 +2367,7 @@ void BlueFS::_compact_log_async(std::unique_lock& l) new_log_writer->append(bl); // 3. flush - r = _flush(new_log_writer, true); + r = _flush_special(new_log_writer); ceph_assert(r == 0); // 4. wait @@ -2546,7 +2546,7 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway) log_t.clear(); log_t.seq = 0; // just so debug output is less confusing - int r = _flush(log_writer, true); + int r = _flush_special(log_writer); ceph_assert(r == 0); } @@ -2752,11 +2752,8 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) ceph_assert(h->file->num_readers.load() == 0); - bool buffered; - if (h->file->fnode.ino == 1) - buffered = false; - else - buffered = cct->_conf->bluefs_buffered_io; + bool buffered = cct->_conf->bluefs_buffered_io; + ceph_assert(h->file->fnode.ino > 1); if (offset + length <= h->pos) return 0; @@ -2777,7 +2774,6 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) if (allocated < offset + length) { // we should never run out of log space here; see the min runway check // in _flush_and_sync_log. - ceph_assert(h->file->fnode.ino != 1); int r = _allocate(vselector->select_prefer_bdev(h->file->vselector_hint), offset + length - allocated, &h->file->fnode); @@ -2793,15 +2789,14 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) } if (h->file->fnode.size < offset + length) { h->file->fnode.size = offset + length; - if (h->file->fnode.ino > 1) { - // we do not need to dirty the log file (or it's compacting - // replacement) when the file size changes because replay is - // smart enough to discover it on its own. - h->file->is_dirty = true; - } + h->file->is_dirty = true; } dout(20) << __func__ << " file now, unflushed " << h->file->fnode << dendl; + return _flush_data(h, offset, length, buffered); +} +int BlueFS::_flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered) +{ uint64_t x_off = 0; auto p = h->file->fnode.seek(offset, &x_off); ceph_assert(p != h->file->fnode.extents.end()); @@ -2948,6 +2943,23 @@ int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) return r; } +// Flush for bluefs special files. +// Does not add extents to h. +// Does not mark h as dirty. +// we do not need to dirty the log file (or it's compacting +// replacement) when the file size changes because replay is +// smart enough to discover it on its own. 
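In rough outline the split looks like the sketch below (illustrative only, not the BlueFS code; the real paths also deal with buffering, extent allocation and the journal):

    #include <cstdint>
    #include <stdexcept>

    struct FileSketch {
      uint64_t ino = 0;
      uint64_t allocated = 0;
      uint64_t size = 0;
      bool is_dirty = false;
    };

    // Regular files: may grow the allocation and mark the file dirty so the
    // size/extent change gets journaled on the next log flush.
    void flush_regular(FileSketch& f, uint64_t end) {
      if (f.ino <= 1) throw std::logic_error("special files must use flush_special");
      if (end > f.allocated) f.allocated = end;
      if (end > f.size) { f.size = end; f.is_dirty = true; }
    }

    // Special files (the log and its compaction replacement): the data must
    // already fit in the allocated space and nothing is journaled, because
    // the log cannot re-enter itself to record its own growth.
    void flush_special(FileSketch& f, uint64_t end) {
      if (end > f.allocated) throw std::logic_error("log overrun");
      if (end > f.size) f.size = end;
    }

    int main() {
      FileSketch log{1, 1 << 20, 0, false};
      flush_special(log, 4096);    // size grows, no dirty flag, no journal entry
      FileSketch data{10, 0, 0, false};
      flush_regular(data, 4096);   // allocates and marks dirty
      return 0;
    }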
+int BlueFS::_flush_special(FileWriter *h) +{ + uint64_t length = h->get_buffer_length(); + uint64_t offset = h->pos; + ceph_assert(length + offset <= h->file->fnode.get_allocated()); + if (h->file->fnode.size < offset + length) { + h->file->fnode.size = offset + length; + } + return _flush_data(h, offset, length, false); +} + int BlueFS::_truncate(FileWriter *h, uint64_t offset) { dout(10) << __func__ << " 0x" << std::hex << offset << std::dec diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index d8cbdbe4f661e..a4cd65e4def29 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -394,8 +394,10 @@ class BlueFS { /* signal replay log to include h->file in nearest log flush */ int _signal_dirty_to_log(FileWriter *h); int _flush_range(FileWriter *h, uint64_t offset, uint64_t length); + int _flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered); int _flush(FileWriter *h, bool force, std::unique_lock& l); int _flush(FileWriter *h, bool force, bool *flushed = nullptr); + int _flush_special(FileWriter *h); int _fsync(FileWriter *h, std::unique_lock& l); #ifdef HAVE_LIBAIO From 08cf6d04a198a8dec3fc920a180208742c6fc965 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Tue, 29 Jun 2021 13:03:56 +0200 Subject: [PATCH 0041/2866] os/bluestore/bluefs: Split single bluefs lock into multiple smaller locks Splits bluefs lock into log, dirty, dirs, file and writer locks. This breaks severe locking issues, and makes bluefs more multithreaded. Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 418 +++++++++++++++++++++++++------------ src/os/bluestore/BlueFS.h | 113 ++++------ 2 files changed, 326 insertions(+), 205 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 1d6ec6fd80c37..8934520d4710f 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -148,7 +148,7 @@ class BlueFS::SocketHook : public AdminSocketHook { out.append(ss); } else if (command == "bluefs files list") { const char* devnames[3] = {"wal","db","slow"}; - std::lock_guard l(bluefs->lock); + std::lock_guard l(bluefs->dirs_lock); f->open_array_section("files"); for (auto &d : bluefs->dir_map) { std::string dir = d.first; @@ -310,6 +310,7 @@ void BlueFS::_shutdown_logger() delete logger; } +//AK - TODO - locking needed but not certain void BlueFS::_update_logger_stats() { // we must be holding the lock @@ -392,7 +393,6 @@ void BlueFS::handle_discard(unsigned id, interval_set& to_release) uint64_t BlueFS::get_used() { - std::lock_guard l(lock); uint64_t used = 0; for (unsigned id = 0; id < MAX_BDEV; ++id) { used += _get_used(id); @@ -418,7 +418,6 @@ uint64_t BlueFS::get_used(unsigned id) { ceph_assert(id < alloc.size()); ceph_assert(alloc[id]); - std::lock_guard l(lock); return _get_used(id); } @@ -431,13 +430,11 @@ uint64_t BlueFS::_get_total(unsigned id) const uint64_t BlueFS::get_total(unsigned id) { - std::lock_guard l(lock); return _get_total(id); } uint64_t BlueFS::get_free(unsigned id) { - std::lock_guard l(lock); ceph_assert(id < alloc.size()); return alloc[id]->get_free(); } @@ -467,7 +464,7 @@ void BlueFS::dump_block_extents(ostream& out) int BlueFS::get_block_extents(unsigned id, interval_set *extents) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); dout(10) << __func__ << " bdev " << id << dendl; ceph_assert(id < alloc.size()); for (auto& p : file_map) { @@ -482,7 +479,6 @@ int BlueFS::get_block_extents(unsigned id, interval_set *extents) int BlueFS::mkfs(uuid_d osd_uuid, const 
bluefs_layout_t& layout) { - std::unique_lock l(lock); dout(1) << __func__ << " osd_uuid " << osd_uuid << dendl; @@ -519,7 +515,7 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) // initial txn log_t.op_init(); - _flush_and_sync_log(l); + flush_and_sync_log(); // write supers super.log_fnode = log_file->fnode; @@ -868,7 +864,7 @@ int BlueFS::prepare_new_device(int id, const bluefs_layout_t& layout) new_log_dev_cur = BDEV_NEWDB; new_log_dev_next = BDEV_DB; } - _rewrite_log_and_layout_sync(false, + rewrite_log_and_layout_sync(false, BDEV_NEWDB, new_log_dev_cur, new_log_dev_next, @@ -876,7 +872,7 @@ int BlueFS::prepare_new_device(int id, const bluefs_layout_t& layout) layout); //} } else if(id == BDEV_NEWWAL) { - _rewrite_log_and_layout_sync(false, + rewrite_log_and_layout_sync(false, BDEV_DB, BDEV_NEWWAL, BDEV_WAL, @@ -907,7 +903,6 @@ void BlueFS::get_devices(set *ls) int BlueFS::fsck() { - std::lock_guard l(lock); dout(1) << __func__ << dendl; // hrm, i think we check everything on mount... return 0; @@ -1648,7 +1643,7 @@ int BlueFS::device_migrate_to_existing( new_log_dev_next; } - _rewrite_log_and_layout_sync( + rewrite_log_and_layout_sync( false, (flags & REMOVE_DB) ? BDEV_SLOW : BDEV_DB, new_log_dev_cur, @@ -1783,7 +1778,7 @@ int BlueFS::device_migrate_to_new( BDEV_DB : BDEV_SLOW; - _rewrite_log_and_layout_sync( + rewrite_log_and_layout_sync( false, super_dev, new_log_dev_cur, @@ -1813,12 +1808,16 @@ void BlueFS::_drop_link(FileRef file) dout(20) << __func__ << " had refs " << file->refs << " on " << file->fnode << dendl; ceph_assert(file->refs > 0); + ceph_assert(ceph_mutex_is_locked(log_lock)); + --file->refs; if (file->refs == 0) { dout(20) << __func__ << " destroying " << file->fnode << dendl; ceph_assert(file->num_reading.load() == 0); vselector->sub_usage(file->vselector_hint, file->fnode); log_t.op_file_remove(file->fnode.ino); + + std::lock_guard dl(dirty_lock); for (auto& r : file->fnode.extents) { pending_release[r.bdev].insert(r.offset, r.length); } @@ -1826,6 +1825,7 @@ void BlueFS::_drop_link(FileRef file) file->deleted = true; if (file->dirty_seq) { + // retract request to serialize changes ceph_assert(file->dirty_seq > log_seq_stable); ceph_assert(dirty_files.count(file->dirty_seq)); auto it = dirty_files[file->dirty_seq].iterator_to(*file); @@ -2049,8 +2049,9 @@ int64_t BlueFS::_read( return ret; } -void BlueFS::_invalidate_cache(FileRef f, uint64_t offset, uint64_t length) +void BlueFS::invalidate_cache(FileRef f, uint64_t offset, uint64_t length) { + std::lock_guard l(f->lock); dout(10) << __func__ << " file " << f->fnode << " 0x" << std::hex << offset << "~" << length << std::dec << dendl; @@ -2070,8 +2071,9 @@ void BlueFS::_invalidate_cache(FileRef f, uint64_t offset, uint64_t length) } } -uint64_t BlueFS::_estimate_log_size() +uint64_t BlueFS::estimate_log_size() { + std::lock_guard dirl(dirs_lock); int avg_dir_size = 40; // fixme int avg_file_size = 12; uint64_t size = 4096 * 2; @@ -2083,37 +2085,44 @@ uint64_t BlueFS::_estimate_log_size() void BlueFS::compact_log() { - std::unique_lock l(lock); if (!cct->_conf->bluefs_replay_recovery_disable_compact) { if (cct->_conf->bluefs_compact_log_sync) { - _compact_log_sync(); + compact_log_sync(); } else { - _compact_log_async(l); + compact_log_async(); } } } -bool BlueFS::_should_compact_log() +bool BlueFS::should_start_compact_log() { - uint64_t current = log_writer->file->fnode.size; - uint64_t expected = _estimate_log_size(); + if (log_is_compacting.load() == true) { + // compaction is already 
running + return false; + } + uint64_t current; + { + std::lock_guard dirl(log_lock); + current = log_writer->file->fnode.size; + } + uint64_t expected = estimate_log_size(); float ratio = (float)current / (float)expected; dout(10) << __func__ << " current 0x" << std::hex << current << " expected " << expected << std::dec << " ratio " << ratio - << (new_log ? " (async compaction in progress)" : "") << dendl; - if (new_log || - current < cct->_conf->bluefs_log_compact_min_size || + if (current < cct->_conf->bluefs_log_compact_min_size || ratio < cct->_conf->bluefs_log_compact_min_ratio) { return false; } return true; } -void BlueFS::_compact_log_dump_metadata(bluefs_transaction_t *t, +void BlueFS::compact_log_dump_metadata(bluefs_transaction_t *t, int flags) { + std::lock_guard dirl(dirs_lock); + t->seq = 1; t->uuid = super.uuid; dout(20) << __func__ << " op_init" << dendl; @@ -2123,7 +2132,7 @@ void BlueFS::_compact_log_dump_metadata(bluefs_transaction_t *t, if (ino == 1) continue; ceph_assert(ino > 1); - + //AK - TODO - touching fnode - need to lock for(auto& e : file_ref->fnode.extents) { auto bdev = e.bdev; auto bdev_new = bdev; @@ -2160,12 +2169,12 @@ void BlueFS::_compact_log_dump_metadata(bluefs_transaction_t *t, } } -void BlueFS::_compact_log_sync() +void BlueFS::compact_log_sync() { dout(10) << __func__ << dendl; auto prefer_bdev = vselector->select_prefer_bdev(log_writer->file->vselector_hint); - _rewrite_log_and_layout_sync(true, + rewrite_log_and_layout_sync(true, BDEV_DB, prefer_bdev, prefer_bdev, @@ -2174,13 +2183,16 @@ void BlueFS::_compact_log_sync() logger->inc(l_bluefs_log_compactions); } -void BlueFS::_rewrite_log_and_layout_sync(bool allocate_with_fallback, - int super_dev, - int log_dev, - int log_dev_new, - int flags, - std::optional layout) +void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, + int super_dev, + int log_dev, + int log_dev_new, + int flags, + std::optional layout) { + //ceph_assert(ceph_mutex_is_notlocked(log_lock)); + std::lock_guard ll(log_lock); + File *log_file = log_writer->file.get(); // clear out log (be careful who calls us!!!) @@ -2192,7 +2204,7 @@ void BlueFS::_rewrite_log_and_layout_sync(bool allocate_with_fallback, << " flags:" << flags << dendl; bluefs_transaction_t t; - _compact_log_dump_metadata(&t, flags); + compact_log_dump_metadata(&t, flags); dout(20) << __func__ << " op_jump_seq " << log_seq << dendl; t.op_jump_seq(log_seq); @@ -2258,6 +2270,7 @@ void BlueFS::_rewrite_log_and_layout_sync(bool allocate_with_fallback, flush_bdev(); dout(10) << __func__ << " release old log extents " << old_fnode.extents << dendl; + std::lock_guard dl(dirty_lock); for (auto& r : old_fnode.extents) { pending_release[r.bdev].insert(r.offset, r.length); } @@ -2285,29 +2298,42 @@ void BlueFS::_rewrite_log_and_layout_sync(bool allocate_with_fallback, * * 8. Release the old log space. Clean up. 
*/ -void BlueFS::_compact_log_async(std::unique_lock& l) + +void BlueFS::compact_log_async() { dout(10) << __func__ << dendl; + // only one compaction allowed at one time + bool old_is_comp = std::atomic_exchange(&log_is_compacting, true); + if (old_is_comp) { + dout(10) << __func__ << " ongoing" <file.get(); - ceph_assert(!new_log); - ceph_assert(!new_log_writer); + FileWriter *new_log_writer = nullptr; + FileRef new_log = nullptr; + uint64_t new_log_jump_to = 0; + uint64_t old_log_jump_to = 0; - // create a new log [writer] so that we know compaction is in progress - // (see _should_compact_log) new_log = ceph::make_ref(); - new_log->fnode.ino = 0; // so that _flush_range won't try to log the fnode + new_log->fnode.ino = 0; // we use _flush_special to avoid log of the fnode - // 0. wait for any racing flushes to complete. (We do not want to block - // in _flush_sync_log with jump_to set or else a racing thread might flush - // our entries and our jump_to update won't be correct.) - while (log_flushing) { - dout(10) << __func__ << " log is currently flushing, waiting" << dendl; - log_cond.wait(l); - } + // Part 1. + // Prepare current log for jumping into it. + // 1. Allocate extent + // 2. Update op to log + // 3. Jump op to log + // During that, no one else can write to log, otherwise we risk jumping backwards. + // We need to sync log, because we are injecting discontinuity, and writer is not prepared for that. + + //signal _maybe_extend_log that expansion of log is temporary inacceptable + bool old_forbidden = atomic_exchange(&log_forbidden_to_expand, true); + ceph_assert(old_forbidden == false); vselector->sub_usage(log_file->vselector_hint, log_file->fnode); - // 1. allocate new log space and jump to it. + // 1.1 allocate new log space and jump to it. old_log_jump_to = log_file->fnode.get_allocated(); uint64_t runway = log_file->fnode.get_allocated() - log_writer->get_effective_write_pos(); dout(10) << __func__ << " old_log_jump_to 0x" << std::hex << old_log_jump_to @@ -2322,21 +2348,24 @@ void BlueFS::_compact_log_async(std::unique_lock& l) // update the log file change and log a jump to the offset where we want to // write the new entries - log_t.op_file_update(log_file->fnode); - log_t.op_jump(log_seq, old_log_jump_to); + log_t.op_file_update(log_file->fnode); // 1.2 + log_t.op_jump(log_seq, old_log_jump_to); // 1.3 // we need to flush all bdev because we will be streaming all dirty files to log // TODO - think - if _flush_and_sync_log_jump will not add dirty files nor release pending allocations // then flush_bdev() will not be necessary flush_bdev(); - _flush_and_sync_log_jump(old_log_jump_to, runway); + log_lock.unlock(); + // out of jump section - now log can be used to write to + // 2. prepare compacted log bluefs_transaction_t t; - //avoid record two times in log_t and _compact_log_dump_metadata. - log_t.clear(); - _compact_log_dump_metadata(&t, 0); + //this needs files lock + //what will happen, if a file is modified *twice* before we stream it to log? + //the later state that we capture will be seen earlier and replay will see a temporary retraction (!) 
+ compact_log_dump_metadata(&t, 0); uint64_t max_alloc_size = std::max(alloc_size[BDEV_WAL], std::max(alloc_size[BDEV_DB], @@ -2354,7 +2383,7 @@ void BlueFS::_compact_log_async(std::unique_lock& l) ceph_assert(r == 0); // we might have some more ops in log_t due to _allocate call - t.claim_ops(log_t); + // t.claim_ops(log_t); no we no longer track allocations in log bufferlist bl; encode(t, bl); @@ -2364,15 +2393,16 @@ void BlueFS::_compact_log_async(std::unique_lock& l) << std::dec << dendl; new_log_writer = _create_writer(new_log); - new_log_writer->append(bl); + new_log_writer->append(bl); + new_log_writer->lock.lock(); // 3. flush r = _flush_special(new_log_writer); ceph_assert(r == 0); // 4. wait - _flush_bdev_safely(new_log_writer); - + _flush_bdev(new_log_writer); + new_log_writer->lock.unlock(); // 5. update our log fnode // discard first old_log_jump_to extents @@ -2424,29 +2454,42 @@ void BlueFS::_compact_log_async(std::unique_lock& l) ++super.version; _write_super(BDEV_DB); - lock.unlock(); flush_bdev(); - lock.lock(); + + old_forbidden = atomic_exchange(&log_forbidden_to_expand, false); + ceph_assert(old_forbidden == true); + //to wake up if someone was in need of expanding log + log_cond.notify_all(); // 7. release old space dout(10) << __func__ << " release old log extents " << old_extents << dendl; - for (auto& r : old_extents) { - pending_release[r.bdev].insert(r.offset, r.length); + { + std::lock_guard dl(dirty_lock); + for (auto& r : old_extents) { + pending_release[r.bdev].insert(r.offset, r.length); + } } // delete the new log, remove from the dirty files list _close_writer(new_log_writer); + // flush_special does not dirty files + /* if (new_log->dirty_seq) { + std::lock_guard dl(dirty_lock); ceph_assert(dirty_files.count(new_log->dirty_seq)); auto it = dirty_files[new_log->dirty_seq].iterator_to(*new_log); dirty_files[new_log->dirty_seq].erase(it); } + */ new_log_writer = nullptr; new_log = nullptr; log_cond.notify_all(); dout(10) << __func__ << " log extents " << log_file->fnode.extents << dendl; logger->inc(l_bluefs_log_compactions); + + old_is_comp = atomic_exchange(&log_is_compacting, false); + ceph_assert(old_is_comp); } void BlueFS::_pad_bl(bufferlist& bl) @@ -2459,10 +2502,26 @@ void BlueFS::_pad_bl(bufferlist& bl) } } +/** + * Adds file modifications from `dirty_files` to bluefs transactions + * already stored in `log_t`. Writes them to disk and waits until are stable. + * Guarantees that file modifications with `want_seq` are already stable on disk. + * In addition may insert jump forward transaction to log write position `jump_to`. + * + * it should lock ability to: + * 1) add to log_t + * 2) modify dirty_files + * 3) add to pending_release + * + * pending_release and log_t go with same lock + */ + // Adds to log_t file modifications mentioned in `dirty_files`. // Note: some bluefs ops may have already been stored in log_t transaction. uint64_t BlueFS::_consume_dirty() { + ceph_assert(ceph_mutex_is_locked(dirty_lock)); + ceph_assert(ceph_mutex_is_locked(log_lock)); //acquire new seq // this will became log_seq_stable once we write uint64_t seq = log_t.seq = ++log_seq; @@ -2485,6 +2544,7 @@ uint64_t BlueFS::_consume_dirty() // Returns space available *BEFORE* adding new space. Signed for additional <0 detection. int64_t BlueFS::_maybe_extend_log() { + ceph_assert(ceph_mutex_is_locked(log_lock)); // allocate some more space (before we run out)? // BTW: this triggers `flush()` in the `page_aligned_appender` of `log_writer`. 
int64_t runway = log_writer->file->fnode.get_allocated() - @@ -2507,7 +2567,7 @@ int64_t BlueFS::_maybe_extend_log() * - re-run compaction with more runway for old log * - add OP_FILE_ADDEXT that adds extent; will be compatible with both logs */ - if (new_log_writer) { + if (log_forbidden_to_expand.load() == true) { return -EWOULDBLOCK; } vselector->sub_usage(log_writer->file->vselector_hint, log_writer->file->fnode); @@ -2524,6 +2584,7 @@ int64_t BlueFS::_maybe_extend_log() void BlueFS::_flush_and_sync_log_core(int64_t runway) { + ceph_assert(ceph_mutex_is_locked(log_lock)); dout(10) << __func__ << " " << log_t << dendl; bufferlist bl; @@ -2551,8 +2612,10 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway) } // Clears dirty_files up to (including) seq_stable. -void BlueFS::_clear_dirty_set_stable(uint64_t seq) +void BlueFS::clear_dirty_set_stable(uint64_t seq) { + std::lock_guard lg(dirty_lock); + // clean dirty files if (seq > log_seq_stable) { log_seq_stable = seq; @@ -2586,7 +2649,7 @@ void BlueFS::_clear_dirty_set_stable(uint64_t seq) } } -void BlueFS::_release_pending_allocations(vector>& to_release) +void BlueFS::release_pending_allocations(vector>& to_release) { for (unsigned i = 0; i < to_release.size(); ++i) { if (!to_release[i].empty()) { @@ -2609,36 +2672,49 @@ void BlueFS::_release_pending_allocations(vector>& to_rel } } -int BlueFS::_flush_and_sync_log(std::unique_lock& l, - uint64_t want_seq) +int BlueFS::flush_and_sync_log(uint64_t want_seq) { - if (want_seq && want_seq <= log_seq_stable) { - dout(10) << __func__ << " want_seq " << want_seq << " <= log_seq_stable " - << log_seq_stable << ", done" << dendl; - return 0; - } + // we synchronize writing to log, by lock to log_lock int64_t available_runway; do { + log_lock.lock(); + dirty_lock.lock(); + if (want_seq && want_seq <= log_seq_stable) { + dout(10) << __func__ << " want_seq " << want_seq << " <= log_seq_stable " + << log_seq_stable << ", done" << dendl; + dirty_lock.unlock(); + log_lock.unlock(); + return 0; + } + available_runway = _maybe_extend_log(); if (available_runway == -EWOULDBLOCK) { - while (new_log_writer) { - dout(10) << __func__ << " waiting for async compaction" << dendl; - log_cond.wait(l); + // we are in need of adding runway, but we are during log-switch from compaction + dirty_lock.unlock(); + //instead log_lock_unlock() do move ownership + std::unique_lock ll(log_lock, std::adopt_lock); + while (log_forbidden_to_expand.load()) { + log_cond.wait(ll); } + } else { + ceph_assert(available_runway >= 0); } - } while (available_runway == -EWOULDBLOCK); + } while (available_runway < 0); ceph_assert(want_seq == 0 || want_seq <= log_seq + 1); // illegal to request seq that was not created yet uint64_t seq = _consume_dirty(); vector> to_release(pending_release.size()); to_release.swap(pending_release); + dirty_lock.unlock(); _flush_and_sync_log_core(available_runway); - _flush_bdev_safely(log_writer); + _flush_bdev(log_writer); + //now log_lock is no longer needed + log_lock.unlock(); - _clear_dirty_set_stable(seq); - _release_pending_allocations(to_release); + clear_dirty_set_stable(seq); + release_pending_allocations(to_release); _update_logger_stats(); return 0; @@ -2648,11 +2724,16 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l, int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, int64_t available_runway) { + ceph_assert(ceph_mutex_is_locked(log_lock)); + ceph_assert(jump_to); + // we synchronize writing to log, by lock to log_lock + + dirty_lock.lock(); uint64_t seq = _consume_dirty(); 
vector> to_release(pending_release.size()); to_release.swap(pending_release); - + dirty_lock.unlock(); _flush_and_sync_log_core(available_runway); dout(10) << __func__ << " jumping log offset from 0x" << std::hex @@ -2662,10 +2743,10 @@ int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, log_writer->file->fnode.size = jump_to; vselector->add_usage(log_writer->file->vselector_hint, log_writer->file->fnode.size); - _flush_bdev_safely(log_writer); + _flush_bdev(log_writer); - _clear_dirty_set_stable(seq); - _release_pending_allocations(to_release); + clear_dirty_set_stable(seq); + release_pending_allocations(to_release); _update_logger_stats(); return 0; @@ -2677,6 +2758,7 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer( const unsigned length, const bluefs_super_t& super) { + ceph_assert(ceph_mutex_is_locked(this->lock) || file->fnode.ino == 1); ceph::bufferlist bl; if (partial) { tail_block.splice(0, tail_block.length(), &bl); @@ -2715,6 +2797,8 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer( int BlueFS::_signal_dirty_to_log(FileWriter *h) { + ceph_assert(ceph_mutex_is_locked(h->lock)); + std::lock_guard dl(dirty_lock); h->file->fnode.mtime = ceph_clock_now(); ceph_assert(h->file->fnode.ino >= 1); if (h->file->dirty_seq == 0) { @@ -2740,8 +2824,14 @@ int BlueFS::_signal_dirty_to_log(FileWriter *h) return 0; } +void BlueFS::flush_range(FileWriter *h, uint64_t offset, uint64_t length) { + std::unique_lock hl(h->lock); + _flush_range(h, offset, length); +} + int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) { + ceph_assert(ceph_mutex_is_locked(h->lock)); dout(10) << __func__ << " " << h << " pos 0x" << std::hex << h->pos << " 0x" << offset << "~" << length << std::dec << " to " << h->file->fnode << dendl; @@ -2764,13 +2854,13 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) << std::hex << offset << "~" << length << std::dec << dendl; } + std::lock_guard file_lock(h->file->lock); ceph_assert(offset <= h->file->fnode.size); uint64_t allocated = h->file->fnode.get_allocated(); vselector->sub_usage(h->file->vselector_hint, h->file->fnode); // do not bother to dirty the file if we are overwriting // previously allocated extents. - if (allocated < offset + length) { // we should never run out of log space here; see the min runway check // in _flush_and_sync_log. 
@@ -2791,12 +2881,15 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) h->file->fnode.size = offset + length; h->file->is_dirty = true; } + dout(20) << __func__ << " file now, unflushed " << h->file->fnode << dendl; return _flush_data(h, offset, length, buffered); } int BlueFS::_flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered) { + //ceph_assert(ceph_mutex_is_locked(h->lock)); + //ceph_assert(ceph_mutex_is_locked(h->file->lock)); uint64_t x_off = 0; auto p = h->file->fnode.seek(offset, &x_off); ceph_assert(p != h->file->fnode.extents.end()); @@ -2903,18 +2996,49 @@ void BlueFS::wait_for_aio(FileWriter *h) } #endif -int BlueFS::_flush(FileWriter *h, bool force, std::unique_lock& l) + +void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len) +{ + std::unique_lock hl(h->lock); + size_t max_size = 1ull << 30; // cap to 1GB + while (len > 0) { + bool need_flush = true; + auto l0 = h->get_buffer_length(); + if (l0 < max_size) { + size_t l = std::min(len, max_size - l0); + h->append(buf, l); + buf += l; + len -= l; + need_flush = h->get_buffer_length() >= cct->_conf->bluefs_min_flush_size; + } + if (need_flush) { + bool flushed = false; + int r = _flush(h, true, &flushed); + ceph_assert(r == 0); + if (r == 0 && flushed) { + maybe_compact_log(); + } + // make sure we've made any progress with flush hence the + // loop doesn't iterate forever + ceph_assert(h->get_buffer_length() < max_size); + } + } +} + +void BlueFS::flush(FileWriter *h, bool force) { + std::unique_lock hl(h->lock); bool flushed = false; int r = _flush(h, force, &flushed); + ceph_assert(r == 0); if (r == 0 && flushed) { - _maybe_compact_log(l); + maybe_compact_log(); } - return r; } int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) { + ceph_assert(ceph_mutex_is_locked(h->lock)); uint64_t length = h->get_buffer_length(); uint64_t offset = h->pos; if (flushed) { @@ -2951,6 +3075,8 @@ int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) // smart enough to discover it on its own. 
int BlueFS::_flush_special(FileWriter *h) { + //ceph_assert(ceph_mutex_is_locked(h->lock)); + //ceph_assert(ceph_mutex_is_locked(h->file->lock)); uint64_t length = h->get_buffer_length(); uint64_t offset = h->pos; ceph_assert(length + offset <= h->file->fnode.get_allocated()); @@ -2960,8 +3086,9 @@ int BlueFS::_flush_special(FileWriter *h) return _flush_data(h, offset, length, false); } -int BlueFS::_truncate(FileWriter *h, uint64_t offset) +int BlueFS::truncate(FileWriter *h, uint64_t offset) { + std::lock_guard hl(h->lock); dout(10) << __func__ << " 0x" << std::hex << offset << std::dec << " file " << h->file->fnode << dendl; if (h->file->deleted) { @@ -2994,12 +3121,14 @@ int BlueFS::_truncate(FileWriter *h, uint64_t offset) vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size); h->file->fnode.size = offset; vselector->add_usage(h->file->vselector_hint, h->file->fnode.size); + std::lock_guard ll(log_lock); log_t.op_file_update(h->file->fnode); return 0; } -int BlueFS::_fsync(FileWriter *h, std::unique_lock& l) +int BlueFS::fsync(FileWriter *h) { + std::unique_lock hl(h->lock); dout(10) << __func__ << " " << h << " " << h->file->fnode << dendl; int r = _flush(h, true); if (r < 0) @@ -3010,39 +3139,40 @@ int BlueFS::_fsync(FileWriter *h, std::unique_lock& l) } uint64_t old_dirty_seq = h->file->dirty_seq; - _flush_bdev_safely(h); + _flush_bdev(h); if (old_dirty_seq) { uint64_t s = log_seq; dout(20) << __func__ << " file metadata was dirty (" << old_dirty_seq << ") on " << h->file->fnode << ", flushing log" << dendl; - _flush_and_sync_log(l, old_dirty_seq); + flush_and_sync_log(old_dirty_seq); + // AK - TODO - think - how can dirty_seq change if we are under h lock? ceph_assert(h->file->dirty_seq == 0 || // cleaned - h->file->dirty_seq > s); // or redirtied by someone else + h->file->dirty_seq > s); // or redirtied by someone else } + maybe_compact_log(); return 0; } -void BlueFS::_flush_bdev_safely(FileWriter *h) +// be careful - either h->file->lock or log_lock must be taken +void BlueFS::_flush_bdev(FileWriter *h) { + if (h->file->fnode.ino != 1) { + ceph_assert(ceph_mutex_is_locked(h->lock)); + } else { + ceph_assert(ceph_mutex_is_locked(log_lock)); + } std::array flush_devs = h->dirty_devs; h->dirty_devs.fill(false); #ifdef HAVE_LIBAIO if (!cct->_conf->bluefs_sync_write) { list completed_ios; _claim_completed_aios(h, &completed_ios); - lock.unlock(); wait_for_aio(h); completed_ios.clear(); - flush_bdev(flush_devs); - lock.lock(); - } else -#endif - { - lock.unlock(); - flush_bdev(flush_devs); - lock.lock(); } +#endif + flush_bdev(flush_devs); } void BlueFS::flush_bdev(std::array& dirty_bdevs) @@ -3176,8 +3306,9 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, return 0; } -int BlueFS::_preallocate(FileRef f, uint64_t off, uint64_t len) +int BlueFS::preallocate(FileRef f, uint64_t off, uint64_t len) { + std::lock_guard fl(f->lock); dout(10) << __func__ << " file " << f->fnode << " 0x" << std::hex << off << "~" << len << std::dec << dendl; if (f->deleted) { @@ -3196,6 +3327,7 @@ int BlueFS::_preallocate(FileRef f, uint64_t off, uint64_t len) vselector->add_usage(f->vselector_hint, f->fnode); if (r < 0) return r; + std::lock_guard ll(log_lock); log_t.op_file_update(f->fnode); } return 0; @@ -3203,8 +3335,13 @@ int BlueFS::_preallocate(FileRef f, uint64_t off, uint64_t len) void BlueFS::sync_metadata(bool avoid_compact) { - std::unique_lock l(lock); - if (log_t.empty() && dirty_files.empty()) { + bool can_skip_flush; + { + std::lock_guard ll(log_lock); + std::lock_guard 
dl(dirty_lock); + can_skip_flush = log_t.empty() && dirty_files.empty(); + } + if (can_skip_flush) { dout(10) << __func__ << " - no pending log events" << dendl; } else { utime_t start; @@ -3212,23 +3349,23 @@ void BlueFS::sync_metadata(bool avoid_compact) start = ceph_clock_now(); *_dout << dendl; flush_bdev(); // FIXME? - _flush_and_sync_log(l); + flush_and_sync_log(); dout(10) << __func__ << " done in " << (ceph_clock_now() - start) << dendl; } if (!avoid_compact) { - _maybe_compact_log(l); + maybe_compact_log(); } } -void BlueFS::_maybe_compact_log(std::unique_lock& l) +void BlueFS::maybe_compact_log() { if (!cct->_conf->bluefs_replay_recovery_disable_compact && - _should_compact_log()) { + should_start_compact_log()) { if (cct->_conf->bluefs_compact_log_sync) { - _compact_log_sync(); + compact_log_sync(); } else { - _compact_log_async(l); + compact_log_async(); } } } @@ -3239,7 +3376,7 @@ int BlueFS::open_for_write( FileWriter **h, bool overwrite) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = dir_map.find(dirname); DirRef dir; @@ -3301,11 +3438,12 @@ int BlueFS::open_for_write( dout(20) << __func__ << " mapping " << dirname << "/" << filename << " vsel_hint " << file->vselector_hint << dendl; - - log_t.op_file_update(file->fnode); - if (create) - log_t.op_dir_link(dirname, filename, file->fnode.ino); - + { + std::lock_guard ll(log_lock); + log_t.op_file_update(file->fnode); + if (create) + log_t.op_dir_link(dirname, filename, file->fnode.ino); + } *h = _create_writer(file); if (boost::algorithm::ends_with(filename, ".log")) { @@ -3335,7 +3473,7 @@ BlueFS::FileWriter *BlueFS::_create_writer(FileRef f) return w; } -void BlueFS::_close_writer(FileWriter *h) +void BlueFS::_drain_writer(FileWriter *h) { dout(10) << __func__ << " " << h << " type " << h->writer_type << dendl; //h->buffer.reassign_to_mempool(mempool::mempool_bluefs_file_writer); @@ -3351,18 +3489,31 @@ void BlueFS::_close_writer(FileWriter *h) if (h->file->fnode.size >= (1ull << 30)) { dout(10) << __func__ << " file is unexpectedly large:" << h->file->fnode << dendl; } +} + +void BlueFS::_close_writer(FileWriter *h) +{ + _drain_writer(h); + delete h; +} +void BlueFS::close_writer(FileWriter *h) +{ + { + std::lock_guard l(h->lock); + _drain_writer(h); + } delete h; } uint64_t BlueFS::debug_get_dirty_seq(FileWriter *h) { - std::lock_guard l(lock); + std::lock_guard l(h->lock); return h->file->dirty_seq; } bool BlueFS::debug_get_is_dev_dirty(FileWriter *h, uint8_t dev) { - std::lock_guard l(lock); + std::lock_guard l(h->lock); return h->dirty_devs[dev]; } @@ -3372,7 +3523,7 @@ int BlueFS::open_for_read( FileReader **h, bool random) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); dout(10) << __func__ << " " << dirname << "/" << filename << (random ? 
" (random)":" (sequential)") << dendl; map::iterator p = dir_map.find(dirname); @@ -3401,7 +3552,8 @@ int BlueFS::rename( std::string_view old_dirname, std::string_view old_filename, std::string_view new_dirname, std::string_view new_filename) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); + std::lock_guard ll(log_lock); dout(10) << __func__ << " " << old_dirname << "/" << old_filename << " -> " << new_dirname << "/" << new_filename << dendl; map::iterator p = dir_map.find(old_dirname); @@ -3448,7 +3600,8 @@ int BlueFS::rename( int BlueFS::mkdir(std::string_view dirname) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); + std::lock_guard ll(log_lock); dout(10) << __func__ << " " << dirname << dendl; map::iterator p = dir_map.find(dirname); if (p != dir_map.end()) { @@ -3462,7 +3615,8 @@ int BlueFS::mkdir(std::string_view dirname) int BlueFS::rmdir(std::string_view dirname) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); + std::lock_guard ll(log_lock); dout(10) << __func__ << " " << dirname << dendl; auto p = dir_map.find(dirname); if (p == dir_map.end()) { @@ -3481,7 +3635,7 @@ int BlueFS::rmdir(std::string_view dirname) bool BlueFS::dir_exists(std::string_view dirname) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); map::iterator p = dir_map.find(dirname); bool exists = p != dir_map.end(); dout(10) << __func__ << " " << dirname << " = " << (int)exists << dendl; @@ -3491,7 +3645,7 @@ bool BlueFS::dir_exists(std::string_view dirname) int BlueFS::stat(std::string_view dirname, std::string_view filename, uint64_t *size, utime_t *mtime) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = dir_map.find(dirname); if (p == dir_map.end()) { @@ -3519,7 +3673,7 @@ int BlueFS::stat(std::string_view dirname, std::string_view filename, int BlueFS::lock_file(std::string_view dirname, std::string_view filename, FileLock **plock) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = dir_map.find(dirname); if (p == dir_map.end()) { @@ -3539,6 +3693,7 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, file_map[ino_last] = file; dir->file_map[string{filename}] = file; ++file->refs; + std::lock_guard ll(log_lock); log_t.op_file_update(file->fnode); log_t.op_dir_link(dirname, filename, file->fnode.ino); } else { @@ -3557,7 +3712,7 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, int BlueFS::unlock_file(FileLock *fl) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); dout(10) << __func__ << " " << fl << " on " << fl->file->fnode << dendl; ceph_assert(fl->file->locked); fl->file->locked = false; @@ -3571,7 +3726,7 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) if (!dirname.empty() && dirname.back() == '/') { dirname.remove_suffix(1); } - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); dout(10) << __func__ << " " << dirname << dendl; if (dirname.empty()) { // list dirs @@ -3599,7 +3754,8 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) int BlueFS::unlink(std::string_view dirname, std::string_view filename) { - std::lock_guard l(lock); + std::lock_guard dirl(dirs_lock); + std::lock_guard ll(log_lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = dir_map.find(dirname); if (p == dir_map.end()) { diff --git 
a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index a4cd65e4def29..4f368841ff4d9 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -121,6 +121,11 @@ class BlueFS { std::atomic_int num_reading; void* vselector_hint = nullptr; + /* lock protects fnode and other the parts that can be modified during read & write operations. + Does not protect values that are fixed + Does not need to be taken when doing one-time operations: + _replay, device_migrate_to_existing, device_migrate_to_new */ + ceph::mutex lock = ceph::make_mutex("BlueFS::File::lock"); private: FRIEND_MAKE_REF(File); @@ -304,8 +309,9 @@ class BlueFS { }; private: - ceph::mutex lock = ceph::make_mutex("BlueFS::lock"); - + ceph::mutex log_lock = ceph::make_mutex("BlueFS::log_lock"); + ceph::mutex dirs_lock = ceph::make_mutex("BlueFS::dirs_lock"); + ceph::mutex dirty_lock = ceph::make_mutex("BlueFS::dirty_lock"); PerfCounters *logger = nullptr; uint64_t max_bytes[MAX_BDEV] = {0}; @@ -329,13 +335,12 @@ class BlueFS { FileWriter *log_writer = 0; ///< writer for the log bluefs_transaction_t log_t; ///< pending, unwritten log transaction bool log_flushing = false; ///< true while flushing the log - ceph::condition_variable log_cond; - - uint64_t new_log_jump_to = 0; - uint64_t old_log_jump_to = 0; - FileRef new_log = nullptr; - FileWriter *new_log_writer = nullptr; + ceph::condition_variable log_cond; ///< used for state control between log flush / log compaction + ceph::mutex cond_lock = ceph::make_mutex("BlueFS::cond_lock"); ///< .... + std::atomic log_is_compacting{false}; ///< signals that bluefs log is already ongoing compaction + std::atomic log_forbidden_to_expand{false}; ///< used to signal that async compaction is in state + /// that prohibits expansion of bluefs log /* * There are up to 3 block devices: * @@ -349,6 +354,11 @@ class BlueFS { std::vector alloc; ///< allocators for bdevs std::vector alloc_size; ///< alloc size for each device std::vector> pending_release; ///< extents to release + // TODO: it should be examined what makes pending_release immune to + // eras in a way similar to dirty_files. Hints: + // 1) we have actually only 2 eras: log_seq and log_seq+1 + // 2) we usually not remove extents from files. And when we do, we force log-syncing. 
+ //std::vector> block_unused_too_granular; BlockDevice::aio_callback_t discard_cb[3]; //discard callbacks for each dev @@ -395,10 +405,9 @@ class BlueFS { int _signal_dirty_to_log(FileWriter *h); int _flush_range(FileWriter *h, uint64_t offset, uint64_t length); int _flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered); - int _flush(FileWriter *h, bool force, std::unique_lock& l); int _flush(FileWriter *h, bool force, bool *flushed = nullptr); int _flush_special(FileWriter *h); - int _fsync(FileWriter *h, std::unique_lock& l); + int _fsync(FileWriter *h); #ifdef HAVE_LIBAIO void _claim_completed_aios(FileWriter *h, std::list *ls); @@ -414,11 +423,10 @@ class BlueFS { void _flush_and_sync_log_core(int64_t available_runway); int _flush_and_sync_log_jump(uint64_t jump_to, int64_t available_runway); - int _flush_and_sync_log(std::unique_lock& l, - uint64_t want_seq = 0); + int flush_and_sync_log(uint64_t want_seq = 0); - uint64_t _estimate_log_size(); - bool _should_compact_log(); + uint64_t estimate_log_size(); + bool should_start_compact_log(); enum { REMOVE_DB = 1, @@ -426,12 +434,12 @@ class BlueFS { RENAME_SLOW2DB = 4, RENAME_DB2SLOW = 8, }; - void _compact_log_dump_metadata(bluefs_transaction_t *t, - int flags); - void _compact_log_sync(); - void _compact_log_async(std::unique_lock& l); + void compact_log_dump_metadata(bluefs_transaction_t *t, + int flags); + void compact_log_sync(); + void compact_log_async(); - void _rewrite_log_and_layout_sync(bool allocate_with_fallback, + void rewrite_log_and_layout_sync(bool allocate_with_fallback, int super_dev, int log_dev, int new_log_dev, @@ -440,7 +448,7 @@ class BlueFS { //void _aio_finish(void *priv); - void _flush_bdev_safely(FileWriter *h); + void _flush_bdev(FileWriter *h); void flush_bdev(); // this is safe to call without a lock void flush_bdev(std::array& dirty_bdevs); // this is safe to call without a lock @@ -459,8 +467,6 @@ class BlueFS { uint64_t len, ///< [in] this many bytes char *out); ///< [out] optional: or copy it here - void _invalidate_cache(FileRef f, uint64_t offset, uint64_t length); - int _open_super(); int _write_super(int dev); int _check_allocations(const bluefs_fnode_t& fnode, @@ -473,6 +479,7 @@ class BlueFS { int _replay(bool noop, bool to_stdout = false); ///< replay journal FileWriter *_create_writer(FileRef f); + void _drain_writer(FileWriter *h); void _close_writer(FileWriter *h); // always put the super in the second 4k block. 
FIXME should this be @@ -538,10 +545,8 @@ class BlueFS { FileReader **h, bool random = false); - void close_writer(FileWriter *h) { - std::lock_guard l(lock); - _close_writer(h); - } + // data added after last fsync() is lost + void close_writer(FileWriter *h); int rename(std::string_view old_dir, std::string_view old_file, std::string_view new_dir, std::string_view new_file); @@ -565,7 +570,7 @@ class BlueFS { /// sync any uncommitted state to disk void sync_metadata(bool avoid_compact); /// test and compact log, if necessary - void _maybe_compact_log(std::unique_lock& l); + void maybe_compact_log(); void set_volume_selector(BlueFSVolumeSelector* s) { vselector.reset(s); @@ -587,42 +592,11 @@ class BlueFS { // handler for discard event void handle_discard(unsigned dev, interval_set& to_release); - void flush(FileWriter *h, bool force = false) { - std::unique_lock l(lock); - int r = _flush(h, force, l); - ceph_assert(r == 0); - } + void flush(FileWriter *h, bool force = false); - void append_try_flush(FileWriter *h, const char* buf, size_t len) { - size_t max_size = 1ull << 30; // cap to 1GB - while (len > 0) { - bool need_flush = true; - auto l0 = h->get_buffer_length(); - if (l0 < max_size) { - size_t l = std::min(len, max_size - l0); - h->append(buf, l); - buf += l; - len -= l; - need_flush = h->get_buffer_length() >= cct->_conf->bluefs_min_flush_size; - } - if (need_flush) { - flush(h, true); - // make sure we've made any progress with flush hence the - // loop doesn't iterate forever - ceph_assert(h->get_buffer_length() < max_size); - } - } - } - void flush_range(FileWriter *h, uint64_t offset, uint64_t length) { - std::lock_guard l(lock); - _flush_range(h, offset, length); - } - int fsync(FileWriter *h) { - std::unique_lock l(lock); - int r = _fsync(h, l); - _maybe_compact_log(l); - return r; - } + void append_try_flush(FileWriter *h, const char* buf, size_t len); + void flush_range(FileWriter *h, uint64_t offset, uint64_t length); + int fsync(FileWriter *h); int64_t read(FileReader *h, uint64_t offset, size_t len, ceph::buffer::list *outbl, char *out) { // no need to hold the global lock here; we only touch h and @@ -637,18 +611,9 @@ class BlueFS { // atomics and asserts). return _read_random(h, offset, len, out); } - void invalidate_cache(FileRef f, uint64_t offset, uint64_t len) { - std::lock_guard l(lock); - _invalidate_cache(f, offset, len); - } - int preallocate(FileRef f, uint64_t offset, uint64_t len) { - std::lock_guard l(lock); - return _preallocate(f, offset, len); - } - int truncate(FileWriter *h, uint64_t offset) { - std::lock_guard l(lock); - return _truncate(h, offset); - } + void invalidate_cache(FileRef f, uint64_t offset, uint64_t len); + int preallocate(FileRef f, uint64_t offset, uint64_t len); + int truncate(FileWriter *h, uint64_t offset); int do_replay_recovery_read(FileReader *log, size_t log_pos, size_t read_offset, From 45b3c5d0ea6c737ae6bf742387f5746b65eeb88d Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Tue, 22 Jun 2021 13:15:21 +0200 Subject: [PATCH 0042/2866] os/bluestore/bluefs: Reorganize BlueFS state variables Reorganize BlueFS state variables into separate domains: 1) log, 2) dirty, 3) nodes. Each has separate lock. This change is intended to make it easier to control which locks need to be held when specific elements are modified. 
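As a rough sketch of the resulting grouping (only members touched by this
patch are listed; exact types that do not appear in the hunks below, such as
the dirty-file list type and the dir_map comparator, are assumed):

    struct {
      ceph::mutex lock;              // serializes building/writing the log transaction
      bluefs_transaction_t t;        // pending, unwritten log transaction (was log_t)
      FileWriter *writer = nullptr;  // writer for the log file (was log_writer)
      uint64_t seq_live;             // seq of the transaction being built (was log_seq)
    } log;

    struct {
      ceph::mutex lock;              // guards dirty-file tracking and pending_release
      uint64_t seq_stable;           // highest seq already stable on disk (was log_seq_stable)
      uint64_t seq_next;             // seq that newly dirtied files will be tagged with
      std::map<uint64_t, dirty_file_list_t> files;  // per-seq dirty files (was dirty_files; list type name assumed)
    } dirty;

    struct {
      ceph::mutex lock;              // guards the file/dir tables (was dirs_lock)
      std::map<uint64_t, FileRef> file_map;                // was file_map
      std::map<std::string, DirRef, std::less<>> dir_map;  // was dir_map
    } nodes;

The invariant dirty.seq_next == log.seq_live + 1 is asserted in
_consume_dirty(), and where both locks are needed (flush_and_sync_log)
log.lock is taken before dirty.lock.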
Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 468 +++++++++++++++++++------------------ src/os/bluestore/BlueFS.h | 31 ++- 2 files changed, 257 insertions(+), 242 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 8934520d4710f..e501159b2953c 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -148,9 +148,9 @@ class BlueFS::SocketHook : public AdminSocketHook { out.append(ss); } else if (command == "bluefs files list") { const char* devnames[3] = {"wal","db","slow"}; - std::lock_guard l(bluefs->dirs_lock); + std::lock_guard l(bluefs->nodes.lock); f->open_array_section("files"); - for (auto &d : bluefs->dir_map) { + for (auto &d : bluefs->nodes.dir_map) { std::string dir = d.first; for (auto &r : d.second->file_map) { f->open_object_section("file"); @@ -314,8 +314,8 @@ void BlueFS::_shutdown_logger() void BlueFS::_update_logger_stats() { // we must be holding the lock - logger->set(l_bluefs_num_files, file_map.size()); - logger->set(l_bluefs_log_bytes, log_writer->file->fnode.size); + logger->set(l_bluefs_num_files, nodes.file_map.size()); + logger->set(l_bluefs_log_bytes, log.writer->file->fnode.size); if (alloc[BDEV_WAL]) { logger->set(l_bluefs_wal_total_bytes, _get_total(BDEV_WAL)); @@ -464,10 +464,10 @@ void BlueFS::dump_block_extents(ostream& out) int BlueFS::get_block_extents(unsigned id, interval_set *extents) { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); dout(10) << __func__ << " bdev " << id << dendl; ceph_assert(id < alloc.size()); - for (auto& p : file_map) { + for (auto& p : nodes.file_map) { for (auto& q : p.second->fnode.extents) { if (q.bdev == id) { extents->insert(q.offset, q.length); @@ -511,10 +511,10 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) &log_file->fnode); vselector->add_usage(log_file->vselector_hint, log_file->fnode); ceph_assert(r == 0); - log_writer = _create_writer(log_file); + log.writer = _create_writer(log_file); // initial txn - log_t.op_init(); + log.t.op_init(); flush_and_sync_log(); // write supers @@ -525,8 +525,8 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) // clean up super = bluefs_super_t(); - _close_writer(log_writer); - log_writer = NULL; + _close_writer(log.writer); + log.writer = NULL; vselector.reset(nullptr); _stop_alloc(); _shutdown_logger(); @@ -781,7 +781,7 @@ int BlueFS::mount() } // init freelist - for (auto& p : file_map) { + for (auto& p : nodes.file_map) { dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl; for (auto& q : p.second->fnode.extents) { bool is_shared = is_shared_alloc(q.bdev); @@ -804,11 +804,11 @@ int BlueFS::mount() } // set up the log for future writes - log_writer = _create_writer(_get_file(1)); - ceph_assert(log_writer->file->fnode.ino == 1); - log_writer->pos = log_writer->file->fnode.size; + log.writer = _create_writer(_get_file(1)); + ceph_assert(log.writer->file->fnode.ino == 1); + log.writer->pos = log.writer->file->fnode.size; dout(10) << __func__ << " log write pos set to 0x" - << std::hex << log_writer->pos << std::dec + << std::hex << log.writer->pos << std::dec << dendl; return 0; @@ -841,15 +841,15 @@ void BlueFS::umount(bool avoid_compact) sync_metadata(avoid_compact); - _close_writer(log_writer); - log_writer = NULL; + _close_writer(log.writer); + log.writer = NULL; vselector.reset(nullptr); _stop_alloc(); - file_map.clear(); - dir_map.clear(); + nodes.file_map.clear(); + nodes.dir_map.clear(); super = bluefs_super_t(); - 
log_t.clear(); + log.t.clear(); _shutdown_logger(); } @@ -1033,7 +1033,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) { dout(10) << __func__ << (noop ? " NO-OP" : "") << dendl; ino_last = 1; // by the log - log_seq = 0; + uint64_t log_seq = 0; FileRef log_file; log_file = _get_file(1); @@ -1297,8 +1297,8 @@ int BlueFS::_replay(bool noop, bool to_stdout) if (!noop) { FileRef file = _get_file(ino); ceph_assert(file->fnode.ino); - map::iterator q = dir_map.find(dirname); - ceph_assert(q != dir_map.end()); + map::iterator q = nodes.dir_map.find(dirname); + ceph_assert(q != nodes.dir_map.end()); map::iterator r = q->second->file_map.find(filename); ceph_assert(r == q->second->file_map.end()); @@ -1328,8 +1328,8 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { - map::iterator q = dir_map.find(dirname); - ceph_assert(q != dir_map.end()); + map::iterator q = nodes.dir_map.find(dirname); + ceph_assert(q != nodes.dir_map.end()); map::iterator r = q->second->file_map.find(filename); ceph_assert(r != q->second->file_map.end()); ceph_assert(r->second->refs > 0); @@ -1351,9 +1351,9 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { - map::iterator q = dir_map.find(dirname); - ceph_assert(q == dir_map.end()); - dir_map[dirname] = ceph::make_ref(); + map::iterator q = nodes.dir_map.find(dirname); + ceph_assert(q == nodes.dir_map.end()); + nodes.dir_map[dirname] = ceph::make_ref(); } } break; @@ -1370,10 +1370,10 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { - map::iterator q = dir_map.find(dirname); - ceph_assert(q != dir_map.end()); + map::iterator q = nodes.dir_map.find(dirname); + ceph_assert(q != nodes.dir_map.end()); ceph_assert(q->second->file_map.empty()); - dir_map.erase(q); + nodes.dir_map.erase(q); } } break; @@ -1434,8 +1434,8 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { - auto p = file_map.find(ino); - ceph_assert(p != file_map.end()); + auto p = nodes.file_map.find(ino); + ceph_assert(p != nodes.file_map.end()); vselector->sub_usage(p->second->vselector_hint, p->second->fnode); if (cct->_conf->bluefs_log_replay_check_allocations) { int r = _check_allocations(p->second->fnode, @@ -1444,7 +1444,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) return r; } } - file_map.erase(p); + nodes.file_map.erase(p); } } break; @@ -1464,6 +1464,9 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { vselector->add_usage(log_file->vselector_hint, log_file->fnode); + log.seq_live = log_seq; + dirty.seq_next = log_seq + 1; + dirty.seq_stable = log_seq; } dout(10) << __func__ << " log file size was 0x" @@ -1477,7 +1480,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) if (!noop) { // verify file link counts are all >0 - for (auto& p : file_map) { + for (auto& p : nodes.file_map) { if (p.second->refs == 0 && p.second->fnode.ino > 1) { derr << __func__ << " file with link count 0: " << p.second->fnode @@ -1494,7 +1497,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) int BlueFS::log_dump() { // only dump log file's content - ceph_assert(log_writer == nullptr && "cannot log_dump on mounted BlueFS"); + ceph_assert(log.writer == nullptr && "cannot log_dump on mounted BlueFS"); int r = _open_super(); if (r < 0) { derr << __func__ << " failed to open super: " << cpp_strerror(r) << dendl; @@ -1536,7 +1539,7 @@ int BlueFS::device_migrate_to_existing( dout(0) << __func__ << " super to be written to " << dev_target << dendl; } - for (auto& [ino, file_ref] : file_map) { + for (auto& [ino, file_ref] : nodes.file_map) { //do not copy 
log if (file_ref->fnode.ino == 1) { continue; @@ -1674,7 +1677,7 @@ int BlueFS::device_migrate_to_new( flags |= devs_source.count(BDEV_WAL) ? REMOVE_WAL : 0; int dev_target_new = dev_target; //FIXME: remove, makes no sense - for (auto& p : file_map) { + for (auto& p : nodes.file_map) { //do not copy log if (p.second->fnode.ino == 1) { continue; @@ -1790,10 +1793,10 @@ int BlueFS::device_migrate_to_new( BlueFS::FileRef BlueFS::_get_file(uint64_t ino) { - auto p = file_map.find(ino); - if (p == file_map.end()) { + auto p = nodes.file_map.find(ino); + if (p == nodes.file_map.end()) { FileRef f = ceph::make_ref(); - file_map[ino] = f; + nodes.file_map[ino] = f; dout(30) << __func__ << " ino " << ino << " = " << f << " (new)" << dendl; return f; @@ -1808,28 +1811,28 @@ void BlueFS::_drop_link(FileRef file) dout(20) << __func__ << " had refs " << file->refs << " on " << file->fnode << dendl; ceph_assert(file->refs > 0); - ceph_assert(ceph_mutex_is_locked(log_lock)); + ceph_assert(ceph_mutex_is_locked(log.lock)); --file->refs; if (file->refs == 0) { dout(20) << __func__ << " destroying " << file->fnode << dendl; ceph_assert(file->num_reading.load() == 0); vselector->sub_usage(file->vselector_hint, file->fnode); - log_t.op_file_remove(file->fnode.ino); + log.t.op_file_remove(file->fnode.ino); - std::lock_guard dl(dirty_lock); + std::lock_guard dl(dirty.lock); for (auto& r : file->fnode.extents) { pending_release[r.bdev].insert(r.offset, r.length); } - file_map.erase(file->fnode.ino); + nodes.file_map.erase(file->fnode.ino); file->deleted = true; if (file->dirty_seq) { // retract request to serialize changes - ceph_assert(file->dirty_seq > log_seq_stable); - ceph_assert(dirty_files.count(file->dirty_seq)); - auto it = dirty_files[file->dirty_seq].iterator_to(*file); - dirty_files[file->dirty_seq].erase(it); + ceph_assert(file->dirty_seq > dirty.seq_stable); + ceph_assert(dirty.files.count(file->dirty_seq)); + auto it = dirty.files[file->dirty_seq].iterator_to(*file); + dirty.files[file->dirty_seq].erase(it); file->dirty_seq = 0; } } @@ -2073,13 +2076,13 @@ void BlueFS::invalidate_cache(FileRef f, uint64_t offset, uint64_t length) uint64_t BlueFS::estimate_log_size() { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); int avg_dir_size = 40; // fixme int avg_file_size = 12; uint64_t size = 4096 * 2; - size += file_map.size() * (1 + sizeof(bluefs_fnode_t)); - size += dir_map.size() + (1 + avg_dir_size); - size += file_map.size() * (1 + avg_dir_size + avg_file_size); + size += nodes.file_map.size() * (1 + sizeof(bluefs_fnode_t)); + size += nodes.dir_map.size() + (1 + avg_dir_size); + size += nodes.file_map.size() * (1 + avg_dir_size + avg_file_size); return round_up_to(size, super.block_size); } @@ -2102,8 +2105,8 @@ bool BlueFS::should_start_compact_log() } uint64_t current; { - std::lock_guard dirl(log_lock); - current = log_writer->file->fnode.size; + std::lock_guard dirl(log.lock); + current = log.writer->file->fnode.size; } uint64_t expected = estimate_log_size(); float ratio = (float)current / (float)expected; @@ -2121,14 +2124,14 @@ bool BlueFS::should_start_compact_log() void BlueFS::compact_log_dump_metadata(bluefs_transaction_t *t, int flags) { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); t->seq = 1; t->uuid = super.uuid; dout(20) << __func__ << " op_init" << dendl; t->op_init(); - for (auto& [ino, file_ref] : file_map) { + for (auto& [ino, file_ref] : nodes.file_map) { if (ino == 1) continue; ceph_assert(ino > 1); @@ -2158,7 +2161,7 @@ void 
BlueFS::compact_log_dump_metadata(bluefs_transaction_t *t, dout(20) << __func__ << " op_file_update " << file_ref->fnode << dendl; t->op_file_update(file_ref->fnode); } - for (auto& [path, dir_ref] : dir_map) { + for (auto& [path, dir_ref] : nodes.dir_map) { dout(20) << __func__ << " op_dir_create " << path << dendl; t->op_dir_create(path); for (auto& [fname, file_ref] : dir_ref->file_map) { @@ -2173,7 +2176,7 @@ void BlueFS::compact_log_sync() { dout(10) << __func__ << dendl; auto prefer_bdev = - vselector->select_prefer_bdev(log_writer->file->vselector_hint); + vselector->select_prefer_bdev(log.writer->file->vselector_hint); rewrite_log_and_layout_sync(true, BDEV_DB, prefer_bdev, @@ -2190,13 +2193,13 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, int flags, std::optional layout) { - //ceph_assert(ceph_mutex_is_notlocked(log_lock)); - std::lock_guard ll(log_lock); + //ceph_assert(ceph_mutex_is_notlocked(log.lock)); + std::lock_guard ll(log.lock); - File *log_file = log_writer->file.get(); + File *log_file = log.writer->file.get(); // clear out log (be careful who calls us!!!) - log_t.clear(); + log.t.clear(); dout(20) << __func__ << " super_dev:" << super_dev << " log_dev:" << log_dev @@ -2206,8 +2209,8 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, bluefs_transaction_t t; compact_log_dump_metadata(&t, flags); - dout(20) << __func__ << " op_jump_seq " << log_seq << dendl; - t.op_jump_seq(log_seq); + dout(20) << __func__ << " op_jump_seq " << log.seq_live << dendl; + t.op_jump_seq(log.seq_live); bufferlist bl; encode(t, bl); @@ -2234,21 +2237,21 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, } } - _close_writer(log_writer); + _close_writer(log.writer); log_file->fnode.size = bl.length(); vselector->sub_usage(log_file->vselector_hint, old_fnode); vselector->add_usage(log_file->vselector_hint, log_file->fnode); - log_writer = _create_writer(log_file); - log_writer->append(bl); - r = _flush_special(log_writer); + log.writer = _create_writer(log_file); + log.writer->append(bl); + r = _flush_special(log.writer); ceph_assert(r == 0); #ifdef HAVE_LIBAIO if (!cct->_conf->bluefs_sync_write) { list completed_ios; - _claim_completed_aios(log_writer, &completed_ios); - wait_for_aio(log_writer); + _claim_completed_aios(log.writer, &completed_ios); + wait_for_aio(log.writer); completed_ios.clear(); } #endif @@ -2270,7 +2273,7 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, flush_bdev(); dout(10) << __func__ << " release old log extents " << old_fnode.extents << dendl; - std::lock_guard dl(dirty_lock); + std::lock_guard dl(dirty.lock); for (auto& r : old_fnode.extents) { pending_release[r.bdev].insert(r.offset, r.length); } @@ -2309,8 +2312,8 @@ void BlueFS::compact_log_async() return; } - log_lock.lock(); - File *log_file = log_writer->file.get(); + log.lock.lock(); + File *log_file = log.writer->file.get(); FileWriter *new_log_writer = nullptr; FileRef new_log = nullptr; uint64_t new_log_jump_to = 0; @@ -2335,7 +2338,7 @@ void BlueFS::compact_log_async() // 1.1 allocate new log space and jump to it. 
old_log_jump_to = log_file->fnode.get_allocated(); - uint64_t runway = log_file->fnode.get_allocated() - log_writer->get_effective_write_pos(); + uint64_t runway = log_file->fnode.get_allocated() - log.writer->get_effective_write_pos(); dout(10) << __func__ << " old_log_jump_to 0x" << std::hex << old_log_jump_to << " need 0x" << (old_log_jump_to + cct->_conf->bluefs_max_log_runway) << std::dec << dendl; int r = _allocate(vselector->select_prefer_bdev(log_file->vselector_hint), @@ -2348,8 +2351,8 @@ void BlueFS::compact_log_async() // update the log file change and log a jump to the offset where we want to // write the new entries - log_t.op_file_update(log_file->fnode); // 1.2 - log_t.op_jump(log_seq, old_log_jump_to); // 1.3 + log.t.op_file_update(log_file->fnode); // 1.2 + log.t.op_jump(log.seq_live, old_log_jump_to); // 1.3 // we need to flush all bdev because we will be streaming all dirty files to log // TODO - think - if _flush_and_sync_log_jump will not add dirty files nor release pending allocations @@ -2357,7 +2360,7 @@ void BlueFS::compact_log_async() flush_bdev(); _flush_and_sync_log_jump(old_log_jump_to, runway); - log_lock.unlock(); + log.lock.unlock(); // out of jump section - now log can be used to write to // 2. prepare compacted log @@ -2374,7 +2377,7 @@ void BlueFS::compact_log_async() // conservative estimate for final encoded size new_log_jump_to = round_up_to(t.op_bl.length() + super.block_size * 2, max_alloc_size); - t.op_jump(log_seq, new_log_jump_to); + t.op_jump(log.seq_live, new_log_jump_to); // allocate //FIXME: check if we want DB here? @@ -2443,8 +2446,8 @@ void BlueFS::compact_log_async() // swap the log files. New log file is the log file now. new_log->fnode.swap_extents(log_file->fnode); - log_writer->pos = log_writer->file->fnode.size = - log_writer->pos - old_log_jump_to + new_log_jump_to; + log.writer->pos = log.writer->file->fnode.size = + log.writer->pos - old_log_jump_to + new_log_jump_to; vselector->add_usage(log_file->vselector_hint, log_file->fnode); @@ -2464,7 +2467,7 @@ void BlueFS::compact_log_async() // 7. release old space dout(10) << __func__ << " release old log extents " << old_extents << dendl; { - std::lock_guard dl(dirty_lock); + std::lock_guard dl(dirty.lock); for (auto& r : old_extents) { pending_release[r.bdev].insert(r.offset, r.length); } @@ -2475,10 +2478,10 @@ void BlueFS::compact_log_async() // flush_special does not dirty files /* if (new_log->dirty_seq) { - std::lock_guard dl(dirty_lock); - ceph_assert(dirty_files.count(new_log->dirty_seq)); - auto it = dirty_files[new_log->dirty_seq].iterator_to(*new_log); - dirty_files[new_log->dirty_seq].erase(it); + std::lock_guard dl(dirty.lock); + ceph_assert(dirty.files.count(new_log->dirty_seq)); + auto it = dirty.files[new_log->dirty_seq].iterator_to(*new_log); + dirty.files[new_log->dirty_seq].erase(it); } */ new_log_writer = nullptr; @@ -2503,38 +2506,41 @@ void BlueFS::_pad_bl(bufferlist& bl) } /** - * Adds file modifications from `dirty_files` to bluefs transactions - * already stored in `log_t`. Writes them to disk and waits until are stable. + * Adds file modifications from `dirty.files` to bluefs transactions + * already stored in `log.t`. Writes them to disk and waits until are stable. * Guarantees that file modifications with `want_seq` are already stable on disk. * In addition may insert jump forward transaction to log write position `jump_to`. 
* * it should lock ability to: - * 1) add to log_t - * 2) modify dirty_files + * 1) add to log.t + * 2) modify dirty.files * 3) add to pending_release * - * pending_release and log_t go with same lock + * pending_release and log.t go with same lock */ -// Adds to log_t file modifications mentioned in `dirty_files`. -// Note: some bluefs ops may have already been stored in log_t transaction. +// Adds to log.t file modifications mentioned in `dirty.files`. +// Note: some bluefs ops may have already been stored in log.t transaction. uint64_t BlueFS::_consume_dirty() { - ceph_assert(ceph_mutex_is_locked(dirty_lock)); - ceph_assert(ceph_mutex_is_locked(log_lock)); + ceph_assert(ceph_mutex_is_locked(dirty.lock)); + ceph_assert(ceph_mutex_is_locked(log.lock)); //acquire new seq - // this will became log_seq_stable once we write - uint64_t seq = log_t.seq = ++log_seq; - log_t.uuid = super.uuid; + // this will became seq_stable once we write + ++dirty.seq_next; + ++log.seq_live; + ceph_assert(dirty.seq_next == log.seq_live + 1); + uint64_t seq = log.t.seq = log.seq_live; + log.t.uuid = super.uuid; // log dirty files - // we just incremented log_seq. It is now illegal to add to dirty_files[log_seq] - auto lsi = dirty_files.find(seq); - if (lsi != dirty_files.end()) { - dout(20) << __func__ << " " << lsi->second.size() << " dirty_files" << dendl; + // we just incremented log_seq. It is now illegal to add to dirty.files[log_seq] + auto lsi = dirty.files.find(seq); + if (lsi != dirty.files.end()) { + dout(20) << __func__ << " " << lsi->second.size() << " dirty.files" << dendl; for (auto &f : lsi->second) { dout(20) << __func__ << " op_file_update " << f.fnode << dendl; - log_t.op_file_update(f.fnode); + log.t.op_file_update(f.fnode); } } return seq; @@ -2544,11 +2550,11 @@ uint64_t BlueFS::_consume_dirty() // Returns space available *BEFORE* adding new space. Signed for additional <0 detection. int64_t BlueFS::_maybe_extend_log() { - ceph_assert(ceph_mutex_is_locked(log_lock)); + ceph_assert(ceph_mutex_is_locked(log.lock)); // allocate some more space (before we run out)? - // BTW: this triggers `flush()` in the `page_aligned_appender` of `log_writer`. - int64_t runway = log_writer->file->fnode.get_allocated() - - log_writer->get_effective_write_pos(); + // BTW: this triggers `flush()` in the `page_aligned_appender` of `log.writer`. 
+ int64_t runway = log.writer->file->fnode.get_allocated() - + log.writer->get_effective_write_pos(); if (runway < (int64_t)cct->_conf->bluefs_min_log_runway) { dout(10) << __func__ << " allocating more log runway (0x" << std::hex << runway << std::dec << " remaining)" << dendl; @@ -2570,26 +2576,26 @@ int64_t BlueFS::_maybe_extend_log() if (log_forbidden_to_expand.load() == true) { return -EWOULDBLOCK; } - vselector->sub_usage(log_writer->file->vselector_hint, log_writer->file->fnode); + vselector->sub_usage(log.writer->file->vselector_hint, log.writer->file->fnode); int r = _allocate( - vselector->select_prefer_bdev(log_writer->file->vselector_hint), + vselector->select_prefer_bdev(log.writer->file->vselector_hint), cct->_conf->bluefs_max_log_runway, - &log_writer->file->fnode); + &log.writer->file->fnode); ceph_assert(r == 0); - vselector->add_usage(log_writer->file->vselector_hint, log_writer->file->fnode); - log_t.op_file_update(log_writer->file->fnode); + vselector->add_usage(log.writer->file->vselector_hint, log.writer->file->fnode); + log.t.op_file_update(log.writer->file->fnode); } return runway; } void BlueFS::_flush_and_sync_log_core(int64_t runway) { - ceph_assert(ceph_mutex_is_locked(log_lock)); - dout(10) << __func__ << " " << log_t << dendl; + ceph_assert(ceph_mutex_is_locked(log.lock)); + dout(10) << __func__ << " " << log.t << dendl; bufferlist bl; bl.reserve(super.block_size); - encode(log_t, bl); + encode(log.t, bl); // pad to block boundary size_t realign = super.block_size - (bl.length() % super.block_size); if (realign && realign != super.block_size) @@ -2602,29 +2608,29 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway) // transaction will not fit extents before growth -> data loss on _replay } - log_writer->append(bl); + log.writer->append(bl); - log_t.clear(); - log_t.seq = 0; // just so debug output is less confusing + log.t.clear(); + log.t.seq = 0; // just so debug output is less confusing - int r = _flush_special(log_writer); + int r = _flush_special(log.writer); ceph_assert(r == 0); } -// Clears dirty_files up to (including) seq_stable. +// Clears dirty.files up to (including) seq_stable. 
void BlueFS::clear_dirty_set_stable(uint64_t seq) { - std::lock_guard lg(dirty_lock); + std::lock_guard lg(dirty.lock); // clean dirty files - if (seq > log_seq_stable) { - log_seq_stable = seq; - dout(20) << __func__ << " log_seq_stable " << log_seq_stable << dendl; + if (seq > dirty.seq_stable) { + dirty.seq_stable = seq; + dout(20) << __func__ << " seq_stable " << dirty.seq_stable << dendl; // undirty all files that were already streamed to log - auto p = dirty_files.begin(); - while (p != dirty_files.end()) { - if (p->first > log_seq_stable) { + auto p = dirty.files.begin(); + while (p != dirty.files.end()) { + if (p->first > dirty.seq_stable) { dout(20) << __func__ << " done cleaning up dirty files" << dendl; break; } @@ -2633,17 +2639,17 @@ void BlueFS::clear_dirty_set_stable(uint64_t seq) while (l != p->second.end()) { File *file = &*l; ceph_assert(file->dirty_seq > 0); - ceph_assert(file->dirty_seq <= log_seq_stable); + ceph_assert(file->dirty_seq <= dirty.seq_stable); dout(20) << __func__ << " cleaned file " << file->fnode << dendl; file->dirty_seq = 0; p->second.erase(l++); } ceph_assert(p->second.empty()); - dirty_files.erase(p++); + dirty.files.erase(p++); } } else { - dout(20) << __func__ << " log_seq_stable " << log_seq_stable + dout(20) << __func__ << " seq_stable " << dirty.seq_stable << " already >= out seq " << seq << ", we lost a race against another log flush, done" << dendl; } @@ -2677,22 +2683,22 @@ int BlueFS::flush_and_sync_log(uint64_t want_seq) // we synchronize writing to log, by lock to log_lock int64_t available_runway; do { - log_lock.lock(); - dirty_lock.lock(); - if (want_seq && want_seq <= log_seq_stable) { - dout(10) << __func__ << " want_seq " << want_seq << " <= log_seq_stable " - << log_seq_stable << ", done" << dendl; - dirty_lock.unlock(); - log_lock.unlock(); + log.lock.lock(); + dirty.lock.lock(); + if (want_seq && want_seq <= dirty.seq_stable) { + dout(10) << __func__ << " want_seq " << want_seq << " <= seq_stable " + << dirty.seq_stable << ", done" << dendl; + dirty.lock.unlock(); + log.lock.unlock(); return 0; } available_runway = _maybe_extend_log(); if (available_runway == -EWOULDBLOCK) { // we are in need of adding runway, but we are during log-switch from compaction - dirty_lock.unlock(); - //instead log_lock_unlock() do move ownership - std::unique_lock ll(log_lock, std::adopt_lock); + dirty.lock.unlock(); + //instead log.lock_unlock() do move ownership + std::unique_lock ll(log.lock, std::adopt_lock); while (log_forbidden_to_expand.load()) { log_cond.wait(ll); } @@ -2706,12 +2712,12 @@ int BlueFS::flush_and_sync_log(uint64_t want_seq) uint64_t seq = _consume_dirty(); vector> to_release(pending_release.size()); to_release.swap(pending_release); - dirty_lock.unlock(); + dirty.lock.unlock(); _flush_and_sync_log_core(available_runway); - _flush_bdev(log_writer); - //now log_lock is no longer needed - log_lock.unlock(); + _flush_bdev(log.writer); + //now log.lock is no longer needed + log.lock.unlock(); clear_dirty_set_stable(seq); release_pending_allocations(to_release); @@ -2724,26 +2730,26 @@ int BlueFS::flush_and_sync_log(uint64_t want_seq) int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, int64_t available_runway) { - ceph_assert(ceph_mutex_is_locked(log_lock)); + ceph_assert(ceph_mutex_is_locked(log.lock)); ceph_assert(jump_to); // we synchronize writing to log, by lock to log_lock - dirty_lock.lock(); + dirty.lock.lock(); uint64_t seq = _consume_dirty(); vector> to_release(pending_release.size()); to_release.swap(pending_release); - 
dirty_lock.unlock(); + dirty.lock.unlock(); _flush_and_sync_log_core(available_runway); dout(10) << __func__ << " jumping log offset from 0x" << std::hex - << log_writer->pos << " -> 0x" << jump_to << std::dec << dendl; - log_writer->pos = jump_to; - vselector->sub_usage(log_writer->file->vselector_hint, log_writer->file->fnode.size); - log_writer->file->fnode.size = jump_to; - vselector->add_usage(log_writer->file->vselector_hint, log_writer->file->fnode.size); + << log.writer->pos << " -> 0x" << jump_to << std::dec << dendl; + log.writer->pos = jump_to; + vselector->sub_usage(log.writer->file->vselector_hint, log.writer->file->fnode.size); + log.writer->file->fnode.size = jump_to; + vselector->add_usage(log.writer->file->vselector_hint, log.writer->file->fnode.size); - _flush_bdev(log_writer); + _flush_bdev(log.writer); clear_dirty_set_stable(seq); release_pending_allocations(to_release); @@ -2798,26 +2804,26 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer( int BlueFS::_signal_dirty_to_log(FileWriter *h) { ceph_assert(ceph_mutex_is_locked(h->lock)); - std::lock_guard dl(dirty_lock); + std::lock_guard dl(dirty.lock); h->file->fnode.mtime = ceph_clock_now(); ceph_assert(h->file->fnode.ino >= 1); if (h->file->dirty_seq == 0) { - h->file->dirty_seq = log_seq + 1; - dirty_files[h->file->dirty_seq].push_back(*h->file); - dout(20) << __func__ << " dirty_seq = " << log_seq + 1 + h->file->dirty_seq = dirty.seq_next; + dirty.files[h->file->dirty_seq].push_back(*h->file); + dout(20) << __func__ << " dirty_seq = " << dirty.seq_next << " (was clean)" << dendl; } else { - if (h->file->dirty_seq != log_seq + 1) { + if (h->file->dirty_seq != dirty.seq_next) { // need re-dirty, erase from list first - ceph_assert(dirty_files.count(h->file->dirty_seq)); - auto it = dirty_files[h->file->dirty_seq].iterator_to(*h->file); - dirty_files[h->file->dirty_seq].erase(it); - h->file->dirty_seq = log_seq + 1; - dirty_files[h->file->dirty_seq].push_back(*h->file); - dout(20) << __func__ << " dirty_seq = " << log_seq + 1 + ceph_assert(dirty.files.count(h->file->dirty_seq)); + auto it = dirty.files[h->file->dirty_seq].iterator_to(*h->file); + dirty.files[h->file->dirty_seq].erase(it); + h->file->dirty_seq = dirty.seq_next; + dirty.files[h->file->dirty_seq].push_back(*h->file); + dout(20) << __func__ << " dirty_seq = " << dirty.seq_next << " (was " << h->file->dirty_seq << ")" << dendl; } else { - dout(20) << __func__ << " dirty_seq = " << log_seq + 1 + dout(20) << __func__ << " dirty_seq = " << dirty.seq_next << " (unchanged, do nothing) " << dendl; } } @@ -3121,8 +3127,8 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset) vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size); h->file->fnode.size = offset; vselector->add_usage(h->file->vselector_hint, h->file->fnode.size); - std::lock_guard ll(log_lock); - log_t.op_file_update(h->file->fnode); + std::lock_guard ll(log.lock); + log.t.op_file_update(h->file->fnode); return 0; } @@ -3142,7 +3148,7 @@ int BlueFS::fsync(FileWriter *h) _flush_bdev(h); if (old_dirty_seq) { - uint64_t s = log_seq; + uint64_t s = log.seq_live; // AKAK !!! locks! 
dout(20) << __func__ << " file metadata was dirty (" << old_dirty_seq << ") on " << h->file->fnode << ", flushing log" << dendl; flush_and_sync_log(old_dirty_seq); @@ -3154,13 +3160,13 @@ int BlueFS::fsync(FileWriter *h) return 0; } -// be careful - either h->file->lock or log_lock must be taken +// be careful - either h->file->lock or log.lock must be taken void BlueFS::_flush_bdev(FileWriter *h) { if (h->file->fnode.ino != 1) { ceph_assert(ceph_mutex_is_locked(h->lock)); } else { - ceph_assert(ceph_mutex_is_locked(log_lock)); + ceph_assert(ceph_mutex_is_locked(log.lock)); } std::array flush_devs = h->dirty_devs; h->dirty_devs.fill(false); @@ -3327,8 +3333,8 @@ int BlueFS::preallocate(FileRef f, uint64_t off, uint64_t len) vselector->add_usage(f->vselector_hint, f->fnode); if (r < 0) return r; - std::lock_guard ll(log_lock); - log_t.op_file_update(f->fnode); + std::lock_guard ll(log.lock); + log.t.op_file_update(f->fnode); } return 0; } @@ -3337,9 +3343,9 @@ void BlueFS::sync_metadata(bool avoid_compact) { bool can_skip_flush; { - std::lock_guard ll(log_lock); - std::lock_guard dl(dirty_lock); - can_skip_flush = log_t.empty() && dirty_files.empty(); + std::lock_guard ll(log.lock); + std::lock_guard dl(dirty.lock); + can_skip_flush = log.t.empty() && dirty.files.empty(); } if (can_skip_flush) { dout(10) << __func__ << " - no pending log events" << dendl; @@ -3376,11 +3382,11 @@ int BlueFS::open_for_write( FileWriter **h, bool overwrite) { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; - map::iterator p = dir_map.find(dirname); + map::iterator p = nodes.dir_map.find(dirname); DirRef dir; - if (p == dir_map.end()) { + if (p == nodes.dir_map.end()) { // implicitly create the dir dout(20) << __func__ << " dir " << dirname << " does not exist" << dendl; @@ -3402,7 +3408,7 @@ int BlueFS::open_for_write( } file = ceph::make_ref(); file->fnode.ino = ++ino_last; - file_map[ino_last] = file; + nodes.file_map[ino_last] = file; dir->file_map[string{filename}] = file; ++file->refs; create = true; @@ -3439,10 +3445,10 @@ int BlueFS::open_for_write( << " vsel_hint " << file->vselector_hint << dendl; { - std::lock_guard ll(log_lock); - log_t.op_file_update(file->fnode); + std::lock_guard ll(log.lock); + log.t.op_file_update(file->fnode); if (create) - log_t.op_dir_link(dirname, filename, file->fnode.ino); + log.t.op_dir_link(dirname, filename, file->fnode.ino); } *h = _create_writer(file); @@ -3523,11 +3529,11 @@ int BlueFS::open_for_read( FileReader **h, bool random) { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << (random ? 
" (random)":" (sequential)") << dendl; - map::iterator p = dir_map.find(dirname); - if (p == dir_map.end()) { + map::iterator p = nodes.dir_map.find(dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << dirname << " not found" << dendl; return -ENOENT; } @@ -3552,12 +3558,12 @@ int BlueFS::rename( std::string_view old_dirname, std::string_view old_filename, std::string_view new_dirname, std::string_view new_filename) { - std::lock_guard dirl(dirs_lock); - std::lock_guard ll(log_lock); + std::lock_guard dirl(nodes.lock); + std::lock_guard ll(log.lock); dout(10) << __func__ << " " << old_dirname << "/" << old_filename << " -> " << new_dirname << "/" << new_filename << dendl; - map::iterator p = dir_map.find(old_dirname); - if (p == dir_map.end()) { + map::iterator p = nodes.dir_map.find(old_dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << old_dirname << " not found" << dendl; return -ENOENT; } @@ -3571,8 +3577,8 @@ int BlueFS::rename( } FileRef file = q->second; - p = dir_map.find(new_dirname); - if (p == dir_map.end()) { + p = nodes.dir_map.find(new_dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << new_dirname << " not found" << dendl; return -ENOENT; } @@ -3583,7 +3589,7 @@ int BlueFS::rename( << ") file " << new_filename << " already exists, unlinking" << dendl; ceph_assert(q->second != file); - log_t.op_dir_unlink(new_dirname, new_filename); + log.t.op_dir_unlink(new_dirname, new_filename); _drop_link(q->second); } @@ -3593,33 +3599,33 @@ int BlueFS::rename( new_dir->file_map[string{new_filename}] = file; old_dir->file_map.erase(string{old_filename}); - log_t.op_dir_link(new_dirname, new_filename, file->fnode.ino); - log_t.op_dir_unlink(old_dirname, old_filename); + log.t.op_dir_link(new_dirname, new_filename, file->fnode.ino); + log.t.op_dir_unlink(old_dirname, old_filename); return 0; } int BlueFS::mkdir(std::string_view dirname) { - std::lock_guard dirl(dirs_lock); - std::lock_guard ll(log_lock); + std::lock_guard dirl(nodes.lock); + std::lock_guard ll(log.lock); dout(10) << __func__ << " " << dirname << dendl; - map::iterator p = dir_map.find(dirname); - if (p != dir_map.end()) { + map::iterator p = nodes.dir_map.find(dirname); + if (p != nodes.dir_map.end()) { dout(20) << __func__ << " dir " << dirname << " exists" << dendl; return -EEXIST; } - dir_map[string{dirname}] = ceph::make_ref(); - log_t.op_dir_create(dirname); + nodes.dir_map[string{dirname}] = ceph::make_ref(); + log.t.op_dir_create(dirname); return 0; } int BlueFS::rmdir(std::string_view dirname) { - std::lock_guard dirl(dirs_lock); - std::lock_guard ll(log_lock); + std::lock_guard dirl(nodes.lock); + std::lock_guard ll(log.lock); dout(10) << __func__ << " " << dirname << dendl; - auto p = dir_map.find(dirname); - if (p == dir_map.end()) { + auto p = nodes.dir_map.find(dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << dirname << " does not exist" << dendl; return -ENOENT; } @@ -3628,16 +3634,16 @@ int BlueFS::rmdir(std::string_view dirname) dout(20) << __func__ << " dir " << dirname << " not empty" << dendl; return -ENOTEMPTY; } - dir_map.erase(string{dirname}); - log_t.op_dir_remove(dirname); + nodes.dir_map.erase(string{dirname}); + log.t.op_dir_remove(dirname); return 0; } bool BlueFS::dir_exists(std::string_view dirname) { - std::lock_guard dirl(dirs_lock); - map::iterator p = dir_map.find(dirname); - bool exists = p != dir_map.end(); + std::lock_guard dirl(nodes.lock); + map::iterator p = 
nodes.dir_map.find(dirname); + bool exists = p != nodes.dir_map.end(); dout(10) << __func__ << " " << dirname << " = " << (int)exists << dendl; return exists; } @@ -3645,10 +3651,10 @@ bool BlueFS::dir_exists(std::string_view dirname) int BlueFS::stat(std::string_view dirname, std::string_view filename, uint64_t *size, utime_t *mtime) { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; - map::iterator p = dir_map.find(dirname); - if (p == dir_map.end()) { + map::iterator p = nodes.dir_map.find(dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << dirname << " not found" << dendl; return -ENOENT; } @@ -3673,10 +3679,10 @@ int BlueFS::stat(std::string_view dirname, std::string_view filename, int BlueFS::lock_file(std::string_view dirname, std::string_view filename, FileLock **plock) { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; - map::iterator p = dir_map.find(dirname); - if (p == dir_map.end()) { + map::iterator p = nodes.dir_map.find(dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << dirname << " not found" << dendl; return -ENOENT; } @@ -3690,12 +3696,12 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, file = ceph::make_ref(); file->fnode.ino = ++ino_last; file->fnode.mtime = ceph_clock_now(); - file_map[ino_last] = file; + nodes.file_map[ino_last] = file; dir->file_map[string{filename}] = file; ++file->refs; - std::lock_guard ll(log_lock); - log_t.op_file_update(file->fnode); - log_t.op_dir_link(dirname, filename, file->fnode.ino); + std::lock_guard ll(log.lock); + log.t.op_file_update(file->fnode); + log.t.op_dir_link(dirname, filename, file->fnode.ino); } else { file = q->second; if (file->locked) { @@ -3712,7 +3718,7 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, int BlueFS::unlock_file(FileLock *fl) { - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); dout(10) << __func__ << " " << fl << " on " << fl->file->fnode << dendl; ceph_assert(fl->file->locked); fl->file->locked = false; @@ -3726,18 +3732,18 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) if (!dirname.empty() && dirname.back() == '/') { dirname.remove_suffix(1); } - std::lock_guard dirl(dirs_lock); + std::lock_guard dirl(nodes.lock); dout(10) << __func__ << " " << dirname << dendl; if (dirname.empty()) { // list dirs - ls->reserve(dir_map.size() + 2); - for (auto& q : dir_map) { + ls->reserve(nodes.dir_map.size() + 2); + for (auto& q : nodes.dir_map) { ls->push_back(q.first); } } else { // list files in dir - map::iterator p = dir_map.find(dirname); - if (p == dir_map.end()) { + map::iterator p = nodes.dir_map.find(dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << dirname << " not found" << dendl; return -ENOENT; } @@ -3754,11 +3760,11 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) int BlueFS::unlink(std::string_view dirname, std::string_view filename) { - std::lock_guard dirl(dirs_lock); - std::lock_guard ll(log_lock); + std::lock_guard dirl(nodes.lock); + std::lock_guard ll(log.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; - map::iterator p = dir_map.find(dirname); - if (p == dir_map.end()) { + map::iterator p = nodes.dir_map.find(dirname); + if (p == nodes.dir_map.end()) { dout(20) << __func__ << " dir " << dirname << " not 
found" << dendl; return -ENOENT; } @@ -3776,7 +3782,7 @@ int BlueFS::unlink(std::string_view dirname, std::string_view filename) return -EBUSY; } dir->file_map.erase(string{filename}); - log_t.op_dir_unlink(dirname, filename); + log.t.op_dir_unlink(dirname, filename); _drop_link(file); return 0; } @@ -3855,7 +3861,7 @@ int BlueFS::do_replay_recovery_read(FileReader *log_reader, dout(2) << __func__ << " processing " << get_device_name(dev) << dendl; interval_set disk_regions; disk_regions.insert(0, bdev[dev]->get_size()); - for (auto f : file_map) { + for (auto f : nodes.file_map) { auto& e = f.second->fnode.extents; for (auto& p : e) { if (p.bdev == dev) { diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 4f368841ff4d9..710f1809bca3e 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -322,22 +322,31 @@ class BlueFS { }; // cache - mempool::bluefs::map> dir_map; ///< dirname -> Dir - mempool::bluefs::unordered_map file_map; ///< ino -> File - - // map of dirty files, files of same dirty_seq are grouped into list. - std::map dirty_files; + struct { + ceph::mutex lock = ceph::make_mutex("BlueFS::nodes.lock"); + mempool::bluefs::map> dir_map; ///< dirname -> Dir + mempool::bluefs::unordered_map file_map; ///< ino -> File + } nodes; bluefs_super_t super; ///< latest superblock (as last written) uint64_t ino_last = 0; ///< last assigned ino (this one is in use) - uint64_t log_seq = 0; ///< last used log seq (by current pending log_t) - uint64_t log_seq_stable = 0; ///< last stable/synced log seq - FileWriter *log_writer = 0; ///< writer for the log - bluefs_transaction_t log_t; ///< pending, unwritten log transaction - bool log_flushing = false; ///< true while flushing the log + + struct { + ceph::mutex lock = ceph::make_mutex("BlueFS::log.lock"); + uint64_t seq_live = 0; //seq that log is currently writing to + FileWriter *writer = 0; + bluefs_transaction_t t; + } log; + + struct { + ceph::mutex lock = ceph::make_mutex("BlueFS::dirty.lock"); + uint64_t seq_stable = 0; //seq that is now stable on disk + uint64_t seq_next = 1; //seq that is ongoing and not yet stable + // map of dirty files, files of same dirty_seq are grouped into list. + std::map files; + } dirty; ceph::condition_variable log_cond; ///< used for state control between log flush / log compaction - ceph::mutex cond_lock = ceph::make_mutex("BlueFS::cond_lock"); ///< .... std::atomic log_is_compacting{false}; ///< signals that bluefs log is already ongoing compaction std::atomic log_forbidden_to_expand{false}; ///< used to signal that async compaction is in state /// that prohibits expansion of bluefs log From f833b461ad6f180838a0682019417abcfd9103b8 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Thu, 24 Jun 2021 11:08:42 +0200 Subject: [PATCH 0043/2866] os/bluestore/bluefs: Clean up log seq progression Splits seq into seq_live and seq_stable. Cleans up log sequencing. 
Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 84 ++++++++++++++++++++++++-------------- src/os/bluestore/BlueFS.h | 12 +++--- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index e501159b2953c..3a202c65df95f 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -514,6 +514,8 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) log.writer = _create_writer(log_file); // initial txn + ceph_assert(log.seq_live == 1); + log.t.seq = 1; log.t.op_init(); flush_and_sync_log(); @@ -1221,7 +1223,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) << std::endl; } - ceph_assert(next_seq >= log_seq); + ceph_assert(next_seq > log_seq); log_seq = next_seq - 1; // we will increment it below uint64_t skip = offset - read_pos; if (skip) { @@ -1249,7 +1251,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) << ": op_jump_seq " << next_seq << std::endl; } - ceph_assert(next_seq >= log_seq); + ceph_assert(next_seq > log_seq); log_seq = next_seq - 1; // we will increment it below } break; @@ -1464,8 +1466,10 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (!noop) { vselector->add_usage(log_file->vselector_hint, log_file->fnode); - log.seq_live = log_seq; - dirty.seq_next = log_seq + 1; + log.seq_live = log_seq + 1; + dirty.seq_live = log_seq + 1; + log.t.seq = log.seq_live; + //log.seq_stable = log_seq; dirty.seq_stable = log_seq; } @@ -2198,8 +2202,13 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, File *log_file = log.writer->file.get(); - // clear out log (be careful who calls us!!!) + // log.t.seq is always set to current live seq + ceph_assert(log.t.seq == log.seq_live); + // Capturing entire state. Dump anything that has been stored there. log.t.clear(); + log.t.seq = log.seq_live; + // From now on, no changes to log.t are permitted until we finish rewriting log. + // Can allow dirty to remain dirty - log.seq_live will not change. dout(20) << __func__ << " super_dev:" << super_dev << " log_dev:" << log_dev @@ -2351,9 +2360,10 @@ void BlueFS::compact_log_async() // update the log file change and log a jump to the offset where we want to // write the new entries - log.t.op_file_update(log_file->fnode); // 1.2 - log.t.op_jump(log.seq_live, old_log_jump_to); // 1.3 - + log.t.op_file_update(log_file->fnode); + // jump to new position should mean next seq + log.t.op_jump(log.seq_live + 1, old_log_jump_to); + uint64_t seq_now = log.seq_live; // we need to flush all bdev because we will be streaming all dirty files to log // TODO - think - if _flush_and_sync_log_jump will not add dirty files nor release pending allocations // then flush_bdev() will not be necessary @@ -2377,7 +2387,8 @@ void BlueFS::compact_log_async() // conservative estimate for final encoded size new_log_jump_to = round_up_to(t.op_bl.length() + super.block_size * 2, max_alloc_size); - t.op_jump(log.seq_live, new_log_jump_to); + //newly constructed log head will jump to what we had before + t.op_jump(seq_now, new_log_jump_to); // allocate //FIXME: check if we want DB here? @@ -2519,20 +2530,32 @@ void BlueFS::_pad_bl(bufferlist& bl) * pending_release and log.t go with same lock */ -// Adds to log.t file modifications mentioned in `dirty.files`. -// Note: some bluefs ops may have already been stored in log.t transaction. -uint64_t BlueFS::_consume_dirty() +// Returns log seq that was live before advance. 
+uint64_t BlueFS::_log_advance_seq() { ceph_assert(ceph_mutex_is_locked(dirty.lock)); ceph_assert(ceph_mutex_is_locked(log.lock)); //acquire new seq // this will became seq_stable once we write - ++dirty.seq_next; - ++log.seq_live; - ceph_assert(dirty.seq_next == log.seq_live + 1); - uint64_t seq = log.t.seq = log.seq_live; + ceph_assert(dirty.seq_stable < dirty.seq_live); + ceph_assert(log.t.seq == log.seq_live); + uint64_t seq = log.seq_live; log.t.uuid = super.uuid; + ++dirty.seq_live; + ++log.seq_live; + ceph_assert(dirty.seq_live == log.seq_live); + return seq; +} + + +// Adds to log.t file modifications mentioned in `dirty.files`. +// Note: some bluefs ops may have already been stored in log.t transaction. +void BlueFS::_consume_dirty(uint64_t seq) +{ + ceph_assert(ceph_mutex_is_locked(dirty.lock)); + ceph_assert(ceph_mutex_is_locked(log.lock)); + // log dirty files // we just incremented log_seq. It is now illegal to add to dirty.files[log_seq] auto lsi = dirty.files.find(seq); @@ -2543,7 +2566,6 @@ uint64_t BlueFS::_consume_dirty() log.t.op_file_update(f.fnode); } } - return seq; } // Extends log if its free space is smaller then bluefs_min_log_runway. @@ -2610,8 +2632,9 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway) log.writer->append(bl); + // prepare log for new transactions log.t.clear(); - log.t.seq = 0; // just so debug output is less confusing + log.t.seq = log.seq_live; int r = _flush_special(log.writer); ceph_assert(r == 0); @@ -2706,10 +2729,10 @@ int BlueFS::flush_and_sync_log(uint64_t want_seq) ceph_assert(available_runway >= 0); } } while (available_runway < 0); - - ceph_assert(want_seq == 0 || want_seq <= log_seq + 1); // illegal to request seq that was not created yet - - uint64_t seq = _consume_dirty(); + + ceph_assert(want_seq == 0 || want_seq <= dirty.seq_live); // illegal to request seq that was not created yet + uint64_t seq =_log_advance_seq(); + _consume_dirty(seq); vector> to_release(pending_release.size()); to_release.swap(pending_release); dirty.lock.unlock(); @@ -2736,7 +2759,8 @@ int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, // we synchronize writing to log, by lock to log_lock dirty.lock.lock(); - uint64_t seq = _consume_dirty(); + uint64_t seq =_log_advance_seq(); + _consume_dirty(seq); vector> to_release(pending_release.size()); to_release.swap(pending_release); dirty.lock.unlock(); @@ -2808,22 +2832,22 @@ int BlueFS::_signal_dirty_to_log(FileWriter *h) h->file->fnode.mtime = ceph_clock_now(); ceph_assert(h->file->fnode.ino >= 1); if (h->file->dirty_seq == 0) { - h->file->dirty_seq = dirty.seq_next; + h->file->dirty_seq = dirty.seq_live; dirty.files[h->file->dirty_seq].push_back(*h->file); - dout(20) << __func__ << " dirty_seq = " << dirty.seq_next + dout(20) << __func__ << " dirty_seq = " << dirty.seq_live << " (was clean)" << dendl; } else { - if (h->file->dirty_seq != dirty.seq_next) { + if (h->file->dirty_seq != dirty.seq_live) { // need re-dirty, erase from list first ceph_assert(dirty.files.count(h->file->dirty_seq)); auto it = dirty.files[h->file->dirty_seq].iterator_to(*h->file); dirty.files[h->file->dirty_seq].erase(it); - h->file->dirty_seq = dirty.seq_next; + h->file->dirty_seq = dirty.seq_live; dirty.files[h->file->dirty_seq].push_back(*h->file); - dout(20) << __func__ << " dirty_seq = " << dirty.seq_next + dout(20) << __func__ << " dirty_seq = " << dirty.seq_live << " (was " << h->file->dirty_seq << ")" << dendl; } else { - dout(20) << __func__ << " dirty_seq = " << dirty.seq_next + dout(20) << __func__ << " dirty_seq = 
" << dirty.seq_live << " (unchanged, do nothing) " << dendl; } } @@ -3154,7 +3178,7 @@ int BlueFS::fsync(FileWriter *h) flush_and_sync_log(old_dirty_seq); // AK - TODO - think - how can dirty_seq change if we are under h lock? ceph_assert(h->file->dirty_seq == 0 || // cleaned - h->file->dirty_seq > s); // or redirtied by someone else + h->file->dirty_seq >= s); // or redirtied by someone else } maybe_compact_log(); return 0; diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 710f1809bca3e..a6e2cab95bdc4 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -333,7 +333,8 @@ class BlueFS { struct { ceph::mutex lock = ceph::make_mutex("BlueFS::log.lock"); - uint64_t seq_live = 0; //seq that log is currently writing to + //uint64_t seq_stable = 0; //seq that is now stable on disk AK - consider also mirroring this + uint64_t seq_live = 1; //seq that log is currently writing to; mirrors dirty.seq_live FileWriter *writer = 0; bluefs_transaction_t t; } log; @@ -341,7 +342,7 @@ class BlueFS { struct { ceph::mutex lock = ceph::make_mutex("BlueFS::dirty.lock"); uint64_t seq_stable = 0; //seq that is now stable on disk - uint64_t seq_next = 1; //seq that is ongoing and not yet stable + uint64_t seq_live = 1; //seq that is ongoing and dirty files will be written to // map of dirty files, files of same dirty_seq are grouped into list. std::map files; } dirty; @@ -425,9 +426,10 @@ class BlueFS { int64_t _maybe_extend_log(); void _extend_log(); - uint64_t _consume_dirty(); - void _clear_dirty_set_stable(uint64_t seq_stable); - void _release_pending_allocations(std::vector>& to_release); + uint64_t _log_advance_seq(); + void _consume_dirty(uint64_t seq); + void clear_dirty_set_stable(uint64_t seq_stable); + void release_pending_allocations(std::vector>& to_release); void _flush_and_sync_log_core(int64_t available_runway); int _flush_and_sync_log_jump(uint64_t jump_to, From 9ad938f8156e57e0b4073d5718cd8dabab07f691 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Mon, 28 Jun 2021 12:56:36 +0200 Subject: [PATCH 0044/2866] os/bluestore/bluefs: Rename functions to reflect lock that are used This is modification that only changes names of functions, so tracking of potential deadlocks is simpler. All internal functions start with _. 
Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 204 +++++++++++++++++++------------------ src/os/bluestore/BlueFS.h | 65 +++++++----- 2 files changed, 148 insertions(+), 121 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 3a202c65df95f..f8145ce889000 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -464,7 +464,7 @@ void BlueFS::dump_block_extents(ostream& out) int BlueFS::get_block_extents(unsigned id, interval_set *extents) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " bdev " << id << dendl; ceph_assert(id < alloc.size()); for (auto& p : nodes.file_map) { @@ -517,13 +517,13 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) ceph_assert(log.seq_live == 1); log.t.seq = 1; log.t.op_init(); - flush_and_sync_log(); + _flush_and_sync_log_LD(); // write supers super.log_fnode = log_file->fnode; super.memorized_layout = layout; _write_super(BDEV_DB); - flush_bdev(); + _flush_bdev(); // clean up super = bluefs_super_t(); @@ -866,7 +866,7 @@ int BlueFS::prepare_new_device(int id, const bluefs_layout_t& layout) new_log_dev_cur = BDEV_NEWDB; new_log_dev_next = BDEV_DB; } - rewrite_log_and_layout_sync(false, + _rewrite_log_and_layout_sync_LN_LD(false, BDEV_NEWDB, new_log_dev_cur, new_log_dev_next, @@ -874,7 +874,7 @@ int BlueFS::prepare_new_device(int id, const bluefs_layout_t& layout) layout); //} } else if(id == BDEV_NEWWAL) { - rewrite_log_and_layout_sync(false, + _rewrite_log_and_layout_sync_LN_LD(false, BDEV_DB, BDEV_NEWWAL, BDEV_WAL, @@ -1089,7 +1089,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) int r = _read(log_reader, read_pos, super.block_size, &bl, NULL); if (r != (int)super.block_size && cct->_conf->bluefs_replay_recovery) { - r += do_replay_recovery_read(log_reader, pos, read_pos + r, super.block_size - r, &bl); + r += _do_replay_recovery_read(log_reader, pos, read_pos + r, super.block_size - r, &bl); } assert(r == (int)super.block_size); read_pos += r; @@ -1149,7 +1149,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) << ", which is past eof" << dendl; if (cct->_conf->bluefs_replay_recovery) { //try to search for more data - r += do_replay_recovery_read(log_reader, pos, read_pos + r, more - r, &t); + r += _do_replay_recovery_read(log_reader, pos, read_pos + r, more - r, &t); if (r < (int)more) { //in normal mode we must read r==more, for recovery it is too strict break; @@ -1650,7 +1650,7 @@ int BlueFS::device_migrate_to_existing( new_log_dev_next; } - rewrite_log_and_layout_sync( + _rewrite_log_and_layout_sync_LN_LD( false, (flags & REMOVE_DB) ? 
BDEV_SLOW : BDEV_DB, new_log_dev_cur, @@ -1785,7 +1785,7 @@ int BlueFS::device_migrate_to_new( BDEV_DB : BDEV_SLOW; - rewrite_log_and_layout_sync( + _rewrite_log_and_layout_sync_LN_LD( false, super_dev, new_log_dev_cur, @@ -1810,7 +1810,7 @@ BlueFS::FileRef BlueFS::_get_file(uint64_t ino) } } -void BlueFS::_drop_link(FileRef file) +void BlueFS::_drop_link_D(FileRef file) { dout(20) << __func__ << " had refs " << file->refs << " on " << file->fnode << dendl; @@ -2078,9 +2078,9 @@ void BlueFS::invalidate_cache(FileRef f, uint64_t offset, uint64_t length) } } -uint64_t BlueFS::estimate_log_size() +uint64_t BlueFS::_estimate_log_size_N() { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); int avg_dir_size = 40; // fixme int avg_file_size = 12; uint64_t size = 4096 * 2; @@ -2094,14 +2094,14 @@ void BlueFS::compact_log() { if (!cct->_conf->bluefs_replay_recovery_disable_compact) { if (cct->_conf->bluefs_compact_log_sync) { - compact_log_sync(); + _compact_log_sync_LN_LD(); } else { - compact_log_async(); + _compact_log_async_LD_NF_D(); } } } -bool BlueFS::should_start_compact_log() +bool BlueFS::_should_start_compact_log_L_N() { if (log_is_compacting.load() == true) { // compaction is already running @@ -2109,10 +2109,10 @@ bool BlueFS::should_start_compact_log() } uint64_t current; { - std::lock_guard dirl(log.lock); + std::lock_guard ll(log.lock); current = log.writer->file->fnode.size; } - uint64_t expected = estimate_log_size(); + uint64_t expected = _estimate_log_size_N(); float ratio = (float)current / (float)expected; dout(10) << __func__ << " current 0x" << std::hex << current << " expected " << expected << std::dec @@ -2125,10 +2125,10 @@ bool BlueFS::should_start_compact_log() return true; } -void BlueFS::compact_log_dump_metadata(bluefs_transaction_t *t, +void BlueFS::_compact_log_dump_metadata_N(bluefs_transaction_t *t, int flags) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); t->seq = 1; t->uuid = super.uuid; @@ -2176,12 +2176,12 @@ void BlueFS::compact_log_dump_metadata(bluefs_transaction_t *t, } } -void BlueFS::compact_log_sync() +void BlueFS::_compact_log_sync_LN_LD() { dout(10) << __func__ << dendl; auto prefer_bdev = vselector->select_prefer_bdev(log.writer->file->vselector_hint); - rewrite_log_and_layout_sync(true, + _rewrite_log_and_layout_sync_LN_LD(true, BDEV_DB, prefer_bdev, prefer_bdev, @@ -2190,7 +2190,7 @@ void BlueFS::compact_log_sync() logger->inc(l_bluefs_log_compactions); } -void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, +void BlueFS::_rewrite_log_and_layout_sync_LN_LD(bool allocate_with_fallback, int super_dev, int log_dev, int log_dev_new, @@ -2216,7 +2216,7 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, << " flags:" << flags << dendl; bluefs_transaction_t t; - compact_log_dump_metadata(&t, flags); + _compact_log_dump_metadata_N(&t, flags); dout(20) << __func__ << " op_jump_seq " << log.seq_live << dendl; t.op_jump_seq(log.seq_live); @@ -2260,11 +2260,11 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, if (!cct->_conf->bluefs_sync_write) { list completed_ios; _claim_completed_aios(log.writer, &completed_ios); - wait_for_aio(log.writer); + _wait_for_aio(log.writer); completed_ios.clear(); } #endif - flush_bdev(); + _flush_bdev(); super.memorized_layout = layout; super.log_fnode = log_file->fnode; @@ -2279,7 +2279,7 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, ++super.version; _write_super(super_dev); - flush_bdev(); + 
_flush_bdev(); dout(10) << __func__ << " release old log extents " << old_fnode.extents << dendl; std::lock_guard dl(dirty.lock); @@ -2311,7 +2311,7 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, * 8. Release the old log space. Clean up. */ -void BlueFS::compact_log_async() +void BlueFS::_compact_log_async_LD_NF_D() //also locks FW for new_writer { dout(10) << __func__ << dendl; // only one compaction allowed at one time @@ -2367,8 +2367,8 @@ void BlueFS::compact_log_async() // we need to flush all bdev because we will be streaming all dirty files to log // TODO - think - if _flush_and_sync_log_jump will not add dirty files nor release pending allocations // then flush_bdev() will not be necessary - flush_bdev(); - _flush_and_sync_log_jump(old_log_jump_to, runway); + _flush_bdev(); + _flush_and_sync_log_jump_D(old_log_jump_to, runway); log.lock.unlock(); // out of jump section - now log can be used to write to @@ -2378,7 +2378,7 @@ void BlueFS::compact_log_async() //this needs files lock //what will happen, if a file is modified *twice* before we stream it to log? //the later state that we capture will be seen earlier and replay will see a temporary retraction (!) - compact_log_dump_metadata(&t, 0); + compact_log_dump_metadata_N(&t, 0); uint64_t max_alloc_size = std::max(alloc_size[BDEV_WAL], std::max(alloc_size[BDEV_DB], @@ -2409,6 +2409,7 @@ void BlueFS::compact_log_async() new_log_writer = _create_writer(new_log); new_log_writer->append(bl); + new_log->lock.lock(); new_log_writer->lock.lock(); // 3. flush r = _flush_special(new_log_writer); @@ -2417,6 +2418,7 @@ void BlueFS::compact_log_async() // 4. wait _flush_bdev(new_log_writer); new_log_writer->lock.unlock(); + new_log->lock.unlock(); // 5. update our log fnode // discard first old_log_jump_to extents @@ -2468,7 +2470,7 @@ void BlueFS::compact_log_async() ++super.version; _write_super(BDEV_DB); - flush_bdev(); + _flush_bdev(); old_forbidden = atomic_exchange(&log_forbidden_to_expand, false); ceph_assert(old_forbidden == true); @@ -2641,9 +2643,9 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway) } // Clears dirty.files up to (including) seq_stable. -void BlueFS::clear_dirty_set_stable(uint64_t seq) +void BlueFS::_clear_dirty_set_stable_D(uint64_t seq) { - std::lock_guard lg(dirty.lock); + std::lock_guard dl(dirty.lock); // clean dirty files if (seq > dirty.seq_stable) { @@ -2678,7 +2680,7 @@ void BlueFS::clear_dirty_set_stable(uint64_t seq) } } -void BlueFS::release_pending_allocations(vector>& to_release) +void BlueFS::_release_pending_allocations(vector>& to_release) { for (unsigned i = 0; i < to_release.size(); ++i) { if (!to_release[i].empty()) { @@ -2701,9 +2703,8 @@ void BlueFS::release_pending_allocations(vector>& to_rele } } -int BlueFS::flush_and_sync_log(uint64_t want_seq) +int BlueFS::_flush_and_sync_log_LD(uint64_t want_seq) { - // we synchronize writing to log, by lock to log_lock int64_t available_runway; do { log.lock.lock(); @@ -2742,15 +2743,15 @@ int BlueFS::flush_and_sync_log(uint64_t want_seq) //now log.lock is no longer needed log.lock.unlock(); - clear_dirty_set_stable(seq); - release_pending_allocations(to_release); + _clear_dirty_set_stable_D(seq); + _release_pending_allocations(to_release); _update_logger_stats(); return 0; } // Flushes log and immediately adjusts log_writer pos. 
-int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, +int BlueFS::_flush_and_sync_log_jump_D(uint64_t jump_to, int64_t available_runway) { ceph_assert(ceph_mutex_is_locked(log.lock)); @@ -2775,8 +2776,8 @@ int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, _flush_bdev(log.writer); - clear_dirty_set_stable(seq); - release_pending_allocations(to_release); + _clear_dirty_set_stable_D(seq); + _release_pending_allocations(to_release); _update_logger_stats(); return 0; @@ -2825,7 +2826,7 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer( return bl; } -int BlueFS::_signal_dirty_to_log(FileWriter *h) +int BlueFS::_signal_dirty_to_log_D(FileWriter *h) { ceph_assert(ceph_mutex_is_locked(h->lock)); std::lock_guard dl(dirty.lock); @@ -2854,12 +2855,12 @@ int BlueFS::_signal_dirty_to_log(FileWriter *h) return 0; } -void BlueFS::flush_range(FileWriter *h, uint64_t offset, uint64_t length) { +void BlueFS::flush_range/*WF*/(FileWriter *h, uint64_t offset, uint64_t length) { std::unique_lock hl(h->lock); - _flush_range(h, offset, length); + _flush_range_F(h, offset, length); } -int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) +int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) { ceph_assert(ceph_mutex_is_locked(h->lock)); dout(10) << __func__ << " " << h << " pos 0x" << std::hex << h->pos @@ -2918,8 +2919,10 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) int BlueFS::_flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered) { - //ceph_assert(ceph_mutex_is_locked(h->lock)); - //ceph_assert(ceph_mutex_is_locked(h->file->lock)); + if (h->file->fnode.ino != 1) { + ceph_assert(ceph_mutex_is_locked(h->lock)); + ceph_assert(ceph_mutex_is_locked(h->file->lock)); + } uint64_t x_off = 0; auto p = h->file->fnode.seek(offset, &x_off); ceph_assert(p != h->file->fnode.extents.end()); @@ -3009,7 +3012,7 @@ void BlueFS::_claim_completed_aios(FileWriter *h, list *ls) dout(10) << __func__ << " got " << ls->size() << " aios" << dendl; } -void BlueFS::wait_for_aio(FileWriter *h) +void BlueFS::_wait_for_aio(FileWriter *h) { // NOTE: this is safe to call without a lock, as long as our reference is // stable. 
@@ -3027,7 +3030,7 @@ void BlueFS::wait_for_aio(FileWriter *h) #endif -void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len) +void BlueFS::append_try_flush/*_WFL_WFN*/(FileWriter *h, const char* buf, size_t len) { std::unique_lock hl(h->lock); size_t max_size = 1ull << 30; // cap to 1GB @@ -3043,10 +3046,10 @@ void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len) } if (need_flush) { bool flushed = false; - int r = _flush(h, true, &flushed); + int r = _flush_F(h, true, &flushed); ceph_assert(r == 0); if (r == 0 && flushed) { - maybe_compact_log(); + _maybe_compact_log_LN_N_LD_D(); } // make sure we've made any progress with flush hence the // loop doesn't iterate forever @@ -3059,14 +3062,14 @@ void BlueFS::flush(FileWriter *h, bool force) { std::unique_lock hl(h->lock); bool flushed = false; - int r = _flush(h, force, &flushed); + int r = _flush_F(h, force, &flushed); ceph_assert(r == 0); if (r == 0 && flushed) { - maybe_compact_log(); + _maybe_compact_log_LN_NF_LD_D(); } } -int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) +int BlueFS::_flush_F(FileWriter *h, bool force, bool *flushed) { ceph_assert(ceph_mutex_is_locked(h->lock)); uint64_t length = h->get_buffer_length(); @@ -3090,7 +3093,7 @@ int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) << std::hex << offset << "~" << length << std::dec << " to " << h->file->fnode << dendl; ceph_assert(h->pos <= h->file->fnode.size); - int r = _flush_range(h, offset, length); + int r = _flush_range_F(h, offset, length); if (flushed) { *flushed = true; } @@ -3105,8 +3108,7 @@ int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) // smart enough to discover it on its own. int BlueFS::_flush_special(FileWriter *h) { - //ceph_assert(ceph_mutex_is_locked(h->lock)); - //ceph_assert(ceph_mutex_is_locked(h->file->lock)); + ceph_assert(h->file->fnode.ino <= 1); uint64_t length = h->get_buffer_length(); uint64_t offset = h->pos; ceph_assert(length + offset <= h->file->fnode.get_allocated()); @@ -3137,7 +3139,7 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset) ceph_abort_msg("actually this shouldn't happen"); } if (h->get_buffer_length()) { - int r = _flush(h, true); + int r = _flush_F(h, true); if (r < 0) return r; } @@ -3159,28 +3161,36 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset) int BlueFS::fsync(FileWriter *h) { std::unique_lock hl(h->lock); - dout(10) << __func__ << " " << h << " " << h->file->fnode << dendl; - int r = _flush(h, true); - if (r < 0) - return r; - if (h->file->is_dirty) { - _signal_dirty_to_log(h); - h->file->is_dirty = false; + uint64_t old_dirty_seq = 0; + { + dout(10) << __func__ << " " << h << " " << h->file->fnode << dendl; + int r = _flush_F(h, true); + if (r < 0) + return r; + _flush_bdev(h); + if (h->file->is_dirty) { + _signal_dirty_to_log_D(h); + h->file->is_dirty = false; + } + { + std::lock_guard dl(dirty.lock); + if (dirty.seq_stable < h->file->dirty_seq) { + old_dirty_seq = h->file->dirty_seq; + dout(20) << __func__ << " file metadata was dirty (" << old_dirty_seq + << ") on " << h->file->fnode << ", flushing log" << dendl; + } + } } - uint64_t old_dirty_seq = h->file->dirty_seq; - - _flush_bdev(h); - if (old_dirty_seq) { uint64_t s = log.seq_live; // AKAK !!! locks! dout(20) << __func__ << " file metadata was dirty (" << old_dirty_seq << ") on " << h->file->fnode << ", flushing log" << dendl; - flush_and_sync_log(old_dirty_seq); + _flush_and_sync_log_LD(old_dirty_seq); // AK - TODO - think - how can dirty_seq change if we are under h lock? 
ceph_assert(h->file->dirty_seq == 0 || // cleaned h->file->dirty_seq >= s); // or redirtied by someone else } - maybe_compact_log(); + _maybe_compact_log_LN_NF_LD_D(); return 0; } @@ -3198,14 +3208,14 @@ void BlueFS::_flush_bdev(FileWriter *h) if (!cct->_conf->bluefs_sync_write) { list completed_ios; _claim_completed_aios(h, &completed_ios); - wait_for_aio(h); + _wait_for_aio(h); completed_ios.clear(); } #endif - flush_bdev(flush_devs); + _flush_bdev(flush_devs); } -void BlueFS::flush_bdev(std::array& dirty_bdevs) +void BlueFS::_flush_bdev(std::array& dirty_bdevs) { // NOTE: this is safe to call without a lock. dout(20) << __func__ << dendl; @@ -3215,7 +3225,7 @@ void BlueFS::flush_bdev(std::array& dirty_bdevs) } } -void BlueFS::flush_bdev() +void BlueFS::_flush_bdev() { // NOTE: this is safe to call without a lock. dout(20) << __func__ << dendl; @@ -3378,24 +3388,24 @@ void BlueFS::sync_metadata(bool avoid_compact) lgeneric_subdout(cct, bluefs, 10) << __func__; start = ceph_clock_now(); *_dout << dendl; - flush_bdev(); // FIXME? - flush_and_sync_log(); + _flush_bdev(); // FIXME? + _flush_and_sync_log_LD(); dout(10) << __func__ << " done in " << (ceph_clock_now() - start) << dendl; } if (!avoid_compact) { - maybe_compact_log(); + _maybe_compact_log_LN_NF_LD_D(); } } -void BlueFS::maybe_compact_log() +void BlueFS::_maybe_compact_log_LN_NF_LD_D() { if (!cct->_conf->bluefs_replay_recovery_disable_compact && - should_start_compact_log()) { + _should_start_compact_log_L_N()) { if (cct->_conf->bluefs_compact_log_sync) { - compact_log_sync(); + _compact_log_sync_LN_LD(); } else { - compact_log_async(); + _compact_log_async_LD_NF_D(); } } } @@ -3406,7 +3416,7 @@ int BlueFS::open_for_write( FileWriter **h, bool overwrite) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); DirRef dir; @@ -3553,7 +3563,7 @@ int BlueFS::open_for_read( FileReader **h, bool random) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << (random ? 
" (random)":" (sequential)") << dendl; map::iterator p = nodes.dir_map.find(dirname); @@ -3582,8 +3592,8 @@ int BlueFS::rename( std::string_view old_dirname, std::string_view old_filename, std::string_view new_dirname, std::string_view new_filename) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << old_dirname << "/" << old_filename << " -> " << new_dirname << "/" << new_filename << dendl; map::iterator p = nodes.dir_map.find(old_dirname); @@ -3614,7 +3624,7 @@ int BlueFS::rename( << " already exists, unlinking" << dendl; ceph_assert(q->second != file); log.t.op_dir_unlink(new_dirname, new_filename); - _drop_link(q->second); + _drop_link_D(q->second); } dout(10) << __func__ << " " << new_dirname << "/" << new_filename << " " @@ -3630,8 +3640,8 @@ int BlueFS::rename( int BlueFS::mkdir(std::string_view dirname) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p != nodes.dir_map.end()) { @@ -3645,8 +3655,8 @@ int BlueFS::mkdir(std::string_view dirname) int BlueFS::rmdir(std::string_view dirname) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << dendl; auto p = nodes.dir_map.find(dirname); if (p == nodes.dir_map.end()) { @@ -3665,7 +3675,7 @@ int BlueFS::rmdir(std::string_view dirname) bool BlueFS::dir_exists(std::string_view dirname) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); map::iterator p = nodes.dir_map.find(dirname); bool exists = p != nodes.dir_map.end(); dout(10) << __func__ << " " << dirname << " = " << (int)exists << dendl; @@ -3675,7 +3685,7 @@ bool BlueFS::dir_exists(std::string_view dirname) int BlueFS::stat(std::string_view dirname, std::string_view filename, uint64_t *size, utime_t *mtime) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p == nodes.dir_map.end()) { @@ -3703,7 +3713,8 @@ int BlueFS::stat(std::string_view dirname, std::string_view filename, int BlueFS::lock_file(std::string_view dirname, std::string_view filename, FileLock **plock) { - std::lock_guard dirl(nodes.lock); + std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p == nodes.dir_map.end()) { @@ -3723,7 +3734,6 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, nodes.file_map[ino_last] = file; dir->file_map[string{filename}] = file; ++file->refs; - std::lock_guard ll(log.lock); log.t.op_file_update(file->fnode); log.t.op_dir_link(dirname, filename, file->fnode.ino); } else { @@ -3742,7 +3752,7 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, int BlueFS::unlock_file(FileLock *fl) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << fl << " on " << fl->file->fnode << dendl; ceph_assert(fl->file->locked); fl->file->locked = false; @@ -3756,7 +3766,7 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) if (!dirname.empty() && dirname.back() == '/') { dirname.remove_suffix(1); } - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << 
dirname << dendl; if (dirname.empty()) { // list dirs @@ -3784,8 +3794,8 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) int BlueFS::unlink(std::string_view dirname, std::string_view filename) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p == nodes.dir_map.end()) { @@ -3807,7 +3817,7 @@ int BlueFS::unlink(std::string_view dirname, std::string_view filename) } dir->file_map.erase(string{filename}); log.t.op_dir_unlink(dirname, filename); - _drop_link(file); + _drop_link_D(file); return 0; } @@ -3830,7 +3840,7 @@ bool BlueFS::wal_is_rotational() When we find it, we decode following bytes as extent. We read that whole extent and then check if merged with existing log part gives a proper bluefs transaction. */ -int BlueFS::do_replay_recovery_read(FileReader *log_reader, +int BlueFS::_do_replay_recovery_read(FileReader *log_reader, size_t replay_pos, size_t read_offset, size_t read_len, diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index a6e2cab95bdc4..3730e8469bc3d 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -400,7 +400,7 @@ class BlueFS { FileRef _get_file(uint64_t ino); - void _drop_link(FileRef f); + void _drop_link_D(FileRef f); unsigned _get_slow_device_id() { return bdev[BDEV_SLOW] ? BDEV_SLOW : BDEV_DB; @@ -412,32 +412,32 @@ class BlueFS { PExtentVector* extents); /* signal replay log to include h->file in nearest log flush */ - int _signal_dirty_to_log(FileWriter *h); - int _flush_range(FileWriter *h, uint64_t offset, uint64_t length); + int _signal_dirty_to_log_D(FileWriter *h); + int _flush_range_F(FileWriter *h, uint64_t offset, uint64_t length); int _flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered); - int _flush(FileWriter *h, bool force, bool *flushed = nullptr); + int _flush_F(FileWriter *h, bool force, bool *flushed = nullptr); int _flush_special(FileWriter *h); int _fsync(FileWriter *h); #ifdef HAVE_LIBAIO void _claim_completed_aios(FileWriter *h, std::list *ls); - void wait_for_aio(FileWriter *h); // safe to call without a lock + void _wait_for_aio(FileWriter *h); // safe to call without a lock #endif int64_t _maybe_extend_log(); void _extend_log(); uint64_t _log_advance_seq(); void _consume_dirty(uint64_t seq); - void clear_dirty_set_stable(uint64_t seq_stable); - void release_pending_allocations(std::vector>& to_release); + void _clear_dirty_set_stable_D(uint64_t seq_stable); + void _release_pending_allocations(std::vector>& to_release); void _flush_and_sync_log_core(int64_t available_runway); - int _flush_and_sync_log_jump(uint64_t jump_to, + int _flush_and_sync_log_jump_D(uint64_t jump_to, int64_t available_runway); - int flush_and_sync_log(uint64_t want_seq = 0); + int _flush_and_sync_log_LD(uint64_t want_seq = 0); - uint64_t estimate_log_size(); - bool should_start_compact_log(); + uint64_t _estimate_log_size_N(); + bool _should_start_compact_log_L_N(); enum { REMOVE_DB = 1, @@ -445,12 +445,12 @@ class BlueFS { RENAME_SLOW2DB = 4, RENAME_DB2SLOW = 8, }; - void compact_log_dump_metadata(bluefs_transaction_t *t, + void _compact_log_dump_metadata_N(bluefs_transaction_t *t, int flags); - void compact_log_sync(); - void compact_log_async(); + void _compact_log_sync_LN_LD(); + void _compact_log_async_LD_NF_D(); - void rewrite_log_and_layout_sync(bool allocate_with_fallback, + void 
_rewrite_log_and_layout_sync_LN_LD(bool allocate_with_fallback, int super_dev, int log_dev, int new_log_dev, @@ -460,8 +460,8 @@ class BlueFS { //void _aio_finish(void *priv); void _flush_bdev(FileWriter *h); - void flush_bdev(); // this is safe to call without a lock - void flush_bdev(std::array& dirty_bdevs); // this is safe to call without a lock + void _flush_bdev(); // this is safe to call without a lock + void _flush_bdev(std::array& dirty_bdevs); // this is safe to call without a lock int _preallocate(FileRef f, uint64_t off, uint64_t len); int _truncate(FileWriter *h, uint64_t off); @@ -581,7 +581,7 @@ class BlueFS { /// sync any uncommitted state to disk void sync_metadata(bool avoid_compact); /// test and compact log, if necessary - void maybe_compact_log(); + void _maybe_compact_log_LN_NF_LD_D(); void set_volume_selector(BlueFSVolumeSelector* s) { vselector.reset(s); @@ -625,11 +625,11 @@ class BlueFS { void invalidate_cache(FileRef f, uint64_t offset, uint64_t len); int preallocate(FileRef f, uint64_t offset, uint64_t len); int truncate(FileWriter *h, uint64_t offset); - int do_replay_recovery_read(FileReader *log, - size_t log_pos, - size_t read_offset, - size_t read_len, - bufferlist* bl); + int _do_replay_recovery_read(FileReader *log, + size_t log_pos, + size_t read_offset, + size_t read_len, + bufferlist* bl); size_t probe_alloc_avail(int dev, uint64_t alloc_size); @@ -695,5 +695,22 @@ class FitToFastVolumeSelector : public OriginalVolumeSelector { void get_paths(const std::string& base, paths& res) const override; }; - +/** + * Directional graph of locks. + * Vertices - Locks. Edges (directed) - locking progression. + * Edge A->B exist if last taken lock was A and next taken lock is B. + * + * Column represents last lock taken. + * Row represents next lock taken. + * + * < | L | D | N | F | W + * -------------|---|---|---|---|--- + * log L | < < + * dirty D | + * nodes N | < + * File F | < + * FileWriter W | < < < + * + * Claim: Deadlock is possible IFF graph contains cycles. + */ #endif From d482ed16b5a2ddfc22b06a33348fab66be5ed2ca Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Mon, 28 Jun 2021 14:57:31 +0200 Subject: [PATCH 0045/2866] os/bluestore/bluefs: Modify dirty_seq progression logic Modified File.dirty_seq to capture dirty.seq_stable instead of 0. This is used to distunguish files already serialized for compact_log_async_dump_metadata() function. 
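A hedged sketch of the resulting dirtiness test (the helper name is
illustrative, not from the patch): a file is now considered clean when its
dirty_seq is at or below the stable watermark, rather than when it equals the
old 0 sentinel:

    // before: clean  <=>  file->dirty_seq == 0
    // after:  clean  <=>  file->dirty_seq <= dirty.seq_stable
    bool file_is_clean(uint64_t file_dirty_seq, uint64_t seq_stable) {
      return file_dirty_seq <= seq_stable;
    }

This is what lets compact_log_async_dump_metadata_NF() serialize only files
with dirty_seq < capture_before_seq and skip those modified at or after the
capture point.
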
Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 52 ++++++++++++++++++++++++++++---------- src/os/bluestore/BlueFS.h | 3 +++ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index f8145ce889000..895c82619d86e 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -1831,13 +1831,12 @@ void BlueFS::_drop_link_D(FileRef file) nodes.file_map.erase(file->fnode.ino); file->deleted = true; - if (file->dirty_seq) { + if (file->dirty_seq > dirty.seq_stable) { // retract request to serialize changes - ceph_assert(file->dirty_seq > dirty.seq_stable); ceph_assert(dirty.files.count(file->dirty_seq)); auto it = dirty.files[file->dirty_seq].iterator_to(*file); dirty.files[file->dirty_seq].erase(it); - file->dirty_seq = 0; + file->dirty_seq = dirty.seq_stable; } } } @@ -2175,6 +2174,40 @@ void BlueFS::_compact_log_dump_metadata_N(bluefs_transaction_t *t, } } } +/* Streams to t files modified before *capture_before_seq* and all dirs */ +void BlueFS::compact_log_async_dump_metadata_NF(bluefs_transaction_t *t, + uint64_t capture_before_seq) +{ + std::lock_guard nl(nodes.lock); + + t->seq = 1; + t->uuid = super.uuid; + dout(20) << __func__ << " op_init" << dendl; + + t->op_init(); + for (auto& [ino, file_ref] : nodes.file_map) { + if (ino == 1) + continue; + ceph_assert(ino > 1); + std::lock_guard fl(file_ref->lock); + if (file_ref->dirty_seq < capture_before_seq) { + dout(20) << __func__ << " op_file_update " << file_ref->fnode << dendl; + t->op_file_update(file_ref->fnode); + } else { + dout(20) << __func__ << " skipping just modified, dirty_seq=" + << file_ref->dirty_seq << " " << file_ref->fnode << dendl; + } + } + for (auto& [path, dir_ref] : nodes.dir_map) { + dout(20) << __func__ << " op_dir_create " << path << dendl; + t->op_dir_create(path); + for (auto& [fname, file_ref] : dir_ref->file_map) { + dout(20) << __func__ << " op_dir_link " << path << "/" << fname + << " to " << file_ref->fnode.ino << dendl; + t->op_dir_link(path, fname, file_ref->fnode.ino); + } + } +} void BlueFS::_compact_log_sync_LN_LD() { @@ -2378,7 +2411,7 @@ void BlueFS::_compact_log_async_LD_NF_D() //also locks FW for new_writer //this needs files lock //what will happen, if a file is modified *twice* before we stream it to log? //the later state that we capture will be seen earlier and replay will see a temporary retraction (!) - compact_log_dump_metadata_N(&t, 0); + compact_log_async_dump_metadata_NF(&t, seq_now); uint64_t max_alloc_size = std::max(alloc_size[BDEV_WAL], std::max(alloc_size[BDEV_DB], @@ -2663,10 +2696,9 @@ void BlueFS::_clear_dirty_set_stable_D(uint64_t seq) auto l = p->second.begin(); while (l != p->second.end()) { File *file = &*l; - ceph_assert(file->dirty_seq > 0); ceph_assert(file->dirty_seq <= dirty.seq_stable); dout(20) << __func__ << " cleaned file " << file->fnode << dendl; - file->dirty_seq = 0; + file->dirty_seq = dirty.seq_stable; p->second.erase(l++); } @@ -2832,7 +2864,7 @@ int BlueFS::_signal_dirty_to_log_D(FileWriter *h) std::lock_guard dl(dirty.lock); h->file->fnode.mtime = ceph_clock_now(); ceph_assert(h->file->fnode.ino >= 1); - if (h->file->dirty_seq == 0) { + if (h->file->dirty_seq <= dirty.seq_stable) { h->file->dirty_seq = dirty.seq_live; dirty.files[h->file->dirty_seq].push_back(*h->file); dout(20) << __func__ << " dirty_seq = " << dirty.seq_live @@ -3182,13 +3214,7 @@ int BlueFS::fsync(FileWriter *h) } } if (old_dirty_seq) { - uint64_t s = log.seq_live; // AKAK !!! locks! 
- dout(20) << __func__ << " file metadata was dirty (" << old_dirty_seq - << ") on " << h->file->fnode << ", flushing log" << dendl; _flush_and_sync_log_LD(old_dirty_seq); - // AK - TODO - think - how can dirty_seq change if we are under h lock? - ceph_assert(h->file->dirty_seq == 0 || // cleaned - h->file->dirty_seq >= s); // or redirtied by someone else } _maybe_compact_log_LN_NF_LD_D(); return 0; diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 3730e8469bc3d..444d2cadd90da 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -447,6 +447,9 @@ class BlueFS { }; void _compact_log_dump_metadata_N(bluefs_transaction_t *t, int flags); + void compact_log_async_dump_metadata_NF(bluefs_transaction_t *t, + uint64_t capture_before_seq); + void _compact_log_sync_LN_LD(); void _compact_log_async_LD_NF_D(); From a5a0a74a254eb5de16f87c9a199d3437304aa58a Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Tue, 29 Jun 2021 13:24:04 +0200 Subject: [PATCH 0046/2866] os/bluestore/bluefs: Weaken locks in append_try_flush Extracted _maybe_compact_log outside of file lock. The sequence FL could deadlock with LNF that is executed in _async_dump_metadata. Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 74 +++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 895c82619d86e..3a3d3d74299e7 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2230,7 +2230,6 @@ void BlueFS::_rewrite_log_and_layout_sync_LN_LD(bool allocate_with_fallback, int flags, std::optional layout) { - //ceph_assert(ceph_mutex_is_notlocked(log.lock)); std::lock_guard ll(log.lock); File *log_file = log.writer->file.get(); @@ -2408,11 +2407,7 @@ void BlueFS::_compact_log_async_LD_NF_D() //also locks FW for new_writer // 2. prepare compacted log bluefs_transaction_t t; - //this needs files lock - //what will happen, if a file is modified *twice* before we stream it to log? - //the later state that we capture will be seen earlier and replay will see a temporary retraction (!) 
compact_log_async_dump_metadata_NF(&t, seq_now); - uint64_t max_alloc_size = std::max(alloc_size[BDEV_WAL], std::max(alloc_size[BDEV_DB], alloc_size[BDEV_SLOW])); @@ -2521,15 +2516,6 @@ void BlueFS::_compact_log_async_LD_NF_D() //also locks FW for new_writer // delete the new log, remove from the dirty files list _close_writer(new_log_writer); - // flush_special does not dirty files - /* - if (new_log->dirty_seq) { - std::lock_guard dl(dirty.lock); - ceph_assert(dirty.files.count(new_log->dirty_seq)); - auto it = dirty.files[new_log->dirty_seq].iterator_to(*new_log); - dirty.files[new_log->dirty_seq].erase(it); - } - */ new_log_writer = nullptr; new_log = nullptr; log_cond.notify_all(); @@ -2789,7 +2775,7 @@ int BlueFS::_flush_and_sync_log_jump_D(uint64_t jump_to, ceph_assert(ceph_mutex_is_locked(log.lock)); ceph_assert(jump_to); - // we synchronize writing to log, by lock to log_lock + // we synchronize writing to log, by lock to log.lock dirty.lock.lock(); uint64_t seq =_log_advance_seq(); @@ -3061,41 +3047,47 @@ void BlueFS::_wait_for_aio(FileWriter *h) } #endif - -void BlueFS::append_try_flush/*_WFL_WFN*/(FileWriter *h, const char* buf, size_t len) +void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len) { - std::unique_lock hl(h->lock); - size_t max_size = 1ull << 30; // cap to 1GB - while (len > 0) { - bool need_flush = true; - auto l0 = h->get_buffer_length(); - if (l0 < max_size) { - size_t l = std::min(len, max_size - l0); - h->append(buf, l); - buf += l; - len -= l; - need_flush = h->get_buffer_length() >= cct->_conf->bluefs_min_flush_size; - } - if (need_flush) { - bool flushed = false; - int r = _flush_F(h, true, &flushed); - ceph_assert(r == 0); - if (r == 0 && flushed) { - _maybe_compact_log_LN_N_LD_D(); + bool flushed_sum = false; + { + std::unique_lock hl(h->lock); + size_t max_size = 1ull << 30; // cap to 1GB + while (len > 0) { + bool need_flush = true; + auto l0 = h->get_buffer_length(); + if (l0 < max_size) { + size_t l = std::min(len, max_size - l0); + h->append(buf, l); + buf += l; + len -= l; + need_flush = h->get_buffer_length() >= cct->_conf->bluefs_min_flush_size; + } + if (need_flush) { + bool flushed = false; + int r = _flush_F(h, true, &flushed); + ceph_assert(r == 0); + flushed_sum |= flushed; + // make sure we've made any progress with flush hence the + // loop doesn't iterate forever + ceph_assert(h->get_buffer_length() < max_size); } - // make sure we've made any progress with flush hence the - // loop doesn't iterate forever - ceph_assert(h->get_buffer_length() < max_size); } } + if (flushed_sum) { + _maybe_compact_log_LN_NF_LD_D(); + } } void BlueFS::flush(FileWriter *h, bool force) { - std::unique_lock hl(h->lock); bool flushed = false; - int r = _flush_F(h, force, &flushed); - ceph_assert(r == 0); + int r; + { + std::unique_lock hl(h->lock); + r = _flush_F(h, force, &flushed); + ceph_assert(r == 0); + } if (r == 0 && flushed) { _maybe_compact_log_LN_NF_LD_D(); } From aa9dbdd8e8a31fc1c233b30fde39a995cdc1663c Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Thu, 1 Jul 2021 11:33:20 +0200 Subject: [PATCH 0047/2866] os/bluestore/bluefs: Weaken locks in open_for_write Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 3a3d3d74299e7..ed255cb1b266c 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3434,7 +3434,7 @@ int BlueFS::open_for_write( FileWriter **h, bool overwrite) { - 
std::lock_guard nl(nodes.lock); + std::unique_lock nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); DirRef dir; @@ -3496,6 +3496,7 @@ int BlueFS::open_for_write( dout(20) << __func__ << " mapping " << dirname << "/" << filename << " vsel_hint " << file->vselector_hint << dendl; + nl.unlock(); { std::lock_guard ll(log.lock); log.t.op_file_update(file->fnode); From 13b637c32e29ff99b7a0a4596c71fd1f722c26aa Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Tue, 10 Aug 2021 15:15:52 +0200 Subject: [PATCH 0048/2866] os/bluestore/bluefs: Rearrange locks in preallocate Rearranged locks in preallocate to avoid possible deadlock with compact_log_async_dump_metadata_NF. Cycle was: L->N rename/mkdir N->F compact_log_async_dump_metadata_NF F->L preallocate Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 2 +- src/os/bluestore/BlueFS.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index ed255cb1b266c..7add76833705f 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3366,6 +3366,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, int BlueFS::preallocate(FileRef f, uint64_t off, uint64_t len) { + std::lock_guard ll(log.lock); std::lock_guard fl(f->lock); dout(10) << __func__ << " file " << f->fnode << " 0x" << std::hex << off << "~" << len << std::dec << dendl; @@ -3385,7 +3386,6 @@ int BlueFS::preallocate(FileRef f, uint64_t off, uint64_t len) vselector->add_usage(f->vselector_hint, f->fnode); if (r < 0) return r; - std::lock_guard ll(log.lock); log.t.op_file_update(f->fnode); } return 0; diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 444d2cadd90da..a802443b60c63 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -711,8 +711,8 @@ class FitToFastVolumeSelector : public OriginalVolumeSelector { * log L | < < * dirty D | * nodes N | < - * File F | < - * FileWriter W | < < < + * File F | + * FileWriter W | < < < * * Claim: Deadlock is possible IFF graph contains cycles. */ From 95b662fa176cd841e26b76341f91bf2e6c266003 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Wed, 8 Sep 2021 14:58:02 -0400 Subject: [PATCH 0049/2866] rgw: fix rgw.none statistics Only unaccount for objects being removed if they exist. Also, clean up some formatting. Signed-off-by: J.
Eric Ivancich --- src/cls/rgw/cls_rgw.cc | 56 +++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 222803e341423..4b9a17cf29dfe 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -903,11 +903,14 @@ int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist static void unaccount_entry(rgw_bucket_dir_header& header, rgw_bucket_dir_entry& entry) { - rgw_bucket_category_stats& stats = header.stats[entry.meta.category]; - stats.num_entries--; - stats.total_size -= entry.meta.accounted_size; - stats.total_size_rounded -= cls_rgw_get_rounded_size(entry.meta.accounted_size); - stats.actual_size -= entry.meta.size; + if (entry.exists) { + rgw_bucket_category_stats& stats = header.stats[entry.meta.category]; + stats.num_entries--; + stats.total_size -= entry.meta.accounted_size; + stats.total_size_rounded -= + cls_rgw_get_rounded_size(entry.meta.accounted_size); + stats.actual_size -= entry.meta.size; + } } static void log_entry(const char *func, const char *str, rgw_bucket_dir_entry *entry) @@ -993,6 +996,7 @@ static int read_key_entry(cls_method_context_t hctx, cls_rgw_obj_key& key, int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { CLS_LOG(10, "entered %s", __func__); + // decode request rgw_cls_obj_complete_op op; auto iter = in->cbegin(); @@ -1002,6 +1006,7 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist CLS_LOG(1, "ERROR: rgw_bucket_complete_op(): failed to decode request\n"); return -EINVAL; } + CLS_LOG(1, "rgw_bucket_complete_op(): request: op=%d name=%s instance=%s ver=%lu:%llu tag=%s", op.op, op.key.name.c_str(), op.key.instance.c_str(), (unsigned long)op.ver.pool, (unsigned long long)op.ver.epoch, @@ -1017,7 +1022,7 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist rgw_bucket_dir_entry entry; bool ondisk = true; - string idx; + std::string idx; rc = read_key_entry(hctx, op.key, &idx, &entry); if (rc == -ENOENT) { entry.key = op.key; @@ -1065,9 +1070,7 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist return 0; } - if (entry.exists) { - unaccount_entry(header, entry); - } + unaccount_entry(header, entry); entry.ver = op.ver; switch ((int)op.op) { @@ -1112,19 +1115,26 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist } if (op.log_op && !header.syncstopped) { - rc = log_index_operation(hctx, op.key, op.op, op.tag, entry.meta.mtime, entry.ver, - CLS_RGW_STATE_COMPLETE, header.ver, header.max_marker, op.bilog_flags, NULL, NULL, &op.zones_trace); - if (rc < 0) + rc = log_index_operation(hctx, op.key, op.op, op.tag, entry.meta.mtime, + entry.ver, CLS_RGW_STATE_COMPLETE, header.ver, + header.max_marker, op.bilog_flags, NULL, NULL, + &op.zones_trace); + if (rc < 0) { return rc; + } } - CLS_LOG(20, "rgw_bucket_complete_op(): remove_objs.size()=%d", (int)op.remove_objs.size()); - for (auto remove_iter = op.remove_objs.begin(); remove_iter != op.remove_objs.end(); ++remove_iter) { + CLS_LOG(20, "rgw_bucket_complete_op(): remove_objs.size()=%d", + int(op.remove_objs.size())); + + for (auto remove_iter = op.remove_objs.begin(); + remove_iter != op.remove_objs.end(); + ++remove_iter) { cls_rgw_obj_key& remove_key = *remove_iter; CLS_LOG(1, "rgw_bucket_complete_op(): removing entries, read_index_entry name=%s instance=%s", remove_key.name.c_str(), remove_key.instance.c_str()); 
rgw_bucket_dir_entry remove_entry; - string k; + std::string k; int ret = read_key_entry(hctx, remove_key, &k, &remove_entry); if (ret < 0) { CLS_LOG(1, "rgw_bucket_complete_op(): removing entries, read_index_entry name=%s instance=%s ret=%d", @@ -1136,25 +1146,31 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist remove_entry.key.name.c_str(), remove_entry.key.instance.c_str(), int(remove_entry.meta.category)); + unaccount_entry(header, remove_entry); if (op.log_op && !header.syncstopped) { ++header.ver; // increment index version, or we'll overwrite keys previously written - rc = log_index_operation(hctx, remove_key, CLS_RGW_OP_DEL, op.tag, remove_entry.meta.mtime, - remove_entry.ver, CLS_RGW_STATE_COMPLETE, header.ver, header.max_marker, op.bilog_flags, NULL, NULL, &op.zones_trace); - if (rc < 0) + rc = log_index_operation(hctx, remove_key, CLS_RGW_OP_DEL, op.tag, + remove_entry.meta.mtime, remove_entry.ver, + CLS_RGW_STATE_COMPLETE, header.ver, + header.max_marker, op.bilog_flags, NULL, + NULL, &op.zones_trace); + if (rc < 0) { continue; + } } ret = cls_cxx_map_remove_key(hctx, k); if (ret < 0) { - CLS_LOG(1, "rgw_bucket_complete_op(): cls_cxx_map_remove_key, failed to remove entry, name=%s instance=%s read_index_entry ret=%d", remove_key.name.c_str(), remove_key.instance.c_str(), rc); + CLS_LOG(1, "rgw_bucket_complete_op(): cls_cxx_map_remove_key, failed to remove entry, name=%s instance=%s read_index_entry ret=%d", + remove_key.name.c_str(), remove_key.instance.c_str(), rc); continue; } } return write_bucket_header(hctx, &header); -} +} // rgw_bucket_complete_op template static int write_entry(cls_method_context_t hctx, T& entry, const string& key) From 0cb06740a98e8f841bbf905d2df162146b8a8ae7 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 4 Jan 2021 13:26:19 +0800 Subject: [PATCH 0050/2866] qa: enable dynamic debug support to kclient Add a 'kmount_count' counter in ctx to make sure the dynamic debug log won't be disabled until the last kernel mounter is unmounted. 
Fixes: https://tracker.ceph.com/issues/48736 Signed-off-by: Xiubo Li --- qa/tasks/cephfs/kernel_mount.py | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 55fdae5d9e84d..bb0562471a755 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -27,6 +27,7 @@ def __init__(self, ctx, test_dir, client_id, client_remote, client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + self.dynamic_debug = config.get('dynamic_debug', False) self.rbytes = config.get('rbytes', False) self.inst = None self.addr = None @@ -50,6 +51,12 @@ def mount(self, mntopts=[], check_status=True, **kwargs): self._set_filemode_on_mntpt() + if self.dynamic_debug: + kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}', 0) + if kmount_count == 0: + self.enable_dynamic_debug() + self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count + 1 + self.mounted = True def _run_mount_cmd(self, mntopts, check_status): @@ -113,6 +120,13 @@ def umount(self, force=False): timeout=(15*60), omit_sudo=False) raise e + if self.dynamic_debug: + kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}') + assert kmount_count + if kmount_count == 1: + self.disable_dynamic_debug() + self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count - 1 + self.mounted = False self.cleanup() @@ -221,6 +235,32 @@ def get_id_to_dir(): )) raise + def _dynamic_debug_control(self, enable): + """ + Write to dynamic debug control file. + """ + if enable: + fdata = "module ceph +p" + else: + fdata = "module ceph -p" + + self.run_shell_payload(f""" +sudo modprobe ceph +echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control +""") + + def enable_dynamic_debug(self): + """ + Enable the dynamic debug. + """ + self._dynamic_debug_control(True) + + def disable_dynamic_debug(self): + """ + Disable the dynamic debug. + """ + self._dynamic_debug_control(False) + def get_global_id(self): """ Look up the CephFS client ID for this mount, using debugfs. From 8ff162e7f9f88fb312f24a8a6f7ad75eab229250 Mon Sep 17 00:00:00 2001 From: Jiffin Tony Thottan Date: Wed, 15 Sep 2021 15:17:30 +0530 Subject: [PATCH 0051/2866] rgw: remove user-caps from RGWOp_User_Modify The adminOps API for Modify User checks for the user-caps value in the request but does not handle it in the later code path. Caps already have separate APIs, add_cap and remove_cap, to modify existing caps on a user. Hence removing the unused cap variables in RGWOp_User_Modify::execute(). Signed-off-by: Jiffin Tony Thottan --- doc/radosgw/adminops.rst | 7 ------- src/rgw/rgw_rest_user.cc | 3 --- 2 files changed, 10 deletions(-) diff --git a/doc/radosgw/adminops.rst b/doc/radosgw/adminops.rst index 36719887a0f3a..2affe8ba181f3 100644 --- a/doc/radosgw/adminops.rst +++ b/doc/radosgw/adminops.rst @@ -603,13 +603,6 @@ Request Parameters :Example: ``s3`` :Required: No -``user-caps`` - -:Description: User capabilities. -:Type: String -:Example: ``usage=read, write; users=read`` -:Required: No - ``max-buckets`` :Description: Specify the maximum number of buckets the user can own.
diff --git a/src/rgw/rgw_rest_user.cc b/src/rgw/rgw_rest_user.cc index 37c13003ec536..d6297f5472311 100644 --- a/src/rgw/rgw_rest_user.cc +++ b/src/rgw/rgw_rest_user.cc @@ -257,7 +257,6 @@ void RGWOp_User_Modify::execute(optional_yield y) std::string access_key; std::string secret_key; std::string key_type_str; - std::string caps; std::string op_mask_str; std::string default_placement_str; std::string placement_tags_str; @@ -278,7 +277,6 @@ void RGWOp_User_Modify::execute(optional_yield y) RESTArgs::get_string(s, "email", email, &email, &email_set); RESTArgs::get_string(s, "access-key", access_key, &access_key); RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); - RESTArgs::get_string(s, "user-caps", caps, &caps); RESTArgs::get_bool(s, "generate-key", false, &gen_key); RESTArgs::get_bool(s, "suspended", false, &suspended); RESTArgs::get_int32(s, "max-buckets", RGW_DEFAULT_MAX_BUCKETS, &max_buckets, "a_set); @@ -301,7 +299,6 @@ void RGWOp_User_Modify::execute(optional_yield y) if (email_set) op_state.set_user_email(email); - op_state.set_caps(caps); op_state.set_access_key(access_key); op_state.set_secret_key(secret_key); From 980bf59d45963d7282588671c12ef81c2443cec4 Mon Sep 17 00:00:00 2001 From: Daniel Gryniewicz Date: Thu, 9 Sep 2021 11:58:43 -0400 Subject: [PATCH 0052/2866] RGW Zipper - load user attributes with user Signed-off-by: Daniel Gryniewicz --- src/rgw/rgw_sal_rados.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rgw/rgw_sal_rados.cc b/src/rgw/rgw_sal_rados.cc index ae11e36a6d6ee..00d9b6443dbc5 100644 --- a/src/rgw/rgw_sal_rados.cc +++ b/src/rgw/rgw_sal_rados.cc @@ -205,7 +205,7 @@ int RadosUser::trim_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, u int RadosUser::load_user(const DoutPrefixProvider* dpp, optional_yield y) { - return store->ctl()->user->get_info_by_uid(dpp, info.user_id, &info, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker)); + return store->ctl()->user->get_info_by_uid(dpp, info.user_id, &info, y, RGWUserCtl::GetParams().set_objv_tracker(&objv_tracker).set_attrs(&attrs)); } int RadosUser::store_user(const DoutPrefixProvider* dpp, optional_yield y, bool exclusive, RGWUserInfo* old_info) From ffd4d1c34abdfc56dd14402ddcdd0463ed343090 Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Tue, 21 Sep 2021 11:26:21 +0100 Subject: [PATCH 0053/2866] osd: don't crash in backfill due to inconsistent hash info Instead fail the pull so it will try to recover from other shards (or will mark the object missing). 
Fixes: https://tracker.ceph.com/issues/48959 Signed-off-by: Mykola Golub --- src/osd/ECBackend.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 5388391c4f0d3..d9d2b5ae1be73 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -605,7 +605,16 @@ void ECBackend::continue_recovery_op( if (op.recovery_progress.first && op.obc) { /* We've got the attrs and the hinfo, might as well use them */ op.hinfo = get_hash_info(op.hoid); - ceph_assert(op.hinfo); + if (!op.hinfo) { + derr << __func__ << ": " << op.hoid << " has inconsistent hinfo" + << dendl; + ceph_assert(recovery_ops.count(op.hoid)); + eversion_t v = recovery_ops[op.hoid].v; + recovery_ops.erase(op.hoid); + get_parent()->on_failed_pull({get_parent()->whoami_shard()}, + op.hoid, v); + return; + } op.xattrs = op.obc->attr_cache; encode(*(op.hinfo), op.xattrs[ECUtil::get_hinfo_key()]); } From 104d8df5f44cf522422914b254536f42bbe31172 Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Tue, 21 Sep 2021 11:28:31 +0100 Subject: [PATCH 0054/2866] osd: no need to crash in deep scrub if hash info is invalid Signed-off-by: Mykola Golub --- src/osd/ECBackend.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index d9d2b5ae1be73..15bb059033abf 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -2588,7 +2588,11 @@ int ECBackend::be_deep_scrub( return 0; } else { if (!get_parent()->get_pool().allows_ecoverwrites()) { - ceph_assert(hinfo->has_chunk_hash()); + if (!hinfo->has_chunk_hash()) { + dout(0) << "_scan_list " << poid << " got invalid hash info" << dendl; + o.ec_size_mismatch = true; + return 0; + } if (hinfo->get_total_chunk_size() != (unsigned)pos.data_pos) { dout(0) << "_scan_list " << poid << " got incorrect size on read 0x" << std::hex << pos From 7d7be31f722d7bf165c69b223b39a600d9fec543 Mon Sep 17 00:00:00 2001 From: wangyunqing Date: Wed, 22 Sep 2021 11:05:40 +0800 Subject: [PATCH 0055/2866] doc/rbd/rbd-mirroring.rst: fix typos Signed-off-by: wangyunqing --- doc/rbd/rbd-mirroring.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/rbd/rbd-mirroring.rst b/doc/rbd/rbd-mirroring.rst index 575d0648d6a4d..464ed2ce92958 100644 --- a/doc/rbd/rbd-mirroring.rst +++ b/doc/rbd/rbd-mirroring.rst @@ -310,7 +310,7 @@ For example:: section of the local or centralized configuration. Note that these settings may allow ``rbd-mirror`` to present a substantial write workload to the destination cluster: monitor cluster performance closely during - migrations and test carefuly before running multiple migrations in parallel. + migrations and test carefully before running multiple migrations in parallel. 
Create Image Mirror-Snapshots ----------------------------- From 2f2fa761b400debe1c6c210162b91e3c2c3dc3f9 Mon Sep 17 00:00:00 2001 From: Paul Reece Date: Wed, 22 Sep 2021 09:37:23 -0400 Subject: [PATCH 0056/2866] Added comment explaining the change Signed-off-by: Paul Reece --- src/rgw/rgw_multi.cc | 1 + src/rgw/rgw_op.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/src/rgw/rgw_multi.cc b/src/rgw/rgw_multi.cc index 82c5d0ed2cd97..e2f69ac60a7d4 100644 --- a/src/rgw/rgw_multi.cc +++ b/src/rgw/rgw_multi.cc @@ -56,6 +56,7 @@ bool RGWMultiCompleteUpload::xml_end(const char *el) { XMLObj *RGWMultiXMLParser::alloc_obj(const char *el) { XMLObj *obj = NULL; + // CompletedMultipartUpload is incorrect but some versions of some libraries use it, see PR #41700 if (strcmp(el, "CompleteMultipartUpload") == 0 || strcmp(el, "CompletedMultipartUpload") == 0 || strcmp(el, "MultipartUpload") == 0) { diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 5e6f13aed0996..2370213e2f394 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -5888,6 +5888,7 @@ void RGWCompleteMultipart::execute(optional_yield y) parts = static_cast(parser.find_first("CompleteMultipartUpload")); if (!parts || parts->parts.empty()) { + // CompletedMultipartUpload is incorrect but some versions of some libraries use it, see PR #41700 parts = static_cast(parser.find_first("CompletedMultipartUpload")); } From 22c09dee82084e974489b8a4cd19b238f2f2593b Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Wed, 22 Sep 2021 16:36:21 +0200 Subject: [PATCH 0057/2866] os/bluestore/bluefs: Removed unused fields Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index a802443b60c63..085b131d0dcc2 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -309,9 +309,6 @@ class BlueFS { }; private: - ceph::mutex log_lock = ceph::make_mutex("BlueFS::log_lock"); - ceph::mutex dirs_lock = ceph::make_mutex("BlueFS::dirs_lock"); - ceph::mutex dirty_lock = ceph::make_mutex("BlueFS::dirty_lock"); PerfCounters *logger = nullptr; uint64_t max_bytes[MAX_BDEV] = {0}; From f9213ad9cf438afc81a1bf0a53c00184dfaf7b1b Mon Sep 17 00:00:00 2001 From: Paul Cuzner Date: Thu, 23 Sep 2021 09:43:57 +1200 Subject: [PATCH 0058/2866] monitoring: Adding the Ceph MIB The Ceph MIB has been created and maintained in a separate repo: https://github.com/SUSE/prometheus-webhook-snmp This patch brings this MIB into the main ceph repo, so alert changes can target prometheus and potentially SNMP environments within the same PR. Kudos to Volker Theile for creating the MIB. Fixes: https://tracker.ceph.com/issues/52708 Signed-off-by: Paul Cuzner --- monitoring/prometheus/README.md | 7 + monitoring/snmp/CEPH-PROMETHEUS-ALERT-MIB.txt | 377 ++++++++++++++++++ monitoring/snmp/README.md | 24 ++ 3 files changed, 408 insertions(+) create mode 100644 monitoring/snmp/CEPH-PROMETHEUS-ALERT-MIB.txt create mode 100644 monitoring/snmp/README.md diff --git a/monitoring/prometheus/README.md b/monitoring/prometheus/README.md index fde63a35fe2ee..205e373b19c0e 100644 --- a/monitoring/prometheus/README.md +++ b/monitoring/prometheus/README.md @@ -5,3 +5,10 @@ In monitoring/prometheus/alerts you'll find a set of Prometheus alert rules that should provide a decent set of default alerts for a Ceph cluster. Just put this file in a place according to your Prometheus configuration (wherever the `rules` configuration stanza points).
+ +### SNMP +Ceph provides a MIB (CEPH-PROMETHEUS-ALERT-MIB.txt) to support sending prometheus +alerts through to an SNMP management platform. The translation from prometheus +alert to SNMP trap requires the prometheus alert to contain an OID that maps to +a definition within the MIB. When making changes to the prometheus alert rules +file, developers should include any necessary changes to the MIB. diff --git a/monitoring/snmp/CEPH-PROMETHEUS-ALERT-MIB.txt b/monitoring/snmp/CEPH-PROMETHEUS-ALERT-MIB.txt new file mode 100644 index 0000000000000..0eeda4471cb7d --- /dev/null +++ b/monitoring/snmp/CEPH-PROMETHEUS-ALERT-MIB.txt @@ -0,0 +1,377 @@ +CEPH-PROMETHEUS-ALERT-MIB DEFINITIONS ::= BEGIN + +IMPORTS + MODULE-IDENTITY, OBJECT-TYPE, NOTIFICATION-TYPE, enterprises, TimeTicks + FROM SNMPv2-SMI + DisplayString + FROM SNMPv2-TC +; + +ceph OBJECT IDENTIFIER ::= { enterprises 50495 } +prometheus OBJECT IDENTIFIER ::= { ceph 15 } + +prometheusAlert MODULE-IDENTITY + LAST-UPDATED "201904010000Z" -- 1. Apr 2019 + ORGANIZATION "The Ceph Project" + CONTACT-INFO "https://ceph.com" + DESCRIPTION "Prometheus Alert SNMP MIB" + REVISION "201904010000Z" -- 1. Apr 2019 + DESCRIPTION "Initial version." + ::= { prometheus 1 } + +prometheusAlertObjects OBJECT IDENTIFIER ::= { prometheusAlert 1 } +prometheusAlertTraps OBJECT IDENTIFIER ::= { prometheusAlert 2 } + +-- +-- Objects +-- + +prometheusAlertNotificationAlertName OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "The name of the Prometheus alert." +::= { prometheusAlertObjects 1 } + +prometheusAlertNotificationStatus OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "The status of the Prometheus alert." +::= { prometheusAlertObjects 2 } + +prometheusAlertNotificationSeverity OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "The severity of the Prometheus alert." +::= { prometheusAlertObjects 3 } + +prometheusAlertNotificationInstance OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "Unique identifier for the Prometheus instance." +::= { prometheusAlertObjects 4 } + +prometheusAlertNotificationJob OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "The name of the Prometheus job." +::= { prometheusAlertObjects 5 } + +prometheusAlertNotificationDescription OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "The Prometheus alert description field." +::= { prometheusAlertObjects 6 } + +prometheusAlertNotificationLabels OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "Additional Prometheus alert labels as JSON string." +::= { prometheusAlertObjects 7 } + +prometheusAlertNotificationTimestamp OBJECT-TYPE + SYNTAX TimeTicks + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "The time when the Prometheus alert occurred." +::= { prometheusAlertObjects 8 } + +prometheusAlertNotificationRawData OBJECT-TYPE + SYNTAX DisplayString + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION "The raw Prometheus alert as JSON string." 
+::= { prometheusAlertObjects 9 } + +-- +-- Traps +-- + +prometheusAlertTrapDefault NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "Default trap." +::= { prometheusAlertTraps 1 } + +prometheusAlertClusterHealthTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 2 } +prometheusAlertMonTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 3 } +prometheusAlertOsdTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 4 } +prometheusAlertMdsTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 5 } +prometheusAlertMgrTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 6 } +prometheusAlertPgsTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 7 } +prometheusAlertNodesTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 8 } +prometheusAlertPoolsTraps OBJECT IDENTIFIER ::= { prometheusAlertTraps 9 } + +prometheusAlertClusterHealthTrapHealthError NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "Ceph in health_error state for more than 5m." +::= { prometheusAlertClusterHealthTraps 1 } + +prometheusAlertClusterHealthTrapHealthWarn NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "Ceph in health_warn for more than 15m." +::= { prometheusAlertClusterHealthTraps 2 } + +prometheusAlertMonTrapLowMonitorQuorumCount NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "Monitor count in quorum is low." +::= { prometheusAlertMonTraps 1 } + +prometheusAlertOsdTrap10PercentOsdsDown NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "More than 10% of OSDs are down." 
+::= { prometheusAlertOsdTraps 1 } + +prometheusAlertOsdTrapOsdDown NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "One or more OSDs down for more than 15 minutes." +::= { prometheusAlertOsdTraps 2 } + +prometheusAlertOsdTrapOsdsNearFull NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "An OSD is dangerously full, over 80%." +::= { prometheusAlertOsdTraps 3 } + +prometheusAlertOsdTrapFlapOsd NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "An OSD was marked down at back up at least once a minute for 5 minutes." +::= { prometheusAlertOsdTraps 4 } + +prometheusAlertOsdTrapHighPgCountDeviation NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "An OSD deviates by more then 30% from average PG count." +::= { prometheusAlertOsdTraps 5 } + +prometheusAlertPgsTrapPgsInactive NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "One or more PGs are inactive for more than 5 minutes." +::= { prometheusAlertPgsTraps 1 } + +prometheusAlertPgsTrapPgsUnclean NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "One or more PGs are not clean for more than 15 minutes." 
+::= { prometheusAlertPgsTraps 2 } + +prometheusAlertNodesTrapRootVolumeFull NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "Root volume (OSD and MON store) is dangerously full (< 5% free)." +::= { prometheusAlertNodesTraps 1 } + +prometheusAlertNodesTrapNetworkPacketsDropped NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "A node experiences packet drop > 1 packet/s on an interface." +::= { prometheusAlertNodesTraps 2 } + +prometheusAlertNodesTrapNetworkPacketErrors NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "A node experiences packet errors > 1 packet/s on an interface." +::= { prometheusAlertNodesTraps 3 } + +prometheusAlertNodesTrapStorageFilling NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "A mountpoint will be full in less then 5 days assuming the average fillup rate of the past 48 hours." +::= { prometheusAlertNodesTraps 4 } + +prometheusAlertPoolsTrapPoolFull NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "A pool is at 90% capacity or over." +::= { prometheusAlertPoolsTraps 1 } + +prometheusAlertPoolsTrapPoolFillingUp NOTIFICATION-TYPE + OBJECTS { + prometheusAlertNotificationAlertName, + prometheusAlertNotificationStatus, + prometheusAlertNotificationSeverity, + prometheusAlertNotificationInstance, + prometheusAlertNotificationJob, + prometheusAlertNotificationDescription, + prometheusAlertNotificationLabels, + prometheusAlertNotificationTimestamp, + prometheusAlertNotificationRawData + } + STATUS current + DESCRIPTION "A pool will be full in less then 5 days assuming the average fillup rate of the past 48 hours." 
+::= { prometheusAlertPoolsTraps 2 } + +END diff --git a/monitoring/snmp/README.md b/monitoring/snmp/README.md new file mode 100644 index 0000000000000..dccef1908f89d --- /dev/null +++ b/monitoring/snmp/README.md @@ -0,0 +1,24 @@ +# SNMP schema + +## Traps + +| OID | Description | +| :--- | :--- | +| 1.3.6.1.4.1.50495.15.1.2.1 | The default trap. This is used if no OID is specified in the alert labels. | +| 1.3.6.1.4.1.50495.15.1.2.[2...N] | Custom traps. | + +## Objects + +The following objects are appended as variable binds to an SNMP trap. + +| OID | Type | Description | +| :--- | :---: | :--- | +| 1.3.6.1.4.1.50495.15.1.1.1 | String | The name of the Prometheus alert. | +| 1.3.6.1.4.1.50495.15.1.1.2 | String | The status of the Prometheus alert. | +| 1.3.6.1.4.1.50495.15.1.1.3 | String | The severity of the Prometheus alert. | +| 1.3.6.1.4.1.50495.15.1.1.4 | String | Unique identifier for the Prometheus instance. | +| 1.3.6.1.4.1.50495.15.1.1.5 | String | The name of the Prometheus job. | +| 1.3.6.1.4.1.50495.15.1.1.6 | String | The Prometheus alert description field. | +| 1.3.6.1.4.1.50495.15.1.1.7 | String | Additional Prometheus alert labels as JSON string. | +| 1.3.6.1.4.1.50495.15.1.1.8 | Unix timestamp | The time when the Prometheus alert occurred. | +| 1.3.6.1.4.1.50495.15.1.1.9 | String | The raw Prometheus alert as JSON string. | \ No newline at end of file From bfbd279875f6395e203f17d2f22686f6f9ad9125 Mon Sep 17 00:00:00 2001 From: Elena Chernikova Date: Mon, 27 Sep 2021 01:13:00 +0300 Subject: [PATCH 0059/2866] sysvinit: create /var/run/ceph before pid /var/run is mounted as tmpfs and will be cleared after reboot. On sysvinit ceph init service creates this directory for pid with root ownership. Fixes: https://tracker.ceph.com/issues/19242 Signed-off-by: Elena Chernikova --- src/init-ceph.in | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/init-ceph.in b/src/init-ceph.in index f60c59293181d..9d93e6b85c1d4 100755 --- a/src/init-ceph.in +++ b/src/init-ceph.in @@ -295,9 +295,12 @@ for name in $what; do get_conf pid_file "$run_dir/$type.$id.pid" "pid file" + if [ ! -d $run_dir ]; then + install -d -m0770 -o ceph -g ceph /var/run/ceph + fi + if [ "$command" = "start" -o "$command" = "onestart" ]; then if [ -n "$pid_file" ]; then - do_cmd "mkdir -p "`dirname $pid_file` cmd="$cmd --pid-file $pid_file" fi @@ -423,10 +426,6 @@ for name in $what; do fi echo Starting Ceph $name on $host... - if [ ! -d $run_dir ]; then - # assume /var/run exists - install -d -m0770 -o ceph -g ceph /var/run/ceph - fi get_conf pre_start_eval "" "pre start eval" [ -n "$pre_start_eval" ] && $pre_start_eval get_conf pre_start "" "pre start command" From 2260e9d711fcf61e5530f7dd9d420b14e174f03a Mon Sep 17 00:00:00 2001 From: Prashant D Date: Tue, 28 Sep 2021 06:30:07 +0100 Subject: [PATCH 0060/2866] doc/dev/config: Replace invalid config debug-pg There is no such '--debug-pg' configuration option available, use '--debug-monc' instead. Signed-off-by: Prashant D --- doc/dev/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/config.rst b/doc/dev/config.rst index a42cf3c29f1df..9cb20aee7e864 100644 --- a/doc/dev/config.rst +++ b/doc/dev/config.rst @@ -12,7 +12,7 @@ How can the configuration be set? Well, there are several sources: - command line arguments:: --debug-ms=1 - --debug-pg=10 + --debug-monc=10 etc. 
- arguments injected at runtime using ``injectargs`` or ``config set`` From e303dc1cf543fa467d6e0a16c122f77570865e21 Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Tue, 21 Sep 2021 16:17:17 +0100 Subject: [PATCH 0061/2866] osd: handle case when stat returns with error in get_hash_info Previously in the case of the error we stored in the cache and returned HashInfo(ec_impl->get_chunk_count()), which e.g. could propagate to non-primary shards, introducing inconsistency. The function's `checks` flag is replaced with `create` flag, which seems to have more clear meaning here. In be_deep_scrub the get_hash_info is still called with the second argument false (i.e. with `create=false`, while previously it was `checks=false`), which is done intentionally. Fixes: https://tracker.ceph.com/issues/48959 Signed-off-by: Mykola Golub --- src/osd/ECBackend.cc | 11 +++++++---- src/osd/ECBackend.h | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 15bb059033abf..b08be6e6f709d 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1827,7 +1827,7 @@ void ECBackend::do_read_op(ReadOp &op) } ECUtil::HashInfoRef ECBackend::get_hash_info( - const hobject_t &hoid, bool checks, const map> *attrs) + const hobject_t &hoid, bool create, const map> *attrs) { dout(10) << __func__ << ": Getting attr on " << hoid << dendl; ECUtil::HashInfoRef ref = unstable_hashinfo_registry.lookup(hoid); @@ -1839,7 +1839,6 @@ ECUtil::HashInfoRef ECBackend::get_hash_info( ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), &st); ECUtil::HashInfo hinfo(ec_impl->get_chunk_count()); - // XXX: What does it mean if there is no object on disk? if (r >= 0) { dout(10) << __func__ << ": found on disk, size " << st.st_size << dendl; bufferlist bl; @@ -1869,7 +1868,7 @@ ECUtil::HashInfoRef ECBackend::get_hash_info( dout(0) << __func__ << ": Can't decode hinfo for " << hoid << dendl; return ECUtil::HashInfoRef(); } - if (checks && hinfo.get_total_chunk_size() != (uint64_t)st.st_size) { + if (hinfo.get_total_chunk_size() != (uint64_t)st.st_size) { dout(0) << __func__ << ": Mismatch of total_chunk_size " << hinfo.get_total_chunk_size() << dendl; return ECUtil::HashInfoRef(); @@ -1877,6 +1876,10 @@ ECUtil::HashInfoRef ECBackend::get_hash_info( } else if (st.st_size > 0) { // If empty object and no hinfo, create it return ECUtil::HashInfoRef(); } + } else if (r != -ENOENT || !create) { + derr << __func__ << ": stat " << hoid << " failed: " << cpp_strerror(r) + << dendl; + return ECUtil::HashInfoRef(); } ref = unstable_hashinfo_registry.lookup_or_create(hoid, hinfo); } @@ -1891,7 +1894,7 @@ void ECBackend::start_rmw(Op *op, PGTransactionUPtr &&t) sinfo, std::move(t), [&](const hobject_t &i) { - ECUtil::HashInfoRef ref = get_hash_info(i, false); + ECUtil::HashInfoRef ref = get_hash_info(i, true); if (!ref) { derr << __func__ << ": get_hash_info(" << i << ")" << " returned a null pointer and there is no " diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index e5e4acee87002..296c1202afa37 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -629,7 +629,7 @@ class ECBackend : public PGBackend { const ECUtil::stripe_info_t sinfo; /// If modified, ensure that the ref is held until the update is applied SharedPtrRegistry unstable_hashinfo_registry; - ECUtil::HashInfoRef get_hash_info(const hobject_t &hoid, bool checks = true, + ECUtil::HashInfoRef get_hash_info(const hobject_t &hoid, bool create = false, const std::map> *attr = NULL); public: 
From 179124e80fa3e792f2ace9bfcec827edb4caba85 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 28 Sep 2021 11:27:20 -0400 Subject: [PATCH 0062/2866] .github/CODEOWNERS: tag rook devs on rook related PRs Signed-off-by: Sage Weil --- .github/CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 50c0be9a8a6aa..88c6da6c62d4e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -35,6 +35,10 @@ /doc/dev/cephadm @ceph/orchestrators /doc/man/8/cephadm.rst @ceph/orchestrators +/qa/suites/orch/rook @ceph/rook +/src/pybind/mgr/rook @ceph/rook +/doc/mgr/rook.rst @ceph/rook + #ceph-volume /src/ceph-volume @ceph/ceph-volume /doc/ceph-volume @ceph/ceph-volume From d35920da5e58fb6c116d4e46aa63bce1adc91bff Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Wed, 22 Sep 2021 10:43:24 +0100 Subject: [PATCH 0063/2866] qa/suites/rados: add inconsistent hinfo test Signed-off-by: Mykola Golub --- .../singleton/all/ec-inconsistent-hinfo.yaml | 36 +++ qa/tasks/ec_inconsistent_hinfo.py | 225 ++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml create mode 100644 qa/tasks/ec_inconsistent_hinfo.py diff --git a/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml new file mode 100644 index 0000000000000..d71eab1499260 --- /dev/null +++ b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml @@ -0,0 +1,36 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force + log-ignorelist: + - \(OBJECT_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(SLOW_OPS\) + - deep-scrub + - missing + - overall HEALTH_ + - repair + - slow request + - unfound + conf: + osd: + osd min pg log entries: 5 + osd max pg log entries: 5 +- ec_inconsistent_hinfo: diff --git a/qa/tasks/ec_inconsistent_hinfo.py b/qa/tasks/ec_inconsistent_hinfo.py new file mode 100644 index 0000000000000..fa10f2c45ba06 --- /dev/null +++ b/qa/tasks/ec_inconsistent_hinfo.py @@ -0,0 +1,225 @@ +""" +Inconsistent_hinfo +""" +import logging +import time +from dateutil.parser import parse +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def wait_for_deep_scrub_complete(manager, pgid, check_time_now, inconsistent): + log.debug("waiting for pg %s deep-scrub complete (check_time_now=%s)" % + (pgid, check_time_now)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg); + assert pg + + last_deep_scrub_time = parse(pg['last_deep_scrub_stamp']).strftime('%s') + if last_deep_scrub_time < check_time_now: + log.debug('not scrubbed') + continue + + status = pg['state'].split('+') + if inconsistent: + assert 'inconsistent' in status + else: + assert 'inconsistent' not in status + return + + assert False, 'not scrubbed' + + +def wait_for_backfilling_complete(manager, pgid, from_osd, to_osd): + log.debug("waiting for pg %s backfill from osd.%s to osd.%s complete" % + (pgid, from_osd, to_osd)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = 
manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg); + assert pg + status = pg['state'].split('+') + if 'active' not in status: + log.debug('not active') + continue + if 'backfilling' in status: + assert from_osd in pg['acting'] and to_osd in pg['up'] + log.debug('backfilling') + continue + if to_osd not in pg['up']: + log.debug('backfill not started yet') + continue + log.debug('backfilled!') + break + +def task(ctx, config): + """ + Test handling of objects with inconsistent hash info during backfill and deep-scrub. + + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'ec_inconsistent_hinfo task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'backfill_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + pg_num=1, + erasure_code_profile_name=profile_name, + min_size=2) + manager.raw_cluster_cmd('osd', 'pool', 'set', pool, + 'pg_autoscale_mode', 'off') + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + pool_id = manager.get_pool_num(pool) + pgid = '%d.0' % pool_id + pgs = manager.get_pg_stats() + acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) + log.info("acting=%s" % acting) + assert acting + primary = acting[0] + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + log.debug("create test object") + obj = 'test' + rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile]) + + victim = acting[1] + + log.info("remove test object hash info from osd.%s shard and test deep-scrub and repair" + % victim) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=victim) + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) + wait_for_deep_scrub_complete(manager, pgid, check_time_now, True) + + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'repair', pgid) + wait_for_deep_scrub_complete(manager, pgid, check_time_now, False) + + log.info("remove test object hash info from primary osd.%s shard and test backfill" + % primary) + + log.debug("write some data") + rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096', + '--no-cleanup']) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + + # mark the osd out to trigger a rebalance/backfill + source = acting[1] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.mark_out_osd(source) + + # wait for everything to peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') 
+ unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + source, target = target, source + log.info("remove test object hash info from non-primary osd.%s shard and test backfill" + % source) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd in to trigger a rebalance/backfill + manager.mark_in_osd(target) + + # wait for everything to peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + log.info("remove hash info from two shards and test backfill") + + source = acting[2] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd out to trigger a rebalance/backfill + manager.mark_out_osd(source) + + # wait for everything to peer, backfill and detect unfound object + wait_for_backfilling_complete(manager, pgid, source, target) + + # verify that there is unfound object + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'backfill_unfound' in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 1 + m = manager.list_pg_unfound(pgid) + log.debug('list_pg_unfound=%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + # mark stuff lost + pgs = manager.get_pg_stats() + manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete') + + # wait for everything to peer and be happy... 
+ manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_recovery() From b20637714e366b44ec604a76bab9095d5f4a1ef2 Mon Sep 17 00:00:00 2001 From: Adam King Date: Tue, 28 Sep 2021 22:10:07 -0400 Subject: [PATCH 0064/2866] mgr/cephadm: use json-pretty for ceph-volume calls To avoid a single line json string that is too long and causes a chunk longer than limit error Fixes: https://tracker.ceph.com/issues/52745 Signed-off-by: Adam King --- src/pybind/mgr/cephadm/serve.py | 2 +- src/pybind/mgr/cephadm/tests/test_cephadm.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index f8d4398b56f6b..7297b9dd2e879 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -470,7 +470,7 @@ def _refresh_facts(self, host: str) -> Optional[str]: def _refresh_host_devices(self, host: str) -> Optional[str]: with_lsm = self.mgr.get_module_option('device_enhanced_scan') inventory_args = ['--', 'inventory', - '--format=json', + '--format=json-pretty', '--filter-for-batch'] if with_lsm: inventory_args.insert(-1, "--with-lsm") diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 50628ca3f78b9..ba5ebe7cf539c 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -1355,10 +1355,10 @@ def test_ceph_volume_no_filter_for_batch(self, _run_cephadm, cephadm_module: Cep assert _run_cephadm.mock_calls == [ mock.call('test', 'osd', 'ceph-volume', - ['--', 'inventory', '--format=json', '--filter-for-batch'], image='', + ['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='', no_fsid=False), mock.call('test', 'osd', 'ceph-volume', - ['--', 'inventory', '--format=json'], image='', + ['--', 'inventory', '--format=json-pretty'], image='', no_fsid=False), ] From 97219591dab8f54f239ccfb1891c45b01e7bec79 Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Wed, 22 Sep 2021 09:35:54 +0200 Subject: [PATCH 0065/2866] cephadm: fix osd size reported in 'orch ls' The current code enforces ('running osds' + 1), see [1]. Let's simply print the number of running osds instead of a meaningless calculation. [1] https://github.com/ceph/ceph/commit/1c0a0a6f6153dc555085f263f161666cd171964a#diff-4f2fb7d330e74b64ac41457b7c7a723cd78db86433e0b0c398874531e5a7e39eR1852-R1859 Fixes: https://tracker.ceph.com/issues/50928 Signed-off-by: Guillaume Abrioux --- src/pybind/mgr/orchestrator/module.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index d75271fbff6fc..be9818a1fe0bc 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -604,10 +604,15 @@ def ukn(s: Optional[str]) -> str: else: refreshed = nice_delta(now, s.last_refresh, ' ago') + if s.spec.service_type == 'osd': + running = str(s.running) + else: + running = '{}/{}'.format(s.running, s.size) + table.add_row(( s.spec.service_name(), s.get_port_summary(), - '%d/%d' % (s.running, s.size), + running, refreshed, nice_delta(now, s.created), pl, From 1d848dc447fd4df18893903a053d58129fb8c13a Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Wed, 29 Sep 2021 08:42:33 +0200 Subject: [PATCH 0066/2866] cephadm: add a test on 'ceph orch ls' This adds a unittest in order to cover the output of 'orch ls' command and check the reporting about the running osd number is displayed as expected. 
Signed-off-by: Guillaume Abrioux Co-Authored-by: Sebastian Wagner --- .../orchestrator/tests/test_orchestrator.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py index 35154270449f8..73a6d9b288908 100644 --- a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py +++ b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py @@ -11,9 +11,10 @@ from test_orchestrator import TestOrchestrator as _TestOrchestrator -from orchestrator import InventoryHost, DaemonDescription, ServiceDescription, DaemonDescriptionStatus +from orchestrator import InventoryHost, DaemonDescription, ServiceDescription, DaemonDescriptionStatus, OrchResult from orchestrator import OrchestratorValidationError from orchestrator.module import to_format, Format, OrchestratorCli, preview_table_osd +from unittest import mock def _test_resource(data, resource_class, extra=None): @@ -140,6 +141,21 @@ def test_handle_command(): retval=-2, stdout='', stderr='No orchestrator configured (try `ceph orch set backend`)') +r = OrchResult([ServiceDescription(spec=ServiceSpec(service_type='osd'), running=123)]) + + +@mock.patch("orchestrator.OrchestratorCli.describe_service", return_value=r) +def test_orch_ls(_describe_service): + cmd = { + 'prefix': 'orch ls', + } + m = OrchestratorCli('orchestrator', 0, 0) + r = m._handle_command(None, cmd) + out = 'NAME PORTS RUNNING REFRESHED AGE PLACEMENT \n' \ + 'osd 123 - - ' + assert r == HandleCommandResult(retval=0, stdout=out, stderr='') + + def test_preview_table_osd_smoke(): data = [ { From baf0c10bb9374f53a6538cc0d9b0390782f457f3 Mon Sep 17 00:00:00 2001 From: wangyingbin Date: Thu, 30 Sep 2021 15:34:30 +0800 Subject: [PATCH 0067/2866] rgw:word spelling error in rgw/rgw_pubsub.h file In line 362, mistakenly write "setting" as "settign" Signed-off-by: wangyingbin --- src/rgw/rgw_pubsub.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rgw/rgw_pubsub.h b/src/rgw/rgw_pubsub.h index ba86bbe845ee2..fb3005a0ab5ed 100644 --- a/src/rgw/rgw_pubsub.h +++ b/src/rgw/rgw_pubsub.h @@ -358,7 +358,7 @@ struct rgw_pubsub_event { }; WRITE_CLASS_ENCODER(rgw_pubsub_event) -// settign a unique ID for an event based on object hash and timestamp +// setting a unique ID for an event based on object hash and timestamp void set_event_id(std::string& id, const std::string& hash, const utime_t& ts); struct rgw_pubsub_sub_dest { From 8ef77a0bbcef710185c7ee6452d6ca8a133843c1 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 28 Sep 2021 16:57:41 +0200 Subject: [PATCH 0068/2866] doc/cephadm: use sphinx autoclass to document RGWSpec Signed-off-by: Sebastian Wagner --- doc/cephadm/services/index.rst | 25 ++------- doc/cephadm/services/rgw.rst | 10 +++- doc/mgr/orchestrator_modules.rst | 4 ++ .../ceph/deployment/service_spec.py | 55 +++++++++++++++++-- 4 files changed, 68 insertions(+), 26 deletions(-) diff --git a/doc/cephadm/services/index.rst b/doc/cephadm/services/index.rst index 00eacdfce2f7e..f34180eb24fc6 100644 --- a/doc/cephadm/services/index.rst +++ b/doc/cephadm/services/index.rst @@ -99,28 +99,15 @@ deployment of services. Here is an example of a service specification in YAML: unmanaged: false networks: - 192.169.142.0/24 - ... + spec: + # Additional service specific attributes. In this example, the properties of this service specification are: -* ``service_type`` - The type of the service. 
Needs to be either a Ceph - service (``mon``, ``crash``, ``mds``, ``mgr``, ``osd`` or - ``rbd-mirror``), a gateway (``nfs`` or ``rgw``), part of the - monitoring stack (``alertmanager``, ``grafana``, ``node-exporter`` or - ``prometheus``) or (``container``) for custom containers. -* ``service_id`` - The name of the service. -* ``placement`` - See :ref:`orchestrator-cli-placement-spec`. -* ``networks``: A list of network identities instructing the daemons to only bind - on the particular networks in that list. In case the cluster is distributed across multiple - networks, you can add multiple networks. See :ref:`cephadm-monitoring-networks-ports`, - :ref:`cephadm-rgw-networks` and :ref:`cephadm-mgr-networks`. -* ``unmanaged`` If set to ``true``, the orchestrator will not deploy nor remove - any daemon associated with this service. Placement and all other properties - will be ignored. This is useful, if you do not want this service to be - managed temporarily. For cephadm, See :ref:`cephadm-spec-unmanaged` +.. py:currentmodule:: ceph.deployment.service_spec + +.. autoclass:: ServiceSpec + :members: Each service type can have additional service-specific properties. diff --git a/doc/cephadm/services/rgw.rst b/doc/cephadm/services/rgw.rst index 2914042cc375b..47017139b76c3 100644 --- a/doc/cephadm/services/rgw.rst +++ b/doc/cephadm/services/rgw.rst @@ -140,7 +140,15 @@ Then apply this yaml document: ceph orch apply -i myrgw.yaml Note the value of ``rgw_frontend_ssl_certificate`` is a literal string as -indicated by a ``|`` character preserving newline characters. +indicated by a ``|`` character preserving newline characters. + +Service specification +--------------------- + +.. py:currentmodule:: ceph.deployment.service_spec + +.. autoclass:: RGWSpec + :members: .. _orchestrator-haproxy-service-spec: diff --git a/doc/mgr/orchestrator_modules.rst b/doc/mgr/orchestrator_modules.rst index 65e2e4981d133..5991afe613656 100644 --- a/doc/mgr/orchestrator_modules.rst +++ b/doc/mgr/orchestrator_modules.rst @@ -233,6 +233,9 @@ Services .. py:currentmodule:: ceph.deployment.service_spec .. autoclass:: ServiceSpec + :members: + :private-members: + :noindex: .. py:currentmodule:: orchestrator @@ -291,6 +294,7 @@ Services .. py:currentmodule:: ceph.deployment.service_spec .. autoclass:: RGWSpec + :noindex: .. py:currentmodule:: orchestrator diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 98246376e947a..698676798513f 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -411,7 +411,6 @@ class ServiceSpec(object): This structure is supposed to be enough information to start the services. - """ KNOWN_SERVICE_TYPES = 'alertmanager crash grafana iscsi mds mgr mon nfs ' \ 'node-exporter osd prometheus rbd-mirror rgw agent ' \ @@ -466,15 +465,37 @@ def __init__(self, preview_only: bool = False, networks: Optional[List[str]] = None, ): + + #: See :ref:`orchestrator-cli-placement-spec`. self.placement = PlacementSpec() if placement is None else placement # type: PlacementSpec assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES, service_type + #: The type of the service. Needs to be either a Ceph + #: service (``mon``, ``crash``, ``mds``, ``mgr``, ``osd`` or + #: ``rbd-mirror``), a gateway (``nfs`` or ``rgw``), part of the + #: monitoring stack (``alertmanager``, ``grafana``, ``node-exporter`` or + #: ``prometheus``) or (``container``) for custom containers. 
self.service_type = service_type + + #: The name of the service. Required for ``iscsi``, ``mds``, ``nfs``, ``osd``, ``rgw``, + #: ``container``, ``ingress`` self.service_id = None + if self.service_type in self.REQUIRES_SERVICE_ID: self.service_id = service_id + + #: If set to ``true``, the orchestrator will not deploy nor remove + #: any daemon associated with this service. Placement and all other properties + #: will be ignored. This is useful, if you do not want this service to be + #: managed temporarily. For cephadm, See :ref:`cephadm-spec-unmanaged` self.unmanaged = unmanaged self.preview_only = preview_only + + #: A list of network identities instructing the daemons to only bind + #: on the particular networks in that list. In case the cluster is distributed + #: across multiple networks, you can add multiple networks. See + #: :ref:`cephadm-monitoring-networks-ports`, + #: :ref:`cephadm-rgw-networks` and :ref:`cephadm-mgr-networks`. self.networks: List[str] = networks or [] self.config: Optional[Dict[str, str]] = None @@ -519,6 +540,8 @@ def from_json(cls: Type[ServiceSpecT], json_spec: Dict) -> ServiceSpecT: the next two major releases (octoups, pacific). :param json_spec: A valid dict with ServiceSpec + + :meta private: """ if not isinstance(json_spec, dict): @@ -698,7 +721,21 @@ class RGWSpec(ServiceSpec): """ Settings to configure a (multisite) Ceph RGW + .. code-block:: yaml + + service_type: rgw + service_id: myrealm.myzone + spec: + rgw_realm: myrealm + rgw_zone: myzone + ssl: true + rgw_frontend_port: 1234 + rgw_frontend_type: beast + rgw_frontend_ssl_certificate: ... + + See also: :ref:`orchestrator-cli-service-spec` """ + MANAGED_CONFIG_OPTIONS = ServiceSpec.MANAGED_CONFIG_OPTIONS + [ 'rgw_zone', 'rgw_realm', @@ -732,11 +769,17 @@ def __init__(self, placement=placement, unmanaged=unmanaged, preview_only=preview_only, config=config, networks=networks) - self.rgw_realm = rgw_realm - self.rgw_zone = rgw_zone - self.rgw_frontend_port = rgw_frontend_port - self.rgw_frontend_ssl_certificate = rgw_frontend_ssl_certificate - self.rgw_frontend_type = rgw_frontend_type + #: The RGW realm associated with this service. Needs to be manually created + self.rgw_realm: Optional[str] = rgw_realm + #: The RGW zone associated with this service. Needs to be manually created + self.rgw_zone: Optional[str] = rgw_zone + #: Port of the RGW daemons + self.rgw_frontend_port: Optional[int] = rgw_frontend_port + #: List of SSL certificates + self.rgw_frontend_ssl_certificate: Optional[List[str]] = rgw_frontend_ssl_certificate + #: civetweb or beast (default: beast). See :ref:`rgw_frontends` + self.rgw_frontend_type: Optional[str] = rgw_frontend_type + #: enable SSL self.ssl = ssl def get_port_start(self) -> List[int]: From 2e7b369373236c32e429463512521da1fc7e159d Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 29 Sep 2021 12:00:42 +0200 Subject: [PATCH 0069/2866] doc/cephadm: use sphinx autoclass to document IscsiServiceSpec Signed-off-by: Sebastian Wagner --- doc/cephadm/services/iscsi.rst | 8 +++++++- src/python-common/ceph/deployment/service_spec.py | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/cephadm/services/iscsi.rst b/doc/cephadm/services/iscsi.rst index 581d2c9d40612..e039e8d9a3467 100644 --- a/doc/cephadm/services/iscsi.rst +++ b/doc/cephadm/services/iscsi.rst @@ -44,7 +44,6 @@ For example: trusted_ip_list: "IP_ADDRESS_1,IP_ADDRESS_2,IP_ADDRESS_3,..." 
api_user: API_USERNAME api_password: API_PASSWORD - api_secure: true ssl_cert: | -----BEGIN CERTIFICATE----- MIIDtTCCAp2gAwIBAgIYMC4xNzc1NDQxNjEzMzc2MjMyXzxvQ7EcMA0GCSqGSIb3 @@ -58,6 +57,11 @@ For example: [...] -----END PRIVATE KEY----- +.. py:currentmodule:: ceph.deployment.service_spec + +.. autoclass:: IscsiServiceSpec + :members: + The specification can then be applied using: @@ -68,6 +72,8 @@ The specification can then be applied using: See :ref:`orchestrator-cli-placement-spec` for details of the placement specification. +See also: :ref:`orchestrator-cli-service-spec`. + Further Reading =============== diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 698676798513f..21c16026500a1 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -833,12 +833,19 @@ def __init__(self, #: RADOS pool where ceph-iscsi config data is stored. self.pool = pool + #: list of trusted IP addresses self.trusted_ip_list = trusted_ip_list + #: ``api_port`` as defined in the ``iscsi-gateway.cfg`` self.api_port = api_port + #: ``api_user`` as defined in the ``iscsi-gateway.cfg`` self.api_user = api_user + #: ``api_password`` as defined in the ``iscsi-gateway.cfg`` self.api_password = api_password + #: ``api_secure`` as defined in the ``iscsi-gateway.cfg`` self.api_secure = api_secure + #: SSL certificate self.ssl_cert = ssl_cert + #: SSL private key self.ssl_key = ssl_key if not self.api_secure and self.ssl_cert and self.ssl_key: From ce73047d25599b247ab889614fa62684b0c7aca0 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 29 Sep 2021 12:02:28 +0200 Subject: [PATCH 0070/2866] doc/cephadm: update example of custom container spec file Signed-off-by: Sebastian Wagner --- doc/cephadm/services/custom-container.rst | 35 ++++++++++++----------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/cephadm/services/custom-container.rst b/doc/cephadm/services/custom-container.rst index 542fcf16261d8..3ece248c5a4c9 100644 --- a/doc/cephadm/services/custom-container.rst +++ b/doc/cephadm/services/custom-container.rst @@ -11,32 +11,33 @@ A corresponding :ref:`orchestrator-cli-service-spec` must look like: service_id: foo placement: ... - image: docker.io/library/foo:latest - entrypoint: /usr/bin/foo - uid: 1000 - gid: 1000 - args: + spec: + image: docker.io/library/foo:latest + entrypoint: /usr/bin/foo + uid: 1000 + gid: 1000 + args: - "--net=host" - "--cpus=2" - ports: + ports: - 8080 - 8443 - envs: + envs: - SECRET=mypassword - PORT=8080 - PUID=1000 - PGID=1000 - volume_mounts: + volume_mounts: CONFIG_DIR: /etc/foo - bind_mounts: - - ['type=bind', 'source=lib/modules', 'destination=/lib/modules', 'ro=true'] - dirs: - - CONFIG_DIR - files: - CONFIG_DIR/foo.conf: - - refresh=true - - username=xyz - - "port: 1234" + bind_mounts: + - ['type=bind', 'source=lib/modules', 'destination=/lib/modules', 'ro=true'] + dirs: + - CONFIG_DIR + files: + CONFIG_DIR/foo.conf: + - refresh=true + - username=xyz + - "port: 1234" where the properties of a service specification are: From bdc802354554ea9c5e7c0377e3fe695f719921a7 Mon Sep 17 00:00:00 2001 From: Adam King Date: Thu, 30 Sep 2021 08:34:58 -0400 Subject: [PATCH 0071/2866] mgr/cephadm: put list_servers output in debug logs Over time, seen a number of cases where daemons are being marked stray for unknown reasons and it is always difficult to debug due to the difficulty of finding what list_servers is outputting. 
I think it would help to include this in the debug logs. That way we have that side of the info available and can easily compare it to daemon/host info cephadm is storing. Signed-off-by: Adam King --- src/pybind/mgr/cephadm/serve.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index f8d4398b56f6b..019d847f65988 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -537,6 +537,7 @@ def _check_for_strays(self) -> None: del self.mgr.health_checks[k] if self.mgr.warn_on_stray_hosts or self.mgr.warn_on_stray_daemons: ls = self.mgr.list_servers() + self.log.debug(ls) managed = self.mgr.cache.get_daemon_names() host_detail = [] # type: List[str] host_num_daemons = 0 From 1b74a3db0c23423bb5a188eafb4346711d0b13a2 Mon Sep 17 00:00:00 2001 From: Adam King Date: Thu, 30 Sep 2021 09:32:31 -0400 Subject: [PATCH 0072/2866] mgr/cephadm: move handling of use_agent setting into agent.py The serve function itself should only really be calling other functions. The actual handling of the setting is better placed in the agent's own file. Signed-off-by: Adam King --- src/pybind/mgr/cephadm/agent.py | 23 +++++++++++++++++++++++ src/pybind/mgr/cephadm/serve.py | 19 ++----------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index d43f639264855..39f091a496b89 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -319,6 +319,29 @@ def _apply_agent(self) -> None: ) self.mgr.spec_store.save(spec) + def _handle_use_agent_setting(self) -> bool: + need_apply = False + if self.mgr.use_agent: + # on the off chance there are still agents hanging around from + # when we turned the config option off, we need to redeploy them + # we can tell they're in that state if we don't have a keyring for + # them in the host cache + for agent in self.mgr.cache.get_daemons_by_service('agent'): + if agent.hostname not in self.mgr.cache.agent_keys: + self.mgr._schedule_daemon_action(agent.name(), 'redeploy') + if 'agent' not in self.mgr.spec_store: + self.mgr.agent_helpers._apply_agent() + need_apply = True + else: + if 'agent' in self.mgr.spec_store: + self.mgr.spec_store.rm('agent') + need_apply = True + self.mgr.cache.agent_counter = {} + self.mgr.cache.agent_timestamp = {} + self.mgr.cache.agent_keys = {} + self.mgr.cache.agent_ports = {} + return need_apply + class SSLCerts: def __init__(self, mgr: "CephadmOrchestrator") -> None: diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index f8d4398b56f6b..b3048b1fb5641 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -91,23 +91,8 @@ def serve(self) -> None: self._purge_deleted_services() - if self.mgr.use_agent: - # on the off chance there are still agents hanging around from - # when we turned the config option off, we need to redeploy them - # we can tell they're in that state if we don't have a keyring for - # them in the host cache - for agent in self.mgr.cache.get_daemons_by_service('agent'): - if agent.hostname not in self.mgr.cache.agent_keys: - self.mgr._schedule_daemon_action(agent.name(), 'redeploy') - if 'agent' not in self.mgr.spec_store: - self.mgr.agent_helpers._apply_agent() - else: - if 'agent' in self.mgr.spec_store: - self.mgr.spec_store.rm('agent') - self.mgr.cache.agent_counter = {} - self.mgr.cache.agent_timestamp = {} - self.mgr.cache.agent_keys = {} - self.mgr.cache.agent_ports = {} 
+ if self.mgr.agent_helpers._handle_use_agent_setting(): + continue if self.mgr.upgrade.continue_upgrade(): continue From 50ac0ec38d5d4e3c2e2ce2dafcccc3ea686ecba0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 30 Sep 2021 10:12:24 -0500 Subject: [PATCH 0073/2866] mgr/rook: describe_services: name rgw svc appropriately Set rgw_zone if present. Do not set rgw_realm because we don't know what it is from the CephObjectstore CRD. Signed-off-by: Sage Weil --- src/pybind/mgr/rook/module.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 5a7f177d3a7bd..cea5ad8ef2835 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -299,9 +299,7 @@ def describe_service(self, # CephObjectstores all_zones = self.rook_cluster.get_resource("cephobjectstores") for zone in all_zones: - rgw_realm = zone['metadata']['name'] - rgw_zone = rgw_realm - svc = 'rgw.' + rgw_realm + svc = 'rgw.' + zone['metadata']['name'] if svc in spec: continue active = zone['spec']['gateway']['instances']; @@ -311,10 +309,10 @@ def describe_service(self, else: ssl = False port = zone['spec']['gateway']['port'] or 80 + rgw_zone = zone['spec'].get('zone', {}).get('name') or None spec[svc] = orchestrator.ServiceDescription( spec=RGWSpec( service_id=zone['metadata']['name'], - rgw_realm=rgw_realm, rgw_zone=rgw_zone, ssl=ssl, rgw_frontend_port=port, From f3d072b885bb9377876df6f7ab161e1a16fed39d Mon Sep 17 00:00:00 2001 From: Laura Flores Date: Thu, 30 Sep 2021 20:05:59 +0000 Subject: [PATCH 0074/2866] mgr/telemetry: remove pool name from telemetry perf report It is best to remove the pool names from the telemetry perf report, as these names could potentially count as sensitive information. 
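For illustration only, and not part of this change: a minimal sketch of the anonymized per-pool aggregation the module performs, assuming input shaped like the per-PG stats in a pg dump; the function name and sample values below are made up.

    from collections import defaultdict

    def stat_sum_per_pool(pg_stats):
        # Sum each pool's per-PG 'stat_sum' counters, keyed by pool id only;
        # pool names never enter the report.
        totals = defaultdict(lambda: defaultdict(int))
        for pg in pg_stats:
            pool_id = int(pg['pgid'].split('.')[0])          # '1.2f' -> 1
            for metric, value in pg['stat_sum'].items():
                totals[pool_id][metric] += value
        return [{'pool_id': pid, 'stats_sum': dict(sums)}
                for pid, sums in totals.items()]

    sample = [{'pgid': '1.0', 'stat_sum': {'num_bytes': 36, 'num_objects': 2}},
              {'pgid': '1.1', 'stat_sum': {'num_bytes': 12, 'num_objects': 1}}]
    print(stat_sum_per_pool(sample))
    # [{'pool_id': 1, 'stats_sum': {'num_bytes': 48, 'num_objects': 3}}]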
Signed-off-by: Laura Flores --- src/pybind/mgr/telemetry/module.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py index 91596436a9d86..a0f7eb0b40a84 100644 --- a/src/pybind/mgr/telemetry/module.py +++ b/src/pybind/mgr/telemetry/module.py @@ -306,18 +306,16 @@ def get_stat_sum_per_pool(self) -> List[dict]: # Initialize 'result' list result: List[dict] = [] - # Create a list of tuples containing pool ids and their associated names - # that will later act as a queue, i.e.: - # pool_queue = [('1', '.mgr'), ('2', 'cephfs.a.meta'), ('3', 'cephfs.a.data')] + # Create a list of pool ids that will later act as a queue, i.e.: + # pool_queue = [1, 2, 3] osd_map = self.get('osd_map') - pool_queue: List[tuple] = [] + pool_queue = [] for pool in osd_map['pools']: - pool_queue.append((str(pool['pool']), pool['pool_name'])) + pool_queue.append(str(pool['pool'])) # Populate 'result', i.e.: # { # 'pool_id': '1' - # 'pool_name': '.mgr' # 'stats_sum': { # 'num_bytes': 36, # 'num_bytes_hit_set_archive': 0, @@ -327,11 +325,8 @@ def get_stat_sum_per_pool(self) -> List[dict]: # } # } while pool_queue: - # Pop a pool out of pool_queue - curr_pool = pool_queue.pop(0) - - # Get the current pool's id and name - curr_pool_id, curr_pool_name = curr_pool[0], curr_pool[1] + # Pop the current pool id out of pool_queue + curr_pool_id = pool_queue.pop(0) # Initialize a dict that will hold aggregated stats for the current pool compiled_stats_dict: Dict[str, Any] = defaultdict(lambda: defaultdict(int)) @@ -343,7 +338,6 @@ def get_stat_sum_per_pool(self) -> List[dict]: pool_id = pg['pgid'].split('.')[0] if pool_id == curr_pool_id: compiled_stats_dict['pool_id'] = int(pool_id) - compiled_stats_dict['pool_name'] = curr_pool_name for metric in pg['stat_sum']: compiled_stats_dict['stats_sum'][metric] += pg['stat_sum'][metric] else: From 8a16d95ae8f7b55af170e012373df5159f4b5973 Mon Sep 17 00:00:00 2001 From: Eunice Lee Date: Fri, 1 Oct 2021 13:00:03 -0700 Subject: [PATCH 0075/2866] doc: fix reference to non-existent stable branch Fixes: #50499 Signed-off-by: Eunice Lee (eunice337@gmail.com) --- doc/install/clone-source.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/install/clone-source.rst b/doc/install/clone-source.rst index da62ee93fa679..eac972d327308 100644 --- a/doc/install/clone-source.rst +++ b/doc/install/clone-source.rst @@ -93,7 +93,7 @@ will be on the ``master`` branch by default, which is the unstable development branch. You may choose other branches too. - ``master``: The unstable development branch. -- ``stable``: The bugfix branch. +- ``stable-release-name``: The name of the stable, :ref:`active release `. e.g. ``Pacific`` - ``next``: The release candidate branch. :: From 2a10be53475ce370e5b66d2ccc2f75ca5f36af8d Mon Sep 17 00:00:00 2001 From: Laura Flores Date: Mon, 4 Oct 2021 22:39:23 +0000 Subject: [PATCH 0076/2866] mon: simplify 'mgr module ls' output Fixes: https://tracker.ceph.com/issues/45322 Signed-off-by: Laura Flores --- doc/man/8/ceph.rst | 2 +- doc/mgr/administrator.rst | 3 ++- src/mon/MgrMonitor.cc | 15 ++++++++++++--- src/mon/MonCommands.h | 4 ++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/doc/man/8/ceph.rst b/doc/man/8/ceph.rst index 639d85e565d6f..af381df92ea8a 100644 --- a/doc/man/8/ceph.rst +++ b/doc/man/8/ceph.rst @@ -580,7 +580,7 @@ Subcommand ``module ls`` will list currently enabled manager modules (plugins). 
Usage:: - ceph mgr module ls + ceph mgr module ls {detail} Subcommand ``module enable`` will enable a manager module. Available modules are included in MgrMap and visible via ``mgr dump``. diff --git a/doc/mgr/administrator.rst b/doc/mgr/administrator.rst index 6fa563c62ad5e..6115d171c014c 100644 --- a/doc/mgr/administrator.rst +++ b/doc/mgr/administrator.rst @@ -57,7 +57,8 @@ Using modules ------------- Use the command ``ceph mgr module ls`` to see which modules are -available, and which are currently enabled. Enable or disable modules +available, and which are currently enabled. Use ``ceph mgr module ls detail`` +to view detailed metadata about disabled modules. Enable or disable modules using the commands ``ceph mgr module enable `` and ``ceph mgr module disable `` respectively. diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index ef246e648c6c1..b2dae3ce8412a 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -953,6 +953,7 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op) string prefix; cmd_getval(cmdmap, "prefix", prefix); + int r = 0; if (prefix == "mgr stat") { @@ -982,6 +983,9 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op) } f->flush(rdata); } else if (prefix == "mgr module ls") { + string detail; + cmd_getval(cmdmap, "detail", detail); + f->open_object_section("modules"); { f->open_array_section("always_on_modules"); @@ -1002,9 +1006,14 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op) for (auto& p : map.available_modules) { if (map.modules.count(p.name) == 0 && map.get_always_on_modules().count(p.name) == 0) { - // For disabled modules, we show the full info, to - // give a hint about whether enabling it will work - p.dump(f.get()); + if (detail == "detail") { + // For disabled modules, we show the full info if the detail + // parameter is enabled, to give a hint about whether enabling it will work + p.dump(f.get()); + } else { + // Otherwise, we give a shortened summary by default + f->dump_string("module", p.name); + } } } f->close_section(); diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 1db9879873364..78129f336e256 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1231,8 +1231,8 @@ COMMAND("mgr dump " "mgr", "r") COMMAND("mgr fail name=who,type=CephString,req=false", "treat the named manager daemon as failed", "mgr", "rw") -COMMAND("mgr module ls", - "list active mgr modules", "mgr", "r") +COMMAND("mgr module ls name=detail,type=CephChoices,strings=detail,req=false", + "list active mgr modules", "mgr", "r") COMMAND("mgr services", "list service endpoints provided by mgr modules", "mgr", "r") From 05902d943bba4a64abbd943270b56cbdd1650e62 Mon Sep 17 00:00:00 2001 From: Yaarit Hatuka Date: Mon, 4 Oct 2021 18:50:01 +0000 Subject: [PATCH 0077/2866] mgr/devicehealth: fix missing timezone from time delta calculation An error occurs when subtracting a datetime object that is offset-naive (i.e. unaware of timezone) from a datetime object which is offset-aware. datetime.utcnow() is missing timezone info, e.g.: '2021-09-22 13:18:45.021712', while life_expectancy_max is in the format of: '2021-09-28 00:00:00.000000+00:00', hence we need to add timezone info to the former when calculating their time delta. Please note that we calculate time delta using `datetime.utcnow()` in `serve()` in this module, but there we refer to the delta in seconds, which works fine. 
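For illustration only, and not part of this change: a minimal sketch of the failure and the fix, using a made-up timestamp in the same '%z' format the module parses.

    from datetime import datetime, timezone

    aware = datetime.strptime('2021-09-28T00:00:00.000000+0000',
                              '%Y-%m-%dT%H:%M:%S.%f%z')      # offset-aware

    try:
        datetime.utcnow() - aware                            # offset-naive minus offset-aware
    except TypeError as e:
        print(e)    # can't subtract offset-naive and offset-aware datetimes

    delta = datetime.now(timezone.utc) - aware               # both offset-aware: works
    print(delta)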
Fixes: https://tracker.ceph.com/issues/52327 Signed-off-by: Yaarit Hatuka --- src/pybind/mgr/devicehealth/module.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index 9812c91a87cc7..9b6c1fe431c50 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -9,7 +9,7 @@ import rados import re from threading import Event -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import cast, Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union TIME_FORMAT = '%Y%m%d-%H%M%S' @@ -588,7 +588,7 @@ def check_health(self) -> Tuple[int, str, str]: devs = self.get("devices") osds_in = {} osds_out = {} - now = datetime.utcnow() + now = datetime.now(timezone.utc) # e.g. '2021-09-22 13:18:45.021712+00:00' osdmap = self.get("osd_map") assert osdmap is not None for dev in devs['devices']: @@ -602,7 +602,7 @@ def check_health(self) -> Tuple[int, str, str]: continue # life_expectancy_(min/max) is in the format of: # '%Y-%m-%dT%H:%M:%S.%f%z', e.g.: - # '2019-01-20T21:12:12.000000Z' + # '2019-01-20 21:12:12.000000+00:00' life_expectancy_max = datetime.strptime( dev['life_expectancy_max'], '%Y-%m-%dT%H:%M:%S.%f%z') From c4c9f391aaa6983cf4ce0af144b70ecd27e76f45 Mon Sep 17 00:00:00 2001 From: Cory Snyder Date: Wed, 22 Sep 2021 10:14:12 -0400 Subject: [PATCH 0078/2866] rgw: add abstraction for ops log destination and add file logger Adds an OpsLogSink abstraction for ops log destinations. Also implements this abstraction for a file logger since it's easier to use files vs. sockets with containers. Fixes: https://tracker.ceph.com/issues/48752 Signed-off-by: Cory Snyder --- src/common/options/rgw.yaml.in | 13 +++ src/rgw/librgw.cc | 19 ++- src/rgw/rgw_lib.h | 4 +- src/rgw/rgw_log.cc | 203 ++++++++++++++++++++++++++------- src/rgw/rgw_log.h | 69 ++++++++++- src/rgw/rgw_lua_request.cc | 20 ++-- src/rgw/rgw_lua_request.h | 4 +- src/rgw/rgw_main.cc | 16 ++- src/rgw/rgw_process.cc | 4 +- src/rgw/rgw_process.h | 6 +- src/test/fio/fio_librgw.cc | 2 +- 11 files changed, 282 insertions(+), 78 deletions(-) diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index 961d22b9e6d71..d116b05b67f85 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -1438,6 +1438,7 @@ options: - rgw_log_object_name - rgw_ops_log_rados - rgw_ops_log_socket_path + - rgw_ops_log_file_path with_legacy: true # enable logging bandwidth usage - name: rgw_enable_usage_log @@ -1478,6 +1479,18 @@ options: - rgw_enable_ops_log - rgw_ops_log_data_backlog with_legacy: true +# path to file where ops log can go +- name: rgw_ops_log_file_path + type: str + level: advanced + desc: File-system path for ops log. + long_desc: Path to file that RGW will log ops logs to. + fmt_desc: The file-system path for writing operations logs. + services: + - rgw + see_also: + - rgw_enable_ops_log + with_legacy: true # max data backlog for ops log - name: rgw_ops_log_data_backlog type: size diff --git a/src/rgw/librgw.cc b/src/rgw/librgw.cc index e22010cb452d5..18c4140e60b31 100644 --- a/src/rgw/librgw.cc +++ b/src/rgw/librgw.cc @@ -340,8 +340,7 @@ namespace rgw { << e.what() << dendl; } if (should_log) { - rgw_log_op(store, nullptr /* !rest */, s, - (op ? op->name() : "unknown"), olog); + rgw_log_op(nullptr /* !rest */, s, (op ? 
op->name() : "unknown"), olog); } int http_ret = s->err.http_ret; @@ -589,10 +588,20 @@ namespace rgw { // XXX ex-RGWRESTMgr_lib, mgr->set_logging(true) + OpsLogManifold* olog_manifold = new OpsLogManifold(); if (!g_conf()->rgw_ops_log_socket_path.empty()) { - olog = new OpsLogSocket(g_ceph_context, g_conf()->rgw_ops_log_data_backlog); - olog->init(g_conf()->rgw_ops_log_socket_path); + OpsLogSocket* olog_socket = new OpsLogSocket(g_ceph_context, g_conf()->rgw_ops_log_data_backlog); + olog_socket->init(g_conf()->rgw_ops_log_socket_path); + olog_manifold->add_sink(olog_socket); } + OpsLogFile* ops_log_file; + if (!g_conf()->rgw_ops_log_file_path.empty()) { + ops_log_file = new OpsLogFile(g_ceph_context, g_conf()->rgw_ops_log_file_path, g_conf()->rgw_ops_log_data_backlog); + ops_log_file->start(); + olog_manifold->add_sink(ops_log_file); + } + olog_manifold->add_sink(new OpsLogRados(store)); + olog = olog_manifold; int port = 80; RGWProcessEnv env = { store, &rest, olog, port }; @@ -653,7 +662,7 @@ namespace rgw { shutdown_async_signal_handler(); rgw_log_usage_finalize(); - + delete olog; StoreManager::close_storage(store); diff --git a/src/rgw/rgw_lib.h b/src/rgw/rgw_lib.h index 710c76c803158..0c5e9e721685f 100644 --- a/src/rgw/rgw_lib.h +++ b/src/rgw/rgw_lib.h @@ -20,7 +20,7 @@ #define dout_subsys ceph_subsys_rgw -class OpsLogSocket; +class OpsLogSink; namespace rgw { @@ -29,7 +29,7 @@ namespace rgw { class RGWLib : public DoutPrefixProvider { RGWFrontendConfig* fec; RGWLibFrontend* fe; - OpsLogSocket* olog; + OpsLogSink* olog; rgw::LDAPHelper* ldh{nullptr}; RGWREST rest; // XXX needed for RGWProcessEnv rgw::sal::Store* store; diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc index da6eb1e6ad337..aee079daf7f0e 100644 --- a/src/rgw/rgw_log.cc +++ b/src/rgw/rgw_log.cc @@ -17,6 +17,9 @@ #include "services/svc_zone.h" +#include +#include + #define dout_subsys ceph_subsys_rgw using namespace std; @@ -317,32 +320,131 @@ void rgw_format_ops_log_entry(struct rgw_log_entry& entry, Formatter *formatter) formatter->close_section(); } -void OpsLogSocket::formatter_to_bl(bufferlist& bl) +OpsLogManifold::~OpsLogManifold() { - stringstream ss; - formatter->flush(ss); - const string& s = ss.str(); + for (const auto &sink : sinks) { + delete sink; + } +} - bl.append(s); +void OpsLogManifold::add_sink(OpsLogSink* sink) +{ + sinks.push_back(sink); } -void OpsLogSocket::init_connection(bufferlist& bl) +int OpsLogManifold::log(struct req_state* s, struct rgw_log_entry& entry) { - bl.append("["); + int ret = 0; + for (const auto &sink : sinks) { + if (sink->log(s, entry) < 0) { + ret = -1; + } + } + return ret; } -OpsLogSocket::OpsLogSocket(CephContext *cct, uint64_t _backlog) : OutputDataSocket(cct, _backlog) +OpsLogFile::OpsLogFile(CephContext* cct, std::string& path, uint64_t max_data_size) : + cct(cct), file(path, std::ofstream::app), data_size(0), max_data_size(max_data_size) { - formatter = new JSONFormatter; - delim.append(",\n"); } -OpsLogSocket::~OpsLogSocket() +void OpsLogFile::flush() +{ + std::scoped_lock flush_lock(flush_mutex); + { + std::scoped_lock log_lock(log_mutex); + assert(flush_buffer.empty()); + flush_buffer.swap(log_buffer); + data_size = 0; + } + for (auto bl : flush_buffer) { + int try_num = 0; + while (true) { + bl.write_stream(file); + if (!file) { + ldpp_dout(this, 0) << "ERROR: failed to log RGW ops log file entry" << dendl; + file.clear(); + if (stopped) { + break; + } + int sleep_time_secs = std::min((int) pow(2, try_num), 60); + 
std::this_thread::sleep_for(std::chrono::seconds(sleep_time_secs)); + try_num++; + } else { + break; + } + } + } + flush_buffer.clear(); + file << std::endl; +} + +void* OpsLogFile::entry() { + std::unique_lock lock(log_mutex); + while (!stopped) { + if (!log_buffer.empty()) { + lock.unlock(); + flush(); + lock.lock(); + continue; + } + cond_flush.wait(lock); + } + flush(); + return NULL; +} + +void OpsLogFile::start() { + stopped = false; + create("ops_log_file"); +} + +void OpsLogFile::stop() { + { + cond_flush.notify_one(); + stopped = true; + } + join(); +} + +OpsLogFile::~OpsLogFile() { + if (!stopped) { + stop(); + } + file.close(); +} + +int OpsLogFile::log_json(struct req_state* s, bufferlist& bl) +{ + std::unique_lock lock(log_mutex); + if (data_size + bl.length() >= max_data_size) { + ldout(s->cct, 0) << "ERROR: RGW ops log file buffer too full, dropping log for txn: " << s->trans_id << dendl; + return -1; + } + log_buffer.push_back(bl); + data_size += bl.length(); + cond_flush.notify_all(); + return 0; +} + +JsonOpsLogSink::JsonOpsLogSink() { + formatter = new JSONFormatter; +} + +JsonOpsLogSink::~JsonOpsLogSink() { delete formatter; } -void OpsLogSocket::log(struct rgw_log_entry& entry) +void JsonOpsLogSink::formatter_to_bl(bufferlist& bl) +{ + stringstream ss; + formatter->flush(ss); + const string& s = ss.str(); + bl.append(s); +} + +int JsonOpsLogSink::log(struct req_state* s, struct rgw_log_entry& entry) { bufferlist bl; @@ -351,11 +453,53 @@ void OpsLogSocket::log(struct rgw_log_entry& entry) formatter_to_bl(bl); lock.unlock(); + return log_json(s, bl); +} + +void OpsLogSocket::init_connection(bufferlist& bl) +{ + bl.append("["); +} + +OpsLogSocket::OpsLogSocket(CephContext *cct, uint64_t _backlog) : OutputDataSocket(cct, _backlog) +{ + delim.append(",\n"); +} + +int OpsLogSocket::log_json(struct req_state* s, bufferlist& bl) +{ append_output(bl); + return 0; } -int rgw_log_op(rgw::sal::Store* store, RGWREST* const rest, struct req_state *s, - const string& op_name, OpsLogSocket *olog) +OpsLogRados::OpsLogRados(rgw::sal::Store* store): store(store) +{ +} + +int OpsLogRados::log(struct req_state* s, struct rgw_log_entry& entry) +{ + if (!s->cct->_conf->rgw_ops_log_rados) { + return 0; + } + bufferlist bl; + encode(entry, bl); + + struct tm bdt; + time_t t = req_state::Clock::to_time_t(entry.time); + if (s->cct->_conf->rgw_log_object_name_utc) + gmtime_r(&t, &bdt); + else + localtime_r(&t, &bdt); + string oid = render_log_object_name(s->cct->_conf->rgw_log_object_name, &bdt, + entry.bucket_id, entry.bucket); + if (store->log_op(s, oid, bl) < 0) { + ldpp_dout(s, 0) << "ERROR: failed to log RADOS RGW ops log entry for txn: " << s->trans_id << dendl; + return -1; + } + return 0; +} + +int rgw_log_op(RGWREST* const rest, struct req_state *s, const string& op_name, OpsLogSink *olog) { struct rgw_log_entry entry; string bucket_id; @@ -473,37 +617,14 @@ int rgw_log_op(rgw::sal::Store* store, RGWREST* const rest, struct req_state *s, char buf[16]; snprintf(buf, sizeof(buf), "%d", s->err.http_ret); entry.http_status = buf; - } else + } else { entry.http_status = "200"; // default - + } entry.error_code = s->err.err_code; entry.bucket_id = bucket_id; entry.trans_id = s->trans_id; - - bufferlist bl; - encode(entry, bl); - - struct tm bdt; - time_t t = req_state::Clock::to_time_t(entry.time); - if (s->cct->_conf->rgw_log_object_name_utc) - gmtime_r(&t, &bdt); - else - localtime_r(&t, &bdt); - - int ret = 0; - - if (s->cct->_conf->rgw_ops_log_rados) { - string oid = 
render_log_object_name(s->cct->_conf->rgw_log_object_name, &bdt, - entry.bucket_id, entry.bucket); - ret = store->log_op(s, oid, bl); - } - if (olog) { - olog->log(entry); + return olog->log(s, entry); } - if (ret < 0) - ldpp_dout(s, 0) << "ERROR: failed to log entry" << dendl; - - return ret; + return 0; } - diff --git a/src/rgw/rgw_log.h b/src/rgw/rgw_log.h index 2469282b33012..5ee411e8cfaec 100644 --- a/src/rgw/rgw_log.h +++ b/src/rgw/rgw_log.h @@ -7,6 +7,10 @@ #include #include "rgw_common.h" #include "common/OutputDataSocket.h" +#include +#include + +#define dout_subsys ceph_subsys_rgw namespace rgw { namespace sal { class Store; @@ -132,26 +136,79 @@ struct rgw_log_entry { }; WRITE_CLASS_ENCODER(rgw_log_entry) -class OpsLogSocket : public OutputDataSocket { +class OpsLogSink { +public: + virtual int log(struct req_state* s, struct rgw_log_entry& entry) = 0; + virtual ~OpsLogSink() = default; +}; + +class OpsLogManifold: public OpsLogSink { + std::vector sinks; +public: + ~OpsLogManifold() override; + void add_sink(OpsLogSink* sink); + int log(struct req_state* s, struct rgw_log_entry& entry) override; +}; + +class JsonOpsLogSink : public OpsLogSink { ceph::Formatter *formatter; - ceph::mutex lock = ceph::make_mutex("OpsLogSocket"); + ceph::mutex lock = ceph::make_mutex("JsonOpsLogSink"); void formatter_to_bl(bufferlist& bl); +protected: + virtual int log_json(struct req_state* s, bufferlist& bl) = 0; +public: + JsonOpsLogSink(); + ~JsonOpsLogSink() override; + int log(struct req_state* s, struct rgw_log_entry& entry) override; +}; + +class OpsLogFile : public JsonOpsLogSink, public Thread, public DoutPrefixProvider { + CephContext* cct; + ceph::mutex log_mutex = ceph::make_mutex("OpsLogFile_log"); + ceph::mutex flush_mutex = ceph::make_mutex("OpsLogFile_flush"); + std::vector log_buffer; + std::vector flush_buffer; + std::condition_variable cond_flush; + std::ofstream file; + bool stopped; + uint64_t data_size; + uint64_t max_data_size; + + void flush(); +protected: + int log_json(struct req_state* s, bufferlist& bl) override; + void *entry() override; +public: + OpsLogFile(CephContext* cct, std::string& path, uint64_t max_data_size); + ~OpsLogFile() override; + CephContext *get_cct() const override { return cct; } + unsigned get_subsys() const override { return dout_subsys; } + std::ostream& gen_prefix(std::ostream& out) const override { return out << "rgw OpsLogFile: "; } + void start(); + void stop(); +}; +class OpsLogSocket : public OutputDataSocket, public JsonOpsLogSink { protected: + int log_json(struct req_state* s, bufferlist& bl) override; void init_connection(bufferlist& bl) override; public: OpsLogSocket(CephContext *cct, uint64_t _backlog); - ~OpsLogSocket() override; +}; - void log(struct rgw_log_entry& entry); +class OpsLogRados : public OpsLogSink { + rgw::sal::Store* store; +public: + OpsLogRados(rgw::sal::Store* store); + int log(struct req_state* s, struct rgw_log_entry& entry) override; }; class RGWREST; -int rgw_log_op(rgw::sal::Store* store, RGWREST* const rest, struct req_state* s, - const std::string& op_name, OpsLogSocket* olog); +int rgw_log_op(RGWREST* const rest, struct req_state* s, + const std::string& op_name, OpsLogSink* olog); void rgw_log_usage_init(CephContext* cct, rgw::sal::Store* store); void rgw_log_usage_finalize(); void rgw_format_ops_log_entry(struct rgw_log_entry& entry, diff --git a/src/rgw/rgw_lua_request.cc b/src/rgw/rgw_lua_request.cc index a8ea4ee19b2c9..7d6d72e0368b7 100644 --- a/src/rgw/rgw_lua_request.cc +++ 
b/src/rgw/rgw_lua_request.cc @@ -24,16 +24,15 @@ constexpr const char* RequestLogAction{"Log"}; int RequestLog(lua_State* L) { - const auto store = reinterpret_cast(lua_touserdata(L, lua_upvalueindex(1))); - const auto rest = reinterpret_cast(lua_touserdata(L, lua_upvalueindex(2))); - const auto olog = reinterpret_cast(lua_touserdata(L, lua_upvalueindex(3))); - const auto s = reinterpret_cast(lua_touserdata(L, lua_upvalueindex(4))); - const std::string op_name(reinterpret_cast(lua_touserdata(L, lua_upvalueindex(5)))); - if (store && s) { - const auto rc = rgw_log_op(store, rest, s, op_name, olog); + const auto rest = reinterpret_cast(lua_touserdata(L, lua_upvalueindex(1))); + const auto olog = reinterpret_cast(lua_touserdata(L, lua_upvalueindex(2))); + const auto s = reinterpret_cast(lua_touserdata(L, lua_upvalueindex(3))); + const std::string op_name(reinterpret_cast(lua_touserdata(L, lua_upvalueindex(4)))); + if (s) { + const auto rc = rgw_log_op(rest, s, op_name, olog); lua_pushinteger(L, rc); } else { - ldpp_dout(s, 1) << "Lua ERROR: missing rados store, cannot use ops log" << dendl; + ldpp_dout(s, 1) << "Lua ERROR: missing request state, cannot use ops log" << dendl; lua_pushinteger(L, -EINVAL); } @@ -777,7 +776,7 @@ struct RequestMetaTable : public EmptyMetaTable { int execute( rgw::sal::Store* store, RGWREST* rest, - OpsLogSocket* olog, + OpsLogSink* olog, req_state* s, const char* op_name, const std::string& script) @@ -799,12 +798,11 @@ int execute( lua_getglobal(L, RequestMetaTable::TableName().c_str()); ceph_assert(lua_istable(L, -1)); pushstring(L, RequestLogAction); - lua_pushlightuserdata(L, store); lua_pushlightuserdata(L, rest); lua_pushlightuserdata(L, olog); lua_pushlightuserdata(L, s); lua_pushlightuserdata(L, const_cast(op_name)); - lua_pushcclosure(L, RequestLog, FIVE_UPVALS); + lua_pushcclosure(L, RequestLog, FOUR_UPVALS); lua_rawset(L, -3); try { diff --git a/src/rgw/rgw_lua_request.h b/src/rgw/rgw_lua_request.h index d31e81b5a4fd5..fcf0673f3ba7b 100644 --- a/src/rgw/rgw_lua_request.h +++ b/src/rgw/rgw_lua_request.h @@ -5,7 +5,7 @@ class req_state; class RGWREST; -class OpsLogSocket; +class OpsLogSink; namespace rgw::sal { class Store; } @@ -16,7 +16,7 @@ namespace rgw::lua::request { int execute( rgw::sal::Store* store, RGWREST* rest, - OpsLogSocket* olog, + OpsLogSink* olog, req_state *s, const char* op_name, const std::string& script); diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc index c44b0cba61cab..507c07ef2f972 100644 --- a/src/rgw/rgw_main.cc +++ b/src/rgw/rgw_main.cc @@ -518,12 +518,19 @@ int radosgw_Main(int argc, const char **argv) rgw::dmclock::SchedulerCtx sched_ctx{cct.get()}; - OpsLogSocket *olog = NULL; - + OpsLogManifold *olog = new OpsLogManifold(); if (!g_conf()->rgw_ops_log_socket_path.empty()) { - olog = new OpsLogSocket(g_ceph_context, g_conf()->rgw_ops_log_data_backlog); - olog->init(g_conf()->rgw_ops_log_socket_path); + OpsLogSocket* olog_socket = new OpsLogSocket(g_ceph_context, g_conf()->rgw_ops_log_data_backlog); + olog_socket->init(g_conf()->rgw_ops_log_socket_path); + olog->add_sink(olog_socket); + } + OpsLogFile* ops_log_file; + if (!g_conf()->rgw_ops_log_file_path.empty()) { + ops_log_file = new OpsLogFile(g_ceph_context, g_conf()->rgw_ops_log_file_path, g_conf()->rgw_ops_log_data_backlog); + ops_log_file->start(); + olog->add_sink(ops_log_file); } + olog->add_sink(new OpsLogRados(store)); r = signal_fd_init(); if (r < 0) { @@ -671,7 +678,6 @@ int radosgw_Main(int argc, const char **argv) shutdown_async_signal_handler(); 
rgw_log_usage_finalize(); - delete olog; StoreManager::close_storage(store); diff --git a/src/rgw/rgw_process.cc b/src/rgw/rgw_process.cc index 9cf78fe8809b7..3f8c9c33efe6a 100644 --- a/src/rgw/rgw_process.cc +++ b/src/rgw/rgw_process.cc @@ -189,7 +189,7 @@ int process_request(rgw::sal::Store* const store, const std::string& frontend_prefix, const rgw_auth_registry_t& auth_registry, RGWRestfulIO* const client_io, - OpsLogSocket* const olog, + OpsLogSink* const olog, optional_yield yield, rgw::dmclock::Scheduler *scheduler, string* user, @@ -356,7 +356,7 @@ int process_request(rgw::sal::Store* const store, } if (should_log) { - rgw_log_op(store, rest, s, (op ? op->name() : "unknown"), olog); + rgw_log_op(rest, s, (op ? op->name() : "unknown"), olog); } if (http_ret != nullptr) { diff --git a/src/rgw/rgw_process.h b/src/rgw/rgw_process.h index f8a97d16c29d4..1aac4a6cdf1bc 100644 --- a/src/rgw/rgw_process.h +++ b/src/rgw/rgw_process.h @@ -34,7 +34,7 @@ namespace rgw::dmclock { struct RGWProcessEnv { rgw::sal::Store* store; RGWREST *rest; - OpsLogSocket *olog; + OpsLogSink *olog; int port; std::string uri_prefix; std::shared_ptr auth_registry; @@ -49,7 +49,7 @@ class RGWProcess { CephContext *cct; rgw::sal::Store* store; rgw_auth_registry_ptr_t auth_registry; - OpsLogSocket* olog; + OpsLogSink* olog; ThreadPool m_tp; Throttle req_throttle; RGWREST* rest; @@ -169,7 +169,7 @@ extern int process_request(rgw::sal::Store* store, const std::string& frontend_prefix, const rgw_auth_registry_t& auth_registry, RGWRestfulIO* client_io, - OpsLogSocket* olog, + OpsLogSink* olog, optional_yield y, rgw::dmclock::Scheduler *scheduler, std::string* user, diff --git a/src/test/fio/fio_librgw.cc b/src/test/fio/fio_librgw.cc index 0def8aac7240b..b088b68f96c5c 100644 --- a/src/test/fio/fio_librgw.cc +++ b/src/test/fio/fio_librgw.cc @@ -380,7 +380,7 @@ namespace { { librgw_data* data = static_cast(td->io_ops_data); const char* object = io_u->file->file_name; - struct rgw_file_handle* object_fh; + struct rgw_file_handle* object_fh = nullptr; size_t nbytes; int r = 0; From f929556bdd756c9258e6283a29d85b82395296cc Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Tue, 5 Oct 2021 12:27:44 +0000 Subject: [PATCH 0079/2866] common/ceph_time: fix wrong seconds output in exact_timespan_str() For example: 170.567s fixed from "2m500.567s" to "2m50.567s" Fixes: https://tracker.ceph.com/issues/52815 Signed-off-by: Ronen Friedman --- src/common/ceph_time.cc | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/common/ceph_time.cc b/src/common/ceph_time.cc index 1b8c3ae08a5e9..4af48a8a2a2fe 100644 --- a/src/common/ceph_time.cc +++ b/src/common/ceph_time.cc @@ -149,11 +149,11 @@ std::string timespan_str(timespan t) // that isn't as lame as this one! 
uint64_t nsec = std::chrono::nanoseconds(t).count(); std::ostringstream ss; - if (nsec < 2000000000) { - ss << ((float)nsec / 1000000000) << "s"; + if (nsec < 2'000'000'000) { + ss << ((float)nsec / 1'000'000'000) << "s"; return ss.str(); } - uint64_t sec = nsec / 1000000000; + uint64_t sec = nsec / 1'000'000'000; if (sec < 120) { ss << sec << "s"; return ss.str(); @@ -191,8 +191,8 @@ std::string timespan_str(timespan t) std::string exact_timespan_str(timespan t) { uint64_t nsec = std::chrono::nanoseconds(t).count(); - uint64_t sec = nsec / 1000000000; - nsec %= 1000000000; + uint64_t sec = nsec / 1'000'000'000; + nsec %= 1'000'000'000; uint64_t yr = sec / (60 * 60 * 24 * 365); std::ostringstream ss; if (yr) { @@ -224,14 +224,12 @@ std::string exact_timespan_str(timespan t) ss << min << "m"; sec -= min * 60; } - if (sec) { - ss << sec; - } - if (nsec) { - ss << ((float)nsec / 1000000000); - } if (sec || nsec) { - ss << "s"; + if (nsec) { + ss << (((float)nsec / 1'000'000'000) + sec) << "s"; + } else { + ss << sec << "s"; + } } return ss.str(); } From 4affb5c7029f6b83d640aa7b7206d9cf61e75f1d Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Thu, 23 Sep 2021 19:49:31 -0400 Subject: [PATCH 0080/2866] pybind/mgr/cephadm: always do mds upgrade sequence Minor versions also require this sequence. Signed-off-by: Patrick Donnelly --- src/pybind/mgr/cephadm/upgrade.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index d7162941985d3..e9148a595f7eb 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -356,24 +356,6 @@ def _prepare_for_mds_upgrade( target_major: str, need_upgrade: List[DaemonDescription] ) -> bool: - # are any daemons running a different major version? - scale_down = False - for name, info in self.mgr.get("mds_metadata").items(): - version = info.get("ceph_version_short") - major_version = None - if version: - major_version = version.split('.')[0] - if not major_version: - self.mgr.log.info('Upgrade: mds.%s version is not known, will retry' % name) - time.sleep(5) - return False - if int(major_version) < int(target_major): - scale_down = True - - if not scale_down: - self.mgr.log.debug('Upgrade: All MDS daemons run same major version') - return True - # scale down all filesystems to 1 MDS assert self.upgrade_state if not self.upgrade_state.fs_original_max_mds: From bca21f01ce3bb32e0951f0fe15da88a81750a191 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Fri, 17 Sep 2021 20:15:01 -0400 Subject: [PATCH 0081/2866] pybind/mgr/cephadm: disable allow_standby_replay during CephFS upgrade Following procedure in [1]. Also: harden checks for active. Ensure "up" and "in" are both [0]. There should be no standby-replay daemon. 
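For illustration only, and not part of this change: a minimal sketch of the precondition being enforced, assuming an mdsmap dict shaped like one filesystem entry of a JSON fs dump; the helper name and sample values are made up.

    CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = 1 << 5     # from ceph_fs.h

    def ready_to_upgrade_mds(mdsmap):
        # Standby-replay must be off and exactly one rank (0) may be up and in.
        if mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY:
            return False
        return mdsmap['in'] == [0] and len(mdsmap['up']) == 1

    sample = {'flags': 0, 'in': [0], 'up': {'mds_0': 4301}}
    print(ready_to_upgrade_mds(sample))           # True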
[1] https://docs.ceph.com/en/pacific/cephfs/upgrading/ Fixes: https://tracker.ceph.com/issues/52654 Signed-off-by: Patrick Donnelly --- src/pybind/mgr/cephadm/upgrade.py | 110 +++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 32 deletions(-) diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index e9148a595f7eb..19f377bbb456c 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -16,6 +16,9 @@ logger = logging.getLogger(__name__) +# from ceph_fs.h +CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5) + def normalize_image_digest(digest: str, default_registry: str) -> str: # normal case: @@ -41,6 +44,7 @@ def __init__(self, error: Optional[str] = None, paused: Optional[bool] = None, fs_original_max_mds: Optional[Dict[str, int]] = None, + fs_original_allow_standby_replay: Optional[Dict[str, bool]] = None ): self._target_name: str = target_name # Use CephadmUpgrade.target_image instead. self.progress_id: str = progress_id @@ -50,6 +54,7 @@ def __init__(self, self.error: Optional[str] = error self.paused: bool = paused or False self.fs_original_max_mds: Optional[Dict[str, int]] = fs_original_max_mds + self.fs_original_allow_standby_replay: Optional[Dict[str, bool]] = fs_original_allow_standby_replay def to_json(self) -> dict: return { @@ -59,6 +64,7 @@ def to_json(self) -> dict: 'target_digests': self.target_digests, 'target_version': self.target_version, 'fs_original_max_mds': self.fs_original_max_mds, + 'fs_original_allow_standby_replay': self.fs_original_allow_standby_replay, 'error': self.error, 'paused': self.paused, } @@ -360,20 +366,39 @@ def _prepare_for_mds_upgrade( assert self.upgrade_state if not self.upgrade_state.fs_original_max_mds: self.upgrade_state.fs_original_max_mds = {} + if not self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay = {} fsmap = self.mgr.get("fs_map") continue_upgrade = True - for i in fsmap.get('filesystems', []): - fs = i["mdsmap"] - fs_id = i["id"] - fs_name = fs["fs_name"] + for fs in fsmap.get('filesystems', []): + fscid = fs["id"] + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] + + # disable allow_standby_replay? + if mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY: + self.mgr.log.info('Upgrade: Disabling standby-replay for filesystem %s' % ( + fs_name + )) + if fscid not in self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay[fscid] = True + self._save_upgrade_state() + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'allow_standby_replay', + 'val': '0', + }) + continue_upgrade = False + continue # scale down this filesystem? 
- if fs["max_mds"] > 1: + if mdsmap["max_mds"] > 1: self.mgr.log.info('Upgrade: Scaling down filesystem %s' % ( fs_name )) - if fs_id not in self.upgrade_state.fs_original_max_mds: - self.upgrade_state.fs_original_max_mds[fs_id] = fs['max_mds'] + if fscid not in self.upgrade_state.fs_original_max_mds: + self.upgrade_state.fs_original_max_mds[fscid] = mdsmap['max_mds'] self._save_upgrade_state() ret, out, err = self.mgr.check_mon_command({ 'prefix': 'fs set', @@ -384,13 +409,15 @@ def _prepare_for_mds_upgrade( continue_upgrade = False continue - if len(fs['info']) > 1: - self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to 1 MDS' % (fs_name)) + if not (mdsmap['in'] == [0] and len(mdsmap['up']) == 1): + self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (fs_name)) time.sleep(10) continue_upgrade = False continue - lone_mds = list(fs['info'].values())[0] + mdss = list(mdsmap['info'].values()) + assert len(mdss) == 1 + lone_mds = mdss[0] if lone_mds['state'] != 'up:active': self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % ( lone_mds['name'], @@ -420,9 +447,9 @@ def _enough_mds_for_ok_to_stop(self, mds_daemon: DaemonDescription) -> bool: # find fs this mds daemon belongs to fsmap = self.mgr.get("fs_map") - for i in fsmap.get('filesystems', []): - fs = i["mdsmap"] - fs_name = fs["fs_name"] + for fs in fsmap.get('filesystems', []): + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] assert mds_daemon.daemon_id if fs_name != mds_daemon.service_name().split('.', 1)[1]: @@ -434,7 +461,7 @@ def _enough_mds_for_ok_to_stop(self, mds_daemon: DaemonDescription) -> bool: [daemon for daemon in self.mgr.cache.get_daemons_by_service(mds_daemon.service_name())]) # standby mds daemons for this fs? - if fs["max_mds"] < mds_count: + if mdsmap["max_mds"] < mds_count: return True return False @@ -761,24 +788,43 @@ def _do_upgrade(self): }) # complete mds upgrade? 
- if daemon_type == 'mds' and self.upgrade_state.fs_original_max_mds: - for i in self.mgr.get("fs_map")['filesystems']: - fs_id = i["id"] - fs_name = i['mdsmap']['fs_name'] - new_max = self.upgrade_state.fs_original_max_mds.get(fs_id) - if new_max: - self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % ( - fs_name, new_max - )) - ret, _, err = self.mgr.check_mon_command({ - 'prefix': 'fs set', - 'fs_name': fs_name, - 'var': 'max_mds', - 'val': str(new_max), - }) - - self.upgrade_state.fs_original_max_mds = {} - self._save_upgrade_state() + if daemon_type == 'mds': + if self.upgrade_state.fs_original_max_mds: + for fs in self.mgr.get("fs_map")['filesystems']: + fscid = fs["id"] + fs_name = fs['mdsmap']['fs_name'] + new_max = self.upgrade_state.fs_original_max_mds.get(fscid, 1) + if new_max > 1: + self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % ( + fs_name, new_max + )) + ret, _, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'max_mds', + 'val': str(new_max), + }) + + self.upgrade_state.fs_original_max_mds = {} + self._save_upgrade_state() + if self.upgrade_state.fs_original_allow_standby_replay: + for fs in self.mgr.get("fs_map")['filesystems']: + fscid = fs["id"] + fs_name = fs['mdsmap']['fs_name'] + asr = self.upgrade_state.fs_original_allow_standby_replay.get(fscid, False) + if asr: + self.mgr.log.info('Upgrade: Enabling allow_standby_replay on filesystem %s' % ( + fs_name + )) + ret, _, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'allow_standby_replay', + 'val': '1' + }) + + self.upgrade_state.fs_original_allow_standby_replay = {} + self._save_upgrade_state() # clean up logger.info('Upgrade: Finalizing container_image settings') From 7812cfb6744fc3bce50e26aa7dd6a4e47a43bb23 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Tue, 5 Oct 2021 13:31:02 -0400 Subject: [PATCH 0082/2866] qa: move CephManager cluster instantiation to subtask This needs to be available for the cephfs_setup task so administration mounts can run ceph commands, potentially through `cephadm shell`. Signed-off-by: Patrick Donnelly --- qa/tasks/ceph.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index 87a0f84eb9337..db86d6d53cc46 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -376,6 +376,20 @@ def crush_setup(ctx, config): yield +@contextlib.contextmanager +def setup_manager(ctx, config): + first_mon = teuthology.get_first_mon(ctx, config, config['cluster']) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[config['cluster']] = CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager.' 
+ config['cluster']), + cluster=config['cluster'], + ) + yield + @contextlib.contextmanager def create_rbd_pool(ctx, config): cluster_name = config['cluster'] @@ -1866,6 +1880,7 @@ def task(ctx, config): lambda: run_daemon(ctx=ctx, config=config, type_='mgr'), lambda: crush_setup(ctx=ctx, config=config), lambda: run_daemon(ctx=ctx, config=config, type_='osd'), + lambda: setup_manager(ctx=ctx, config=config), lambda: create_rbd_pool(ctx=ctx, config=config), lambda: run_daemon(ctx=ctx, config=config, type_='mds'), lambda: cephfs_setup(ctx=ctx, config=config), @@ -1873,17 +1888,6 @@ def task(ctx, config): ] with contextutil.nested(*subtasks): - first_mon = teuthology.get_first_mon(ctx, config, config['cluster']) - (mon,) = ctx.cluster.only(first_mon).remotes.keys() - if not hasattr(ctx, 'managers'): - ctx.managers = {} - ctx.managers[config['cluster']] = CephManager( - mon, - ctx=ctx, - logger=log.getChild('ceph_manager.' + config['cluster']), - cluster=config['cluster'], - ) - try: if config.get('wait-for-healthy', True): healthy(ctx=ctx, config=dict(cluster=config['cluster'])) From 24bb450d39761ed526a6b6eb285c8c528ab90b5a Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Fri, 1 Oct 2021 12:02:14 -0400 Subject: [PATCH 0083/2866] qa: use ctx's ceph_manager to run ceph commands by mount This allows hooks for `cephadm shell` to function so that this code works with cephadm deployments. Signed-off-by: Patrick Donnelly --- qa/tasks/cephfs/mount.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 6224a7eae7b08..0f9c9397125a6 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -56,6 +56,8 @@ def __init__(self, ctx, test_dir, client_id, client_remote, self.cephfs_name = cephfs_name self.cephfs_mntpt = cephfs_mntpt + self.cluster_name = 'ceph' # TODO: use config['cluster'] + self.fs = None self._netns_name = None @@ -124,8 +126,7 @@ def netns_name(self, name): self._netns_name = name def assert_that_ceph_fs_exists(self): - output = self.client_remote.run(args='ceph fs ls', stdout=StringIO()).\ - stdout.getvalue() + output = self.ctx.managers[self.cluster_name].raw_cluster_cmd("fs", "ls") if self.cephfs_name: assert self.cephfs_name in output, \ 'expected ceph fs is not present on the cluster' From dbe5573ed4781cb4b214e701c77be7bc2cddabf3 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Fri, 1 Oct 2021 12:05:12 -0400 Subject: [PATCH 0084/2866] qa: add note about where caps are generated Signed-off-by: Patrick Donnelly --- qa/tasks/cephfs/mount.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 0f9c9397125a6..02deb6f8fe04b 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -566,6 +566,7 @@ def wait_until_mounted(self): raise NotImplementedError() def get_keyring_path(self): + # N.B.: default keyring is /etc/ceph/ceph.keyring; see ceph.py and generate_caps return '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id) def get_key_from_keyfile(self): From 5a7382214fe4dbd4b79773c6e732512ade22793a Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Fri, 1 Oct 2021 12:05:42 -0400 Subject: [PATCH 0085/2866] qa: add tasks to check mds upgrade state Signed-off-by: Patrick Donnelly --- qa/tasks/fs.py | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py index 71d7b22e81462..f7a9330e29b50 100644 --- a/qa/tasks/fs.py +++ b/qa/tasks/fs.py 
@@ -9,6 +9,74 @@ log = logging.getLogger(__name__) +# Everything up to CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1<<5) +CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST<<1) - 1) +def pre_upgrade_save(ctx, config): + """ + That the upgrade procedure doesn't clobber state: save state. + """ + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + state = {} + ctx['mds-upgrade-state'] = state + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = {} + fs_state['epoch'] = mdsmap['epoch'] + fs_state['max_mds'] = mdsmap['max_mds'] + fs_state['flags'] = mdsmap['flags'] & UPGRADE_FLAGS_MASK + state[fscid] = fs_state + log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + +def post_upgrade_checks(ctx, config): + """ + That the upgrade procedure doesn't clobber state. + """ + + state = ctx['mds-upgrade-state'] + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = state[fscid] + log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + # check state was restored to previous values + assert fs_state['max_mds'] == mdsmap['max_mds'] + assert fs_state['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK) + + # now confirm that the upgrade procedure was followed + epoch = mdsmap['epoch'] + pre_upgrade_epoch = fs_state['epoch'] + assert pre_upgrade_epoch < epoch + should_decrease_max_mds = fs_state['max_mds'] > 1 + did_decrease_max_mds = False + should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY + did_disable_allow_standby_replay = False + for i in range(pre_upgrade_epoch+1, mdsmap['epoch']): + old_status = mdsc.status(epoch=i) + old_fs = old_status.get_fsmap(fscid) + old_mdsmap = old_fs['mdsmap'] + if should_decrease_max_mds and old_mdsmap['max_mds'] == 1: + log.debug(f"max_mds reduced in epoch {i}") + did_decrease_max_mds = True + if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY): + log.debug(f"allow_standby_replay disabled in epoch {i}") + did_disable_allow_standby_replay = True + assert not should_decrease_max_mds or did_decrease_max_mds + assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay + + def ready(ctx, config): """ That the file system is ready for clients. From b1420e5771927f5c659e0e5edbc5714035f3df09 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Fri, 1 Oct 2021 12:06:50 -0400 Subject: [PATCH 0086/2866] qa: add test for cephfs upgrade sequence This also checks max_mds>1 and allow_standby_replay are restored to previous values. Future work can add tests for multiple file systems (or volumes). 
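For illustration only, and not part of this change: how the flag mask used by the new checks is built, with made-up flag values; only the low bits up to and including ALLOW_STANDBY_REPLAY enter the comparison, so changes to higher bits do not affect it.

    CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = 1 << 5                 # 0b100000
    CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
    UPGRADE_FLAGS_MASK = (CEPH_MDSMAP_LAST << 1) - 1          # 0b111111

    flags_before = 0b101010                  # hypothetical pre-upgrade mdsmap flags
    flags_after = flags_before | (1 << 6)    # a higher, unrelated bit set afterwards

    assert (flags_before & UPGRADE_FLAGS_MASK) == (flags_after & UPGRADE_FLAGS_MASK)
    print(bin(UPGRADE_FLAGS_MASK))           # 0b111111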
Signed-off-by: Patrick Donnelly --- qa/suites/fs/upgrade/mds_upgrade_sequence/% | 0 qa/suites/fs/upgrade/mds_upgrade_sequence/.qa | 1 + .../bluestore-bitmap.yaml | 1 + .../centos_8.3_container_tools_3.0.yaml | 1 + .../fs/upgrade/mds_upgrade_sequence/conf | 1 + .../upgrade/mds_upgrade_sequence/overrides/% | 0 .../mds_upgrade_sequence/overrides/.qa | 1 + .../overrides/pg-warn.yaml | 5 +++ .../overrides/whitelist_health.yaml | 1 + .../whitelist_wrongly_marked_down.yaml | 1 + .../upgrade/mds_upgrade_sequence/roles.yaml | 11 +++++++ .../fs/upgrade/mds_upgrade_sequence/tasks/% | 0 .../fs/upgrade/mds_upgrade_sequence/tasks/.qa | 1 + .../mds_upgrade_sequence/tasks/0-v16.2.4.yaml | 25 ++++++++++++++ .../mds_upgrade_sequence/tasks/1-volume/% | 0 .../mds_upgrade_sequence/tasks/1-volume/.qa | 1 + .../tasks/1-volume/0-create.yaml | 5 +++ .../tasks/1-volume/1-ranks/.qa | 1 + .../tasks/1-volume/1-ranks/1.yaml | 4 +++ .../tasks/1-volume/1-ranks/2.yaml | 4 +++ .../tasks/1-volume/2-allow_standby_replay/.qa | 1 + .../1-volume/2-allow_standby_replay/no.yaml | 4 +++ .../1-volume/2-allow_standby_replay/yes.yaml | 4 +++ .../tasks/1-volume/3-verify.yaml | 7 ++++ .../mds_upgrade_sequence/tasks/2-client.yaml | 3 ++ .../tasks/3-upgrade-with-workload.yaml | 33 +++++++++++++++++++ .../mds_upgrade_sequence/tasks/4-verify.yaml | 5 +++ qa/suites/orch/cephadm/mds_upgrade_sequence | 1 + 28 files changed, 122 insertions(+) create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/% create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/.qa create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.3_container_tools_3.0.yaml create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/conf create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_health.yaml create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_wrongly_marked_down.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-v16.2.4.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml create mode 120000 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-verify.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml create mode 100644 
qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-with-workload.yaml create mode 100644 qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-verify.yaml create mode 120000 qa/suites/orch/cephadm/mds_upgrade_sequence diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa new file mode 120000 index 0000000000000..a602a0353e751 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml new file mode 120000 index 0000000000000..fb603bc9a64c8 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.3_container_tools_3.0.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.3_container_tools_3.0.yaml new file mode 120000 index 0000000000000..479a5c26e451b --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.3_container_tools_3.0.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.3_container_tools_3.0.yaml \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/conf b/qa/suites/fs/upgrade/mds_upgrade_sequence/conf new file mode 120000 index 0000000000000..6d47129847fad --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/ \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa new file mode 120000 index 0000000000000..a602a0353e751 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml new file mode 100644 index 0000000000000..4ae54a40d3195 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_health.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_health.yaml new file mode 120000 index 0000000000000..74f39a49b27ed --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_health.yaml \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 0000000000000..b4528c0f8c096 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml 
b/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml new file mode 100644 index 0000000000000..bce4ecd34ccb3 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml @@ -0,0 +1,11 @@ +roles: +- - host.a + - client.0 + - osd.0 + - osd.1 + - osd.2 +- - host.b + - client.1 + - osd.3 + - osd.4 + - osd.5 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa new file mode 120000 index 0000000000000..a602a0353e751 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-v16.2.4.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-v16.2.4.yaml new file mode 100644 index 0000000000000..e61d622669715 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-v16.2.4.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + setup ceph/pacific v16.2.4 + +tasks: +- install: + tag: v16.2.4 + exclude_packages: + - ceph-volume +- cephadm: + roleless: true + image: docker.io/ceph/ceph:v16.2.4 + cephadm_branch: v16.2.4 + cephadm_git_url: https://github.com/ceph/ceph + # needed for v16.2.4 due to --skip-admin-label + avoid_pacific_features: true +- print: "**** done starting v16.2.4" +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa new file mode 120000 index 0000000000000..a602a0353e751 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml new file mode 100644 index 0000000000000..5ee0022c6bd6e --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml @@ -0,0 +1,5 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs volume create cephfs --placement=4 + - ceph fs dump diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa new file mode 120000 index 0000000000000..a602a0353e751 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml new file mode 100644 index 0000000000000..8c1cd2fe0a1cd --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs max_mds 1 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml new file mode 100644 index 0000000000000..fcd3b1ea42ea7 --- /dev/null +++ 
b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs max_mds 2 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa new file mode 120000 index 0000000000000..a602a0353e751 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml new file mode 100644 index 0000000000000..3dbc810899465 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs allow_standby_replay false diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml new file mode 100644 index 0000000000000..fb894425e990c --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs allow_standby_replay true diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-verify.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-verify.yaml new file mode 100644 index 0000000000000..ec2a2a54f29de --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-verify.yaml @@ -0,0 +1,7 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs dump + - ceph --format=json fs dump | jq -e ".filesystems | length == 1" + - ceph --format=json mds versions | jq -e ". 
| add == 4" +- fs.pre_upgrade_save: diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml new file mode 100644 index 0000000000000..d7352c6bf5c2d --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml @@ -0,0 +1,3 @@ +tasks: +- kclient: +- print: "**** done kclient client" diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-with-workload.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-with-workload.yaml new file mode 100644 index 0000000000000..200c4dcb58bb3 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-with-workload.yaml @@ -0,0 +1,33 @@ +tasks: +- parallel: + - upgrade-tasks + - workload-tasks + +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - cephadm.shell: + env: [sha1] + host.a: + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; ceph fs dump; sleep 30 ; done + - ceph orch ps + - ceph versions + - echo "wait for servicemap items w/ changing names to refresh" + - sleep 60 + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + +workload-tasks: + sequential: + - workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-verify.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-verify.yaml new file mode 100644 index 0000000000000..c2b657e5a3b8a --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-verify.yaml @@ -0,0 +1,5 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs dump +- fs.post_upgrade_checks: diff --git a/qa/suites/orch/cephadm/mds_upgrade_sequence b/qa/suites/orch/cephadm/mds_upgrade_sequence new file mode 120000 index 0000000000000..24aa41c1039f9 --- /dev/null +++ b/qa/suites/orch/cephadm/mds_upgrade_sequence @@ -0,0 +1 @@ +.qa/suites/fs/upgrade/mds_upgrade_sequence/ \ No newline at end of file From 485841b255e1bf59a64c285b6b3971577ce87811 Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Tue, 15 Jun 2021 18:25:22 +0530 Subject: [PATCH 0087/2866] qa: import CommandFailedError from exceptions not run Stop importing CommandFailedError from teuthology.orchestra.run, it is actually defined in teuthology.exception. 
Fixes: https://tracker.ceph.com/issues/51226 Signed-off-by: Rishabh Dave --- qa/tasks/ceph_test_case.py | 2 +- qa/tasks/cephfs/cephfs_test_case.py | 2 +- qa/tasks/cephfs/fuse_mount.py | 2 +- qa/tasks/cephfs/kernel_mount.py | 2 +- qa/tasks/cephfs/mount.py | 3 ++- qa/tasks/cephfs/test_admin.py | 3 ++- qa/tasks/cephfs/test_auto_repair.py | 2 +- qa/tasks/cephfs/test_cephfs_shell.py | 2 +- qa/tasks/cephfs/test_client_recovery.py | 2 +- qa/tasks/cephfs/test_data_scan.py | 2 +- qa/tasks/cephfs/test_exports.py | 2 +- qa/tasks/cephfs/test_forward_scrub.py | 2 +- qa/tasks/cephfs/test_full.py | 2 +- qa/tasks/cephfs/test_misc.py | 2 +- qa/tasks/cephfs/test_multifs_auth.py | 2 +- qa/tasks/cephfs/test_multimds_misc.py | 2 +- qa/tasks/cephfs/test_recovery_pool.py | 2 +- qa/tasks/cephfs/test_snapshots.py | 3 ++- qa/tasks/cephfs/test_strays.py | 3 ++- qa/tasks/kclient.py | 2 +- qa/tasks/mds_creation_failure.py | 3 ++- qa/tasks/vstart_runner.py | 2 +- qa/tasks/workunit.py | 2 +- 23 files changed, 28 insertions(+), 23 deletions(-) diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py index 7040853dfb437..0ce9e81f1c138 100644 --- a/qa/tasks/ceph_test_case.py +++ b/qa/tasks/ceph_test_case.py @@ -3,7 +3,7 @@ import time import logging -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError if TYPE_CHECKING: from tasks.mgr.mgr_test_case import MgrCluster diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index 6b3460ff248e2..8383359d5aa7f 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -9,7 +9,7 @@ from teuthology import contextutil from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py index 5f4978e57155d..8ecaab7b78bba 100644 --- a/qa/tasks/cephfs/fuse_mount.py +++ b/qa/tasks/cephfs/fuse_mount.py @@ -8,7 +8,7 @@ from teuthology.contextutil import MaxWhileTries from teuthology.contextutil import safe_while from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from tasks.ceph_manager import get_valgrind_args from tasks.cephfs.mount import CephFSMount diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 55fdae5d9e84d..9d14f4d749a2c 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -6,7 +6,7 @@ from io import StringIO from textwrap import dedent -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from teuthology.orchestra import run from teuthology.contextutil import MaxWhileTries diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 6224a7eae7b08..dccac187db659 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -14,7 +14,8 @@ from teuthology.contextutil import safe_while from teuthology.misc import get_file, write_file from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError, ConnectionLostError, Raw +from teuthology.orchestra.run import Raw +from teuthology.exceptions import CommandFailedError, ConnectionLostError from tasks.cephfs.filesystem import Filesystem diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py index aff83b86797ba..d7a083d72ec1f 100644 
--- a/qa/tasks/cephfs/test_admin.py +++ b/qa/tasks/cephfs/test_admin.py @@ -6,7 +6,8 @@ from io import StringIO from os.path import join as os_path_join -from teuthology.orchestra.run import CommandFailedError, Raw +from teuthology.orchestra.run import Raw +from teuthology.exceptions import CommandFailedError from tasks.cephfs.cephfs_test_case import CephFSTestCase from tasks.cephfs.filesystem import FileLayout, FSMissing diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py index 00c86b68bf60d..e6f0a8f0bd4cb 100644 --- a/qa/tasks/cephfs/test_auto_repair.py +++ b/qa/tasks/cephfs/test_auto_repair.py @@ -6,7 +6,7 @@ import logging import time -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from tasks.cephfs.cephfs_test_case import CephFSTestCase diff --git a/qa/tasks/cephfs/test_cephfs_shell.py b/qa/tasks/cephfs/test_cephfs_shell.py index 83ee3991196e0..1ee5c7f84645d 100644 --- a/qa/tasks/cephfs/test_cephfs_shell.py +++ b/qa/tasks/cephfs/test_cephfs_shell.py @@ -10,7 +10,7 @@ import math from time import sleep from tasks.cephfs.cephfs_test_case import CephFSTestCase -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py index 24726b369f9fd..eea93939d24b5 100644 --- a/qa/tasks/cephfs/test_client_recovery.py +++ b/qa/tasks/cephfs/test_client_recovery.py @@ -11,7 +11,7 @@ import os from teuthology.orchestra import run -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.packaging import get_package_version diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py index dcb7eda40a56d..177ffe01eaa08 100644 --- a/qa/tasks/cephfs/test_data_scan.py +++ b/qa/tasks/cephfs/test_data_scan.py @@ -13,7 +13,7 @@ from collections import namedtuple, defaultdict from textwrap import dedent -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py index d2421bedcbbc4..4961edb520d01 100644 --- a/qa/tasks/cephfs/test_exports.py +++ b/qa/tasks/cephfs/test_exports.py @@ -3,7 +3,7 @@ import time from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.cephfs_test_case import CephFSTestCase -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py index 82630e069587b..ab97a6f596ca1 100644 --- a/qa/tasks/cephfs/test_forward_scrub.py +++ b/qa/tasks/cephfs/test_forward_scrub.py @@ -14,7 +14,7 @@ from io import BytesIO from textwrap import dedent -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from tasks.cephfs.cephfs_test_case import CephFSTestCase import struct diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py index 2b7166d6b2be2..962a3d036d1fc 100644 --- a/qa/tasks/cephfs/test_full.py +++ b/qa/tasks/cephfs/test_full.py @@ -7,7 +7,7 @@ except: 
# make it work for python2 pass -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.cephfs_test_case import CephFSTestCase diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py index 85b11cc6ca045..0367c141f29cc 100644 --- a/qa/tasks/cephfs/test_misc.py +++ b/qa/tasks/cephfs/test_misc.py @@ -2,7 +2,7 @@ from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.cephfs_test_case import CephFSTestCase -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError import errno import time import json diff --git a/qa/tasks/cephfs/test_multifs_auth.py b/qa/tasks/cephfs/test_multifs_auth.py index 2b28c61ca043e..6768d57cfc0af 100644 --- a/qa/tasks/cephfs/test_multifs_auth.py +++ b/qa/tasks/cephfs/test_multifs_auth.py @@ -8,7 +8,7 @@ # CapsHelper is subclassed from CephFSTestCase from tasks.cephfs.caps_helper import CapsHelper -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/test_multimds_misc.py b/qa/tasks/cephfs/test_multimds_misc.py index 3c464e91d0a28..2bb6257c7eb8a 100644 --- a/qa/tasks/cephfs/test_multimds_misc.py +++ b/qa/tasks/cephfs/test_multimds_misc.py @@ -2,7 +2,7 @@ import errno from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.contextutil import safe_while -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py index 9926b3670cac3..5dd5960add1f2 100644 --- a/qa/tasks/cephfs/test_recovery_pool.py +++ b/qa/tasks/cephfs/test_recovery_pool.py @@ -6,7 +6,7 @@ import traceback from collections import namedtuple -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from tasks.cephfs.cephfs_test_case import CephFSTestCase log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py index fa4d9431fefdf..e651bb1c6b103 100644 --- a/qa/tasks/cephfs/test_snapshots.py +++ b/qa/tasks/cephfs/test_snapshots.py @@ -4,7 +4,8 @@ from textwrap import dedent from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.cephfs_test_case import CephFSTestCase -from teuthology.orchestra.run import CommandFailedError, Raw +from teuthology.orchestra.run import Raw +from teuthology.exceptions import CommandFailedError log = logging.getLogger(__name__) diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py index f8313badd0358..44ca0b013d840 100644 --- a/qa/tasks/cephfs/test_strays.py +++ b/qa/tasks/cephfs/test_strays.py @@ -5,7 +5,8 @@ import datetime import gevent -from teuthology.orchestra.run import CommandFailedError, Raw +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import Raw from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology log = logging.getLogger(__name__) diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py index d7bc9fa835b55..2749f01daeef1 100644 --- a/qa/tasks/kclient.py +++ b/qa/tasks/kclient.py @@ -5,7 +5,7 @@ import logging from teuthology.misc import deep_merge -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from teuthology 
import misc from teuthology.contextutil import MaxWhileTries from tasks.cephfs.kernel_mount import KernelMount diff --git a/qa/tasks/mds_creation_failure.py b/qa/tasks/mds_creation_failure.py index 58314086c0337..2ab8f70dd9d2f 100644 --- a/qa/tasks/mds_creation_failure.py +++ b/qa/tasks/mds_creation_failure.py @@ -5,7 +5,8 @@ import time from tasks import ceph_manager from teuthology import misc -from teuthology.orchestra.run import CommandFailedError, Raw +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import Raw log = logging.getLogger(__name__) diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py index eb85d5ab43a2d..916256495d454 100644 --- a/qa/tasks/vstart_runner.py +++ b/qa/tasks/vstart_runner.py @@ -56,7 +56,7 @@ from teuthology.config import config as teuth_config from teuthology.contextutil import safe_while from teuthology.contextutil import MaxWhileTries -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError try: import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) diff --git a/qa/tasks/workunit.py b/qa/tasks/workunit.py index 371d2a2dddb5a..6412c99bc04d0 100644 --- a/qa/tasks/workunit.py +++ b/qa/tasks/workunit.py @@ -12,7 +12,7 @@ from teuthology import misc from teuthology.config import config as teuth_config -from teuthology.orchestra.run import CommandFailedError +from teuthology.exceptions import CommandFailedError from teuthology.parallel import parallel from teuthology.orchestra import run From eb25549b8c39a3ad2ee3152a35338e90f0693a13 Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Tue, 5 Oct 2021 23:57:36 +0530 Subject: [PATCH 0088/2866] qa/cephfs: update xfstests_dev for centos stream Fixes: https://tracker.ceph.com/issues/52821 Signed-off-by: Rishabh Dave --- qa/tasks/cephfs/xfstests_dev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/tasks/cephfs/xfstests_dev.py b/qa/tasks/cephfs/xfstests_dev.py index c0bdf338e1671..fe957b0045e65 100644 --- a/qa/tasks/cephfs/xfstests_dev.py +++ b/qa/tasks/cephfs/xfstests_dev.py @@ -95,7 +95,7 @@ def install_deps(self): # we keep fedora here so that right deps are installed when this test # is run locally by a dev. if distro in ('redhatenterpriseserver', 'redhatenterprise', 'fedora', - 'centos'): + 'centos', 'centosstream'): deps = """acl attr automake bc dbench dump e2fsprogs fio \ gawk gcc indent libtool lvm2 make psmisc quota sed \ xfsdump xfsprogs \ From 97f9f5362bea855628c2f82c31176bd129db19b7 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Tue, 5 Oct 2021 13:49:17 -0400 Subject: [PATCH 0089/2866] qa: skip c-v install on pacific start Continuation of 4e525127fbb710c1ac074cf61b448055781a69e3. 
Fixes: https://tracker.ceph.com/issues/52822 Signed-off-by: Patrick Donnelly --- .../fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml | 1 + .../featureful_client/upgraded_client/tasks/0-octopus.yaml | 1 + qa/suites/fs/upgrade/nofs/tasks/0-octopus.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml index 40d34753da1b8..e7774423fa544 100644 --- a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml @@ -11,6 +11,7 @@ tasks: - ceph-mgr-rook - ceph-mgr-cephadm - cephadm + - ceph-volume extra_packages: ['librados2'] - print: "**** done installing octopus" - ceph: diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml index 40d34753da1b8..e7774423fa544 100644 --- a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml @@ -11,6 +11,7 @@ tasks: - ceph-mgr-rook - ceph-mgr-cephadm - cephadm + - ceph-volume extra_packages: ['librados2'] - print: "**** done installing octopus" - ceph: diff --git a/qa/suites/fs/upgrade/nofs/tasks/0-octopus.yaml b/qa/suites/fs/upgrade/nofs/tasks/0-octopus.yaml index 40d34753da1b8..e7774423fa544 100644 --- a/qa/suites/fs/upgrade/nofs/tasks/0-octopus.yaml +++ b/qa/suites/fs/upgrade/nofs/tasks/0-octopus.yaml @@ -11,6 +11,7 @@ tasks: - ceph-mgr-rook - ceph-mgr-cephadm - cephadm + - ceph-volume extra_packages: ['librados2'] - print: "**** done installing octopus" - ceph: From b96aa5d1844e334c01e0e0dd17179468e881e883 Mon Sep 17 00:00:00 2001 From: Paul Cuzner Date: Wed, 6 Oct 2021 14:32:47 +1300 Subject: [PATCH 0090/2866] monitoring:Updated README Signed-off-by: Paul Cuzner --- monitoring/prometheus/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/monitoring/prometheus/README.md b/monitoring/prometheus/README.md index 205e373b19c0e..7ec8f309adcf5 100644 --- a/monitoring/prometheus/README.md +++ b/monitoring/prometheus/README.md @@ -7,8 +7,8 @@ file in a place according to your Prometheus configuration (wherever the `rules` configuration stanza points). ### SNMP -Ceph provides a MIB (CEPH-PROMETHEUS-ALERT-MIB.txt) to support sending prometheus -alerts through to an SNMP management platform. The translation from prometheus -alert to SNMP trap requires the prometheus alert to contain an OID that maps to -a definition within the MIB. When making changes to the prometheus alert rules +Ceph provides a MIB (CEPH-PROMETHEUS-ALERT-MIB.txt) to support sending Prometheus +alerts through to an SNMP management platform. The translation from Prometheus +alert to SNMP trap requires the Prometheus alert to contain an OID that maps to +a definition within the MIB. When making changes to the Prometheus alert rules file, developers should include any necessary changes to the MIB. From ff88d7de52b347486a626375816fc119ee8fd72f Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Thu, 7 Oct 2021 00:40:13 -0400 Subject: [PATCH 0091/2866] qa: skip internal metadata directory when scanning ceph debugfs directory kclient patchset https://patchwork.kernel.org/project/ceph-devel/list/?series=556049 introduces `meta` directory to add debugging entries. 
This needs to be filtered when scanning ceph debugfs directory. Fixes: https://tracker.ceph.com/issues/52824 Signed-off-by: Venky Shankar --- qa/tasks/cephfs/kernel_mount.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 55fdae5d9e84d..e8018eeec3640 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -16,7 +16,8 @@ UMOUNT_TIMEOUT = 300 - +# internal metadata directory +DEBUGFS_META_DIR = 'meta' class KernelMount(CephFSMount): def __init__(self, ctx, test_dir, client_id, client_remote, @@ -200,6 +201,8 @@ def _get_global_id(self): def get_id_to_dir(): result = {} for dir in glob.glob("/sys/kernel/debug/ceph/*"): + if os.path.basename(dir) == DEBUGFS_META_DIR: + continue mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() global_id = mds_sessions_lines[0].split()[1].strip('"') client_id = mds_sessions_lines[1].split()[1].strip('"') From 6d6236dc8d15636af8060057e6e69c26c473f987 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Fri, 1 Oct 2021 04:55:40 -0400 Subject: [PATCH 0092/2866] mds: skip journaling blocklisted clients when in `replay` state When a standby MDS is transitioning to active, it passes through `replay` state. When the MDS is in this state, there are no journal segments available for recording journal updates. If the MDS receives an OSDMap update in this state, journaling blocklisted clients causes a crash since no journal segments are available. This is a bit hard to reproduce as it requires correct timing of an OSDMap update along with various other factors. Note that, when the MDS reaches `reconnect` state, it will journal the blocklisted clients anyway. This partially fixes tracker: https://tracker.ceph.com/issues/51589 which mentions a similar crash but in `reconnect` state. However, that crash was seen in nautilus. A couple of minor changes include removing hardcoded function names and carving out reusable parts into a separate function. 
Partially-fixes: https://tracker.ceph.com/issues/51589 Signed-off-by: Venky Shankar --- src/mds/MDSRank.cc | 38 ++++++++++++++++++++++---------------- src/mds/MDSRank.h | 4 ++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 190a53706455d..f5f0585060dab 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1930,6 +1930,17 @@ void MDSRank::resolve_done() snapclient->sync(new C_MDSInternalNoop); } +void MDSRank::apply_blocklist(const std::set &addrs, epoch_t epoch) { + auto victims = server->apply_blocklist(addrs); + dout(4) << __func__ << ": killed " << victims << " blocklisted sessions (" + << addrs.size() << " blocklist entries, " + << sessionmap.get_sessions().size() << ")" << dendl; + if (victims) { + set_osd_epoch_barrier(epoch); + } +} + + void MDSRank::reconnect_start() { dout(1) << "reconnect_start" << dendl; @@ -1947,13 +1958,8 @@ void MDSRank::reconnect_start() o.get_blocklist(&blocklist); epoch = o.get_epoch(); }); - auto killed = server->apply_blocklist(blocklist); - dout(4) << "reconnect_start: killed " << killed << " blocklisted sessions (" - << blocklist.size() << " blocklist entries, " - << sessionmap.get_sessions().size() << ")" << dendl; - if (killed) { - set_osd_epoch_barrier(epoch); - } + + apply_blocklist(blocklist, epoch); server->reconnect_clients(new C_MDS_VoidFn(this, &MDSRank::reconnect_done)); finish_contexts(g_ceph_context, waiting_for_reconnect); @@ -3455,17 +3461,17 @@ void MDSRankDispatcher::handle_osd_map() purge_queue.update_op_limit(*mdsmap); - std::set newly_blocklisted; - objecter->consume_blocklist_events(&newly_blocklisted); - auto epoch = objecter->with_osdmap([](const OSDMap &o){return o.get_epoch();}); - dout(4) << "handle_osd_map epoch " << epoch << ", " - << newly_blocklisted.size() << " new blocklist entries" << dendl; - auto victims = server->apply_blocklist(newly_blocklisted); - if (victims) { - set_osd_epoch_barrier(epoch); + // it's ok if replay state is reached via standby-replay, the + // reconnect state will journal blocklisted clients (journal + // is opened for writing in `replay_done` before moving to + // up:resolve). + if (!is_replay()) { + std::set newly_blocklisted; + objecter->consume_blocklist_events(&newly_blocklisted); + auto epoch = objecter->with_osdmap([](const OSDMap &o){return o.get_epoch();}); + apply_blocklist(newly_blocklisted, epoch); } - // By default the objecter only requests OSDMap updates on use, // we would like to always receive the latest maps in order to // apply policy based on the FULL flag. diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 6ba2001932867..21b5ac7924b62 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -546,6 +546,10 @@ class MDSRank { Context *create_async_exec_context(C_ExecAndReply *ctx); + // blocklist the provided addrs and set OSD epoch barrier + // with the provided epoch. + void apply_blocklist(const std::set &addrs, epoch_t epoch); + // Incarnation as seen in MDSMap at the point where a rank is // assigned. int incarnation = 0; From bce34dd68634d241b451111dcf2e931837eb4bfd Mon Sep 17 00:00:00 2001 From: Cory Snyder Date: Tue, 5 Oct 2021 19:07:11 +0000 Subject: [PATCH 0093/2866] rgw/rgw_rados: make RGW request IDs non-deterministic Use a random number vs. incremental counter for first component of request ID. 
Fixes: https://tracker.ceph.com/issues/52818 Signed-off-by: Cory Snyder --- src/rgw/rgw_rados.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 26b603b128b01..cb96b9dbb2e59 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -9,6 +9,7 @@ #include "include/rados/librados.hpp" #include "include/Context.h" +#include "include/random.h" #include "common/RefCountedObj.h" #include "common/ceph_time.h" #include "common/Timer.h" @@ -355,7 +356,7 @@ class RGWRados int open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx, bool mostly_omap); - std::atomic max_req_id = { 0 }; + ceph::mutex lock = ceph::make_mutex("rados_timer_lock"); SafeTimer *timer; @@ -506,7 +507,7 @@ class RGWRados } uint64_t get_new_req_id() { - return ++max_req_id; + return ceph::util::generate_random_number(); } librados::IoCtx* get_lc_pool_ctx() { From 008b41bfc7a80d215314ece61af9291094e3f274 Mon Sep 17 00:00:00 2001 From: Daniel Pivonka Date: Thu, 7 Oct 2021 15:52:05 -0400 Subject: [PATCH 0094/2866] mgr/cephadm: track daemon deployment post actions by daemon name instead of type in rare cases tracking by type can cause a daemon to be missed when doing post actions if it is not in the cache yet Signed-off-by: Daniel Pivonka --- src/pybind/mgr/cephadm/serve.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 1cf024fac310a..59191999532c2 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1004,8 +1004,12 @@ def _check_daemons(self) -> None: # do daemon post actions for daemon_type, daemon_descs in daemons_post.items(): - if daemon_type in self.mgr.requires_post_actions: - self.mgr.requires_post_actions.remove(daemon_type) + run_post = False + for d in daemon_descs: + if d.name() in self.mgr.requires_post_actions: + self.mgr.requires_post_actions.remove(d.name()) + run_post = True + if run_post: self.mgr._get_cephadm_service(daemon_type_to_service( daemon_type)).daemon_check_post(daemon_descs) @@ -1125,7 +1129,7 @@ def _create_daemon(self, DaemonDescriptionStatus.running, 'starting') self.mgr.cache.add_daemon(daemon_spec.host, sd) if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS: - self.mgr.requires_post_actions.add(daemon_spec.daemon_type) + self.mgr.requires_post_actions.add(daemon_spec.name()) self.mgr.cache.invalidate_host_daemons(daemon_spec.host) self.mgr.cache.update_daemon_config_deps( From 00e5e5d5cdb56b7e249ffc98aeb456c9ac1c5f48 Mon Sep 17 00:00:00 2001 From: Dai Zhiwei Date: Fri, 8 Oct 2021 14:26:43 +0800 Subject: [PATCH 0095/2866] qa/tasks: replace iterkeys() with keys() in Python 3 Python 2.7 reached the end of its lifetime, the pr fixes teuthology task error in Python 3.x Fixes: https://tracker.ceph.com/issues/52878 Signed-off-by: Dai Zhiwei --- qa/tasks/netsplit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/qa/tasks/netsplit.py b/qa/tasks/netsplit.py index b6614dc5061b4..0a9484a89a5c0 100644 --- a/qa/tasks/netsplit.py +++ b/qa/tasks/netsplit.py @@ -35,8 +35,8 @@ def disconnect(ctx, config): (ip1, _) = get_ip_and_ports(ctx, config[0]) (ip2, _) = get_ip_and_ports(ctx, config[1]) - (host1,) = ctx.cluster.only(config[0]).remotes.iterkeys() - (host2,) = ctx.cluster.only(config[1]).remotes.iterkeys() + (host1,) = ctx.cluster.only(config[0]).remotes.keys() + (host2,) = ctx.cluster.only(config[1]).remotes.keys() assert host1 is not None 
assert host2 is not None @@ -58,8 +58,8 @@ def reconnect(ctx, config): (ip1, _) = get_ip_and_ports(ctx, config[0]) (ip2, _) = get_ip_and_ports(ctx, config[1]) - (host1,) = ctx.cluster.only(config[0]).remotes.iterkeys() - (host2,) = ctx.cluster.only(config[1]).remotes.iterkeys() + (host1,) = ctx.cluster.only(config[0]).remotes.keys() + (host2,) = ctx.cluster.only(config[1]).remotes.keys() assert host1 is not None assert host2 is not None From 0be0772e8af4caab8f63271f3e6a908fd2a217aa Mon Sep 17 00:00:00 2001 From: Anthony D'Atri Date: Sun, 10 Oct 2021 12:31:09 -0700 Subject: [PATCH 0096/2866] ceph: document rgw_multipart_min_part_size Signed-off-by: Anthony D'Atri --- doc/radosgw/config-ref.rst | 1 + src/common/options/rgw.yaml.in | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/radosgw/config-ref.rst b/doc/radosgw/config-ref.rst index 371ce89d25d74..295fa8ce2d77e 100644 --- a/doc/radosgw/config-ref.rst +++ b/doc/radosgw/config-ref.rst @@ -36,6 +36,7 @@ instances or all radosgw-admin options can be put into the ``[global]`` or the .. confval:: rgw_exit_timeout_secs .. confval:: rgw_get_obj_window_size .. confval:: rgw_get_obj_max_req_size +.. confval:: rgw_multipart_min_part_size .. confval:: rgw_relaxed_s3_bucket_names .. confval:: rgw_list_buckets_max_chunk .. confval:: rgw_override_bucket_index_max_shards diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index 0cbf397783eeb..4bfe1c596c3dc 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -62,9 +62,11 @@ options: desc: Set RGW max chunk size long_desc: The chunk size is the size of RADOS I/O requests that RGW sends when accessing data objects. RGW read and write operations will never request more than - this amount in a single request. This also defines the RGW object head size, as + this amount in a single request. This also defines the RGW head object size, as head operations need to be atomic, and anything larger than this would require - more than a single operation. + more than a single operation. When RGW objects are written to the default + storage class, up to this amount of payload data will be stored alongside + metadata in the head object. default: 4_M services: - rgw @@ -2119,7 +2121,7 @@ options: type: size level: advanced desc: Minimum S3 multipart-upload part size - long_desc: When doing a multipart upload, each part (other than the last part) should + long_desc: When doing a multipart upload, each part (other than the last part) must be at least this size. 
default: 5_M services: From d388c5e958ddf5447c78db50ca2061bb443d2227 Mon Sep 17 00:00:00 2001 From: Avan Thakkar Date: Fri, 1 Oct 2021 03:56:42 +0530 Subject: [PATCH 0097/2866] mgr/dashboard: replace Client connections with active-stdby mgrs Fixes: https://tracker.ceph.com/issues/52121 Signed-off-by: Avan Thakkar --- .../grafana/dashboards/ceph-cluster.json | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/monitoring/grafana/dashboards/ceph-cluster.json b/monitoring/grafana/dashboards/ceph-cluster.json index e4e367efdd8b3..1b91bfe418512 100644 --- a/monitoring/grafana/dashboards/ceph-cluster.json +++ b/monitoring/grafana/dashboards/ceph-cluster.json @@ -678,7 +678,7 @@ "type": "vonage-status-panel" }, { - "colorMode": "Disabled", + "colorMode": "Panel", "colors": { "crit": "rgba(245, 54, 54, 0.9)", "disable": "rgba(128, 128, 128, 0.9)", @@ -706,21 +706,36 @@ "targets": [ { "aggregation": "Last", - "alias": "Clients", + "alias": "Active", "decimals": 2, "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "ceph_mds_server_handle_client_session", + "expr": "count(ceph_mgr_status == 1) or vector(0)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Clients", + "legendFormat": "Active", "refId": "A", "units": "none", "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Standby", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 0) or vector(0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Standby", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" } ], - "title": "Client connections", + "title": "MGRs", "type": "vonage-status-panel" }, { From e517882f2138c0e2aab1c08c8b3c882b92bcaf37 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 11 Oct 2021 11:57:40 -0500 Subject: [PATCH 0098/2866] mgr/rook: factor out replica/failureDomain calc Signed-off-by: Sage Weil --- src/pybind/mgr/rook/module.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 0e53aea449285..27b80538b6734 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -416,6 +416,20 @@ def _list_daemons(self, return result + def _get_pool_params(self) -> Tuple[str, str]: + num_replicas = self.get_ceph_option('osd_pool_default_size') + assert type(num_replicas) is int + + leaf_type_id = self.get_ceph_option('osd_crush_chooseleaf_type') + assert type(leaf_type_id) is int + crush = self.get('osd_map_crush') + leaf_type = 'host' + for t in crush['types']: + if t['type_id'] == leaf_type_id: + leaf_type = t['name'] + break + return num_replicas, leaf_type + @handle_orch_error def remove_service(self, service_name: str) -> str: service_type, service_name = service_name.split('.', 1) @@ -444,16 +458,7 @@ def apply_mds(self, spec): @handle_orch_error def apply_rgw(self, spec): # type: (RGWSpec) -> str - num_replicas = self.get_ceph_option('osd_pool_default_size') - assert type(num_replicas) is int - leaf_type_id = self.get_ceph_option('osd_crush_chooseleaf_type') - assert type(leaf_type_id) is int - crush = self.get('osd_map_crush') - leaf_type = 'host' - for t in crush['types']: - if t['type_id'] == leaf_type_id: - leaf_type = t['name'] - break + num_replicas, leaf_type = self._get_pool_params() return 
self.rook_cluster.apply_objectstore(spec, num_replicas, leaf_type) @handle_orch_error From 1a67d3e559c50bad1ccc42bf1706326ffa18da80 Mon Sep 17 00:00:00 2001 From: Joseph Sawaya Date: Fri, 3 Sep 2021 13:30:43 -0400 Subject: [PATCH 0099/2866] mgr/rook: apply mds using placement spec and osd_pool_default_size This commit changes the apply_mds command in the rook orchestrator to support some placement specs and also sets the replica size according to the osd_pool_default_size ceph option. This commit also adds `orch apply mds` to the QA to test if the command runs. Signed-off-by: Joseph Sawaya --- qa/suites/orch/rook/smoke/3-final.yaml | 1 + src/pybind/mgr/rook/module.py | 5 +-- src/pybind/mgr/rook/rook_cluster.py | 47 ++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/qa/suites/orch/rook/smoke/3-final.yaml b/qa/suites/orch/rook/smoke/3-final.yaml index 90bbafad6e0f2..91a43f4acc8b5 100644 --- a/qa/suites/orch/rook/smoke/3-final.yaml +++ b/qa/suites/orch/rook/smoke/3-final.yaml @@ -7,3 +7,4 @@ tasks: - ceph orch host ls - ceph orch device ls - ceph orch apply rgw foo + - ceph orch apply mds foo diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 27b80538b6734..d1412a708374d 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -416,7 +416,7 @@ def _list_daemons(self, return result - def _get_pool_params(self) -> Tuple[str, str]: + def _get_pool_params(self) -> Tuple[int, str]: num_replicas = self.get_ceph_option('osd_pool_default_size') assert type(num_replicas) is int @@ -453,7 +453,8 @@ def apply_mon(self, spec): @handle_orch_error def apply_mds(self, spec): # type: (ServiceSpec) -> str - return self.rook_cluster.apply_filesystem(spec) + num_replicas, leaf_type = self._get_pool_params() + return self.rook_cluster.apply_filesystem(spec, num_replicas, leaf_type) @handle_orch_error def apply_rgw(self, spec): diff --git a/src/pybind/mgr/rook/rook_cluster.py b/src/pybind/mgr/rook/rook_cluster.py index cf302c84550d4..1a28093509386 100644 --- a/src/pybind/mgr/rook/rook_cluster.py +++ b/src/pybind/mgr/rook/rook_cluster.py @@ -842,30 +842,65 @@ def ignore_409(self, what: str) -> Iterator[None]: else: raise - def apply_filesystem(self, spec: ServiceSpec) -> str: + def apply_filesystem(self, spec: ServiceSpec, num_replicas: int, + leaf_type: str) -> str: # TODO use spec.placement # TODO warn if spec.extended has entries we don't kow how # to action. 
+ all_hosts = self.get_hosts() def _update_fs(new: cfs.CephFilesystem) -> cfs.CephFilesystem: new.spec.metadataServer.activeCount = spec.placement.count or 1 + new.spec.metadataServer.placement = cfs.Placement( + nodeAffinity=cfs.NodeAffinity( + requiredDuringSchedulingIgnoredDuringExecution=cfs.RequiredDuringSchedulingIgnoredDuringExecution( + nodeSelectorTerms=cfs.NodeSelectorTermsList( + [placement_spec_to_node_selector(spec.placement, all_hosts)] + ) + ) + ) + ) return new - def _create_fs() -> cfs.CephFilesystem: - return cfs.CephFilesystem( + fs = cfs.CephFilesystem( apiVersion=self.rook_env.api_name, metadata=dict( name=spec.service_id, namespace=self.rook_env.namespace, ), spec=cfs.Spec( - None, - None, + dataPools=cfs.DataPoolsList( + { + cfs.DataPoolsItem( + failureDomain=leaf_type, + replicated=cfs.Replicated( + size=num_replicas + ) + ) + } + ), + metadataPool=cfs.MetadataPool( + failureDomain=leaf_type, + replicated=cfs.Replicated( + size=num_replicas + ) + ), metadataServer=cfs.MetadataServer( activeCount=spec.placement.count or 1, - activeStandby=True + activeStandby=True, + placement= + cfs.Placement( + nodeAffinity=cfs.NodeAffinity( + requiredDuringSchedulingIgnoredDuringExecution=cfs.RequiredDuringSchedulingIgnoredDuringExecution( + nodeSelectorTerms=cfs.NodeSelectorTermsList( + [placement_spec_to_node_selector(spec.placement, all_hosts)] + ) + ) + ) + ) ) ) ) + return fs assert spec.service_id is not None return self._create_or_patch( cfs.CephFilesystem, 'cephfilesystems', spec.service_id, From 952c7c844acee5fe73e3f70737606b700b67238c Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Mon, 11 Oct 2021 14:25:37 -0400 Subject: [PATCH 0100/2866] radosgw-admin: 'reshard list' doesn't log ENOENT errors ENOENT errors are expected, especially in fresh clusters, before we've written any entries to the reshard list shards. avoid logging these non-fatal ERROR messages: > -1 ERROR: failed to list reshard log entries, oid=reshard.0000000000 marker= (2) No such file or directory Fixes: https://tracker.ceph.com/issues/52873 Signed-off-by: Casey Bodley --- src/rgw/rgw_reshard.cc | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index 78c96d14c3932..5d07c1b2a374f 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -890,18 +890,17 @@ int RGWReshard::list(const DoutPrefixProvider *dpp, int logshard_num, string& ma int ret = cls_rgw_reshard_list(store->getRados()->reshard_pool_ctx, logshard_oid, marker, max, entries, is_truncated); - if (ret < 0) { - ldpp_dout(dpp, -1) << "ERROR: failed to list reshard log entries, oid=" << logshard_oid << " " - << "marker=" << marker << " " << cpp_strerror(ret) << dendl; - if (ret == -ENOENT) { - *is_truncated = false; - ret = 0; - } else { - if (ret == -EACCES) { - ldpp_dout(dpp, -1) << "access denied to pool " << store->svc()->zone->get_zone_params().reshard_pool - << ". Fix the pool access permissions of your client" << dendl; - } - } + if (ret == -ENOENT) { + // these shard objects aren't created until we actually write something to + // them, so treat ENOENT as a successful empty listing + *is_truncated = false; + ret = 0; + } else if (ret == -EACCES) { + ldpp_dout(dpp, -1) << "ERROR: access denied to pool " << store->svc()->zone->get_zone_params().reshard_pool + << ". 
Fix the pool access permissions of your client" << dendl; + } else if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to list reshard log entries, oid=" + << logshard_oid << " marker=" << marker << " " << cpp_strerror(ret) << dendl; } return ret; From f9baa5c71e3e7fbcb89d203e95a92b82661f31e4 Mon Sep 17 00:00:00 2001 From: Adam King Date: Mon, 11 Oct 2021 15:07:49 -0400 Subject: [PATCH 0101/2866] mgr/cephadm: update CEPHADM_AGENT_DOWN when agent reports in Right now, if an agent considered down reports in, the health warning will remain until the serve loop runs again. This change will give better responsiveness on setting/removing this health warning. Signed-off-by: Adam King --- src/pybind/mgr/cephadm/agent.py | 27 ++++++++++++++++++++++++++- src/pybind/mgr/cephadm/serve.py | 17 +---------------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index ead4c0a3bee5a..bbed1918f742a 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -21,7 +21,7 @@ from cryptography.hazmat.primitives import hashes, serialization from cryptography.hazmat.backends import default_backend -from typing import Any, Dict, Set, Tuple, TYPE_CHECKING +from typing import Any, Dict, List, Set, Tuple, TYPE_CHECKING if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator @@ -164,6 +164,12 @@ def handle_metadata(self, data: Dict[str, Any]) -> None: # update timestamp of most recent agent update self.mgr.cache.agent_timestamp[host] = datetime_now() + agents_down = [] + for h in self.mgr.cache.get_hosts(): + if self.mgr.agent_helpers._agent_down(h): + agents_down.append(h) + self.mgr.agent_helpers._update_agent_down_healthcheck(agents_down) + up_to_date = False int_ack = int(data['ack']) @@ -261,7 +267,7 @@ def run(self) -> None: secure_agent_socket.sendall(msg.encode('utf-8')) agent_response = secure_agent_socket.recv(1024).decode() self.mgr.log.info(f'Received "{agent_response}" from agent on host {self.host}') + self.mgr.cache.sending_agent_message[self.host] = False return except ConnectionError as e: # if it's a connection error, possibly try to connect again. @@ -312,6 +319,24 @@ def _agent_down(self, host: str) -> bool: return True return False + def _update_agent_down_healthcheck(self, down_agent_hosts: List[str]) -> None: + if 'CEPHADM_AGENT_DOWN' in self.mgr.health_checks: + del self.mgr.health_checks['CEPHADM_AGENT_DOWN'] + if down_agent_hosts: + detail: List[str] = [] + for agent in down_agent_hosts: + detail.append((f'Cephadm agent on host {agent} has not reported in ' + f'{2.5 * self.mgr.agent_refresh_rate} seconds. Agent is assumed ' + 'down and host may be offline.')) + self.mgr.health_checks['CEPHADM_AGENT_DOWN'] = { + 'severity': 'warning', + 'summary': '%d Cephadm Agent(s) are not reporting. ' + 'Hosts may be offline' % (len(down_agent_hosts)), + 'count': len(down_agent_hosts), + 'detail': detail, + } + self.mgr.set_health_checks(self.mgr.health_checks) + # this function probably seems very unnecessary, but it makes it considerably easier # to get the unit tests working.
All unit tests that check which daemons were deployed # or services setup would have to be individually changed to expect an agent service or diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index e3e9c57088385..a51aac9abc6e9 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -366,22 +366,7 @@ def refresh(host: str) -> None: refresh(self.mgr.cache.get_hosts()) - if 'CEPHADM_AGENT_DOWN' in self.mgr.health_checks: - del self.mgr.health_checks['CEPHADM_AGENT_DOWN'] - if agents_down: - detail: List[str] = [] - for agent in agents_down: - detail.append((f'Cephadm agent on host {agent} has not reported in ' - f'{2.5 * self.mgr.agent_refresh_rate} seconds. Agent is assumed ' - 'down and host may be offline.')) - self.mgr.health_checks['CEPHADM_AGENT_DOWN'] = { - 'severity': 'warning', - 'summary': '%d Cephadm Agent(s) are not reporting. ' - 'Hosts may be offline' % (len(agents_down)), - 'count': len(agents_down), - 'detail': detail, - } - self.mgr.set_health_checks(self.mgr.health_checks) + self.mgr.agent_helpers._update_agent_down_healthcheck(agents_down) self.mgr.config_checker.run_checks() From 1f43d8e618b68a8a1698745950bb095aa27886c5 Mon Sep 17 00:00:00 2001 From: Adam King Date: Wed, 29 Sep 2021 15:56:00 -0400 Subject: [PATCH 0102/2866] mgr/cephadm: update CEPHADM_FAILED_DAEMON after receiving agent metadata otherwise, there can be a period where a daemon will be marked incorrectly as failed or not failed even though mgr/cephadm knows it is or isn't in an error state Signed-off-by: Adam King --- src/pybind/mgr/cephadm/agent.py | 2 ++ src/pybind/mgr/cephadm/module.py | 15 ++++++++- src/pybind/mgr/cephadm/serve.py | 34 ++++++++++---------- src/pybind/mgr/cephadm/tests/test_cephadm.py | 18 +++++++---- 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index ead4c0a3bee5a..1350d079794de 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -178,6 +178,7 @@ def handle_metadata(self, data: Dict[str, Any]) -> None: if 'ls' in data and data['ls']: self.mgr._process_ls_output(host, data['ls']) + self.mgr.update_failed_daemon_health_check() if 'networks' in data and data['networks']: self.mgr.cache.update_host_networks(host, data['networks']) if 'facts' in data and data['facts']: @@ -261,6 +262,7 @@ def run(self) -> None: secure_agent_socket.sendall(msg.encode('utf-8')) agent_response = secure_agent_socket.recv(1024).decode() self.mgr.log.info(f'Received "{agent_response}" from agent on host {self.host}') + self.mgr.cache.sending_agent_message[self.host] = False return except ConnectionError as e: # if it's a connection error, possibly try to connect again. 
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 41476c1c99207..238b1757ec133 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -729,6 +729,18 @@ def offline_hosts_remove(self, host: str) -> None: if host in self.offline_hosts: self.offline_hosts.remove(host) + def update_failed_daemon_health_check(self) -> None: + self.remove_health_warning('CEPHADM_FAILED_DAEMON') + failed_daemons = [] + for dd in self.cache.get_daemons(): + if dd.status is not None and dd.status == DaemonDescriptionStatus.error: + failed_daemons.append('daemon %s on %s is in %s state' % ( + dd.name(), dd.hostname, dd.status_desc + )) + if failed_daemons: + self.set_health_warning('CEPHADM_FAILED_DAEMON', f'{len(failed_daemons)} failed cephadm daemon(s)', len( + failed_daemons), failed_daemons) + @staticmethod def can_run() -> Tuple[bool, str]: if asyncssh is not None: @@ -1503,7 +1515,8 @@ def _set_maintenance_healthcheck(self) -> None: self.remove_health_warning('HOST_IN_MAINTENANCE') else: s = "host is" if len(in_maintenance) == 1 else "hosts are" - self.set_health_warning("HOST_IN_MAINTENANCE", f"{len(in_maintenance)} {s} in maintenance mode", 1, [f"{h} is in maintenance" for h in in_maintenance]) + self.set_health_warning("HOST_IN_MAINTENANCE", f"{len(in_maintenance)} {s} in maintenance mode", 1, [ + f"{h} is in maintenance" for h in in_maintenance]) @handle_orch_error @host_exists() diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index c93e95cc26c04..f42465647fae7 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -135,7 +135,8 @@ def _serve_sleep(self) -> None: def _update_paused_health(self) -> None: if self.mgr.paused: - self.mgr.set_health_warning('CEPHADM_PAUSED', 'cephadm background work is paused', 1, ["'ceph orch resume' to resume"]) + self.mgr.set_health_warning('CEPHADM_PAUSED', 'cephadm background work is paused', 1, [ + "'ceph orch resume' to resume"]) else: self.mgr.remove_health_warning('CEPHADM_PAUSED') @@ -379,22 +380,16 @@ def refresh(host: str) -> None: for k in [ 'CEPHADM_HOST_CHECK_FAILED', - 'CEPHADM_FAILED_DAEMON', 'CEPHADM_REFRESH_FAILED', ]: self.mgr.remove_health_warning(k) if bad_hosts: - self.mgr.set_health_warning('CEPHADM_HOST_CHECK_FAILED', f'{len(bad_hosts)} hosts fail cephadm check', len(bad_hosts), bad_hosts) + self.mgr.set_health_warning( + 'CEPHADM_HOST_CHECK_FAILED', f'{len(bad_hosts)} hosts fail cephadm check', len(bad_hosts), bad_hosts) if failures: - self.mgr.set_health_warning('CEPHADM_REFRESH_FAILED', 'failed to probe daemons or devices', len(failures), failures) - failed_daemons = [] - for dd in self.mgr.cache.get_daemons(): - if dd.status is not None and dd.status == DaemonDescriptionStatus.error: - failed_daemons.append('daemon %s on %s is in %s state' % ( - dd.name(), dd.hostname, dd.status_desc - )) - if failed_daemons: - self.mgr.set_health_warning('CEPHADM_FAILED_DAEMON', f'{len(failed_daemons)} failed cephadm daemon(s)', len(failed_daemons), failed_daemons) + self.mgr.set_health_warning( + 'CEPHADM_REFRESH_FAILED', 'failed to probe daemons or devices', len(failures), failures) + self.mgr.update_failed_daemon_health_check() def _check_host(self, host: str) -> Optional[str]: if host not in self.mgr.inventory: @@ -547,9 +542,11 @@ def _check_for_strays(self) -> None: 'stray host %s has %d stray daemons: %s' % ( host, len(missing_names), missing_names)) if self.mgr.warn_on_stray_hosts and host_detail: - 
self.mgr.set_health_warning('CEPHADM_STRAY_HOST', f'{len(host_detail)} stray host(s) with {host_num_daemons} daemon(s) not managed by cephadm', len(host_detail), host_detail) + self.mgr.set_health_warning( + 'CEPHADM_STRAY_HOST', f'{len(host_detail)} stray host(s) with {host_num_daemons} daemon(s) not managed by cephadm', len(host_detail), host_detail) if self.mgr.warn_on_stray_daemons and daemon_detail: - self.mgr.set_health_warning('CEPHADM_STRAY_DAEMON', f'{len(daemon_detail)} stray daemon(s) not managed by cephadm', len(daemon_detail), daemon_detail) + self.mgr.set_health_warning( + 'CEPHADM_STRAY_DAEMON', f'{len(daemon_detail)} stray daemon(s) not managed by cephadm', len(daemon_detail), daemon_detail) def _apply_all_services(self) -> bool: r = False @@ -624,9 +621,11 @@ def _apply_service_config(self, spec: ServiceSpec) -> None: options_failed_to_set.append(msg) if invalid_config_options: - self.mgr.set_health_warning('CEPHADM_INVALID_CONFIG_OPTION', f'Ignoring {len(invalid_config_options)} invalid config option(s)', len(invalid_config_options), invalid_config_options) + self.mgr.set_health_warning('CEPHADM_INVALID_CONFIG_OPTION', f'Ignoring {len(invalid_config_options)} invalid config option(s)', len( + invalid_config_options), invalid_config_options) if options_failed_to_set: - self.mgr.set_health_warning('CEPHADM_FAILED_SET_OPTION', f'Failed to set {len(options_failed_to_set)} option(s)', len(options_failed_to_set), options_failed_to_set) + self.mgr.set_health_warning('CEPHADM_FAILED_SET_OPTION', f'Failed to set {len(options_failed_to_set)} option(s)', len( + options_failed_to_set), options_failed_to_set) def _apply_service(self, spec: ServiceSpec) -> bool: """ @@ -867,7 +866,8 @@ def update_progress() -> None: daemons.append(sd) if daemon_place_fails: - self.mgr.set_health_warning('CEPHADM_DAEMON_PLACE_FAIL', f'Failed to place {len(daemon_place_fails)} daemon(s)', len(daemon_place_fails), daemon_place_fails) + self.mgr.set_health_warning('CEPHADM_DAEMON_PLACE_FAIL', f'Failed to place {len(daemon_place_fails)} daemon(s)', len( + daemon_place_fails), daemon_place_fails) # remove any? 
def _ok_to_stop(remove_daemons: List[orchestrator.DaemonDescription]) -> bool: diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 63d7eb67fee8c..f3ccf69904f19 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -899,8 +899,10 @@ def test_daemon_place_fail_health_warning(self, _run_cephadm, cephadm_module): assert not r assert cephadm_module.health_checks.get('CEPHADM_DAEMON_PLACE_FAIL') is not None assert cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['count'] == 1 - assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['summary'] - assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks['CEPHADM_DAEMON_PLACE_FAIL']['detail'] + assert 'Failed to place 1 daemon(s)' in cephadm_module.health_checks[ + 'CEPHADM_DAEMON_PLACE_FAIL']['summary'] + assert 'Failed while placing mgr.a on test: fail' in cephadm_module.health_checks[ + 'CEPHADM_DAEMON_PLACE_FAIL']['detail'] @mock.patch("cephadm.serve.CephadmServe._run_cephadm") def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: CephadmOrchestrator): @@ -913,7 +915,8 @@ def test_apply_spec_fail_health_warning(self, _run_cephadm, cephadm_module: Ceph assert cephadm_module.apply_spec_fails assert cephadm_module.health_checks.get('CEPHADM_APPLY_SPEC_FAIL') is not None assert cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['count'] == 1 - assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks['CEPHADM_APPLY_SPEC_FAIL']['summary'] + assert 'Failed to apply 1 service(s)' in cephadm_module.health_checks[ + 'CEPHADM_APPLY_SPEC_FAIL']['summary'] @mock.patch("cephadm.module.CephadmOrchestrator.get_foreign_ceph_option") @mock.patch("cephadm.serve.CephadmServe._run_cephadm") @@ -922,11 +925,14 @@ def test_invalid_config_option_health_warning(self, _run_cephadm, get_foreign_ce with with_host(cephadm_module, 'test'): ps = PlacementSpec(hosts=['test:0.0.0.0=a'], count=1) get_foreign_ceph_option.side_effect = KeyError - CephadmServe(cephadm_module)._apply_service_config(ServiceSpec('mgr', placement=ps, config={'test': 'foo'})) + CephadmServe(cephadm_module)._apply_service_config( + ServiceSpec('mgr', placement=ps, config={'test': 'foo'})) assert cephadm_module.health_checks.get('CEPHADM_INVALID_CONFIG_OPTION') is not None assert cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['count'] == 1 - assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['summary'] - assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks['CEPHADM_INVALID_CONFIG_OPTION']['detail'] + assert 'Ignoring 1 invalid config option(s)' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['summary'] + assert 'Ignoring invalid mgr config option test' in cephadm_module.health_checks[ + 'CEPHADM_INVALID_CONFIG_OPTION']['detail'] @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock()) From 460f6ca0478c8320247f96a8d308197a0d9c432e Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Tue, 12 Oct 2021 09:55:21 +0800 Subject: [PATCH 0103/2866] crimson/os/seastore: set ExtentPlacementManager::allocated_to before rolling segments There are circumstances in which a transaction that are supposed to roll the current segment is invalidated after it finished writing and before 
it rolls the segment. If we don't set ExtentPlacementManager::allocated_to in this situation, another transaction can try to write to the old "allocated_to" position, which would cause an invalid write error Fixes: https://tracker.ceph.com/issues/52894 Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/extent_placement_manager.cc | 12 +++++------- src/crimson/os/seastore/extent_placement_manager.h | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 48a89ea273ae9..f3670f3c2eab2 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -69,9 +69,10 @@ SegmentedAllocator::Writer::finish_write( SegmentedAllocator::Writer::write_iertr::future<> SegmentedAllocator::Writer::_write( Transaction& t, - ool_record_t& record, - const record_size_t& record_size) + ool_record_t& record) { + record_size_t record_size = record.get_encoded_record_length(); + allocated_to += record_size.mdlength + record_size.dlength; bufferlist bl = record.encode( record_size, current_segment->segment->get_segment_id(), @@ -165,9 +166,8 @@ SegmentedAllocator::Writer::write( num_extents, current_segment->segment->get_segment_id(), allocated_to); - auto rsize = record.get_encoded_record_length(); return (num_extents ? - _write(t, record, rsize) : + _write(t, record) : write_iertr::now() ).si_then([this]() mutable { return roll_segment(false); @@ -179,7 +179,6 @@ SegmentedAllocator::Writer::write( add_extent_to_write(record, extent); it = extents.erase(it); } - record_size_t rsize = record.get_encoded_record_length(); DEBUGT( "writing {} extents to segment {} at {}", @@ -187,8 +186,7 @@ SegmentedAllocator::Writer::write( record.get_num_extents(), current_segment->segment->get_segment_id(), allocated_to); - allocated_to += rsize.mdlength + rsize.dlength; - return _write(t, record, rsize); + return _write(t, record); } ).si_then([]() -> write_iertr::future { diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index d9a15ea5577a8..690d817c2ed31 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -217,8 +217,7 @@ class SegmentedAllocator : public ExtentAllocator { write_iertr::future<> _write( Transaction& t, - ool_record_t& record, - const record_size_t& record_size); + ool_record_t& record); using roll_segment_ertr = crimson::errorator< crimson::ct_error::input_output_error>; From f7714de294dd7376a9a8ae5131aa429322b459c3 Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Fri, 8 Oct 2021 15:33:13 +0530 Subject: [PATCH 0104/2866] mgr/dashboard: monitoring: grafonnet refactoring for hosts dashboards This PR intends to refactor hosts dashboards using grafonnet Fixes:https://tracker.ceph.com/issues/52777 Signed-off-by: Aashish Sharma --- .../grafana/dashboards/host-details.json | 2366 ++++++++--------- .../grafana/dashboards/hosts-overview.json | 1658 ++++++------ .../jsonnet/grafana_dashboards.jsonnet | 176 +- 3 files changed, 2150 insertions(+), 2050 deletions(-) diff --git a/monitoring/grafana/dashboards/host-details.json b/monitoring/grafana/dashboards/host-details.json index b002ad2f09dd8..1d071d4ec336d 100644 --- a/monitoring/grafana/dashboards/host-details.json +++ b/monitoring/grafana/dashboards/host-details.json @@ -1,1215 +1,1193 @@ { - "__inputs": [], - "__requires": [ - { - "type": 
"grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.3.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ + "__inputs": [ ], + "__requires": [ { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1615564911000, - "links": [], - "panels": [ - { - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 16, - "title": "$ceph_hosts System Overview", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 0, - "y": 1 - }, - "height": "160", - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": "", - "title": "OSDs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": { - "interrupt": "#447EBC", - "steal": "#6D1F62", - "system": "#890F02", - "user": "#3F6833", - "wait": "#C15C17" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Shows the CPU breakdown. 
When multiple servers are selected, only the first host's cpu data is shown", - "fill": 1, - "gridPos": { - "h": 10, - "w": 6, - "x": 3, - "y": 1 - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mode}}", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilization", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "% Utilization", - "logBase": 1, - "max": "100", - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Available": "#508642", - "Free": "#508642", - "Total": "#bf1b00", - "Used": "#bf1b00", - "total": "#bf1b00", - "used": "#0a50a1" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - "h": 10, - "w": 6, - "x": 9, - "y": 1 - }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "color": "#bf1b00", - "fill": 0, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})\n )\n \n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "used", - "refId": "D" - }, - { - "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or 
node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Free", - "refId": "A" - }, - { - "expr": "(node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) \n", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "buffers/cache", - "refId": "C" - }, - { - "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "total", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "RAM Usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "RAM used", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')", - "fill": 0, - "gridPos": { - "h": 10, - "w": 6, - "x": 15, - "y": 1 - }, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*tx/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}.rx", - "refId": "A", - "step": 10, - "textEditor": true - }, - { - "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}.tx", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network Load", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - 
"format": "decbytes", - "label": "Send (-) / Receive (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 21, - "y": 1 - }, - "hideTimeOverride": true, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*tx/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "{{device}}.rx", - "refId": "A" - }, - { - "expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}.tx", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network drop rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "Send (-) / Receive (+)", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "decimals": 0, - "description": "Each OSD consists of a Journal/WAL partition and a data partition. 
The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 0, - "y": 6 - }, - "height": "160", - "id": 2, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": "", - "title": "Raw Capacity", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 21, - "y": 6 - }, - "hideTimeOverride": true, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*tx/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "{{device}}.rx", - "refId": "A" - }, - { - "expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}.tx", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network error rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "Send (-) / Receive (+)", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 12, - "panels": [], - "repeat": null, - "title": "OSD Disk Performance Statistics", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": 
false, - "datasource": "$datasource", - "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value", - "fill": 1, - "gridPos": { - "h": 9, - "w": 11, - "x": 0, - "y": 12 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*reads/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}({{ceph_daemon}}) writes", - "refId": "A", - "step": 10, - "textEditor": true - }, - { - "expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}}({{ceph_daemon}}) reads", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$ceph_hosts Disk IOPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" }, - "yaxes": [ - { - "format": "ops", - "label": "Read (-) / Write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. 
Each device is shown by device name, and corresponding OSD id", - "fill": 1, - "gridPos": { - "h": 9, - "w": 11, - "x": 12, - "y": 12 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*read/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}({{ceph_daemon}}) write", - "refId": "B" - }, - { - "expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}({{ceph_daemon}}) read", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$ceph_hosts Throughput by Disk", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "$ceph_hosts System Overview", + "titleSize": "h6", + "type": "row" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + 
"from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, - "yaxes": [ - { - "format": "Bps", - "label": "Read (-) / Write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with it's corresponding OSD id", - "fill": 1, - "gridPos": { - "h": 9, - "w": 11, - "x": 0, - "y": 21 + { + "aliasColors": { + "interrupt": "#447EBC", + "steal": "#6D1F62", + "system": "#890F02", + "user": "#3F6833", + "wait": "#C15C17" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", + "fill": 1, + "gridPos": { + "h": 10, + "w": 6, + "x": 3, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mode}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Utilization", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + { + "aliasColors": { + "Available": "#508642", + "Free": "#508642", + "Total": "#bf1b00", + "Used": "#bf1b00", + "total": 
"#bf1b00", + "used": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 10, + "w": 6, + "x": 9, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "total", + "color": "#bf1b00", + "fill": 0, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free", + "refId": "A" + }, + { + "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "total", + "refId": "B" + }, + { + "expr": "(node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) \n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers/cache", + "refId": "C" + }, + { + "expr": "(node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})\n )\n \n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "used", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "RAM Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": "RAM used", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ 
instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}}({{ceph_daemon}})", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$ceph_hosts Disk Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')", + "fill": 1, + "gridPos": { + "h": 10, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 
null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network drop rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "yaxes": [ - { - "format": "s", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 11, - "x": 12, - "y": 21 + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Each OSD consists of a Journal/WAL partition and a data partition. 
The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 6 + }, + "id": 8, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 6 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network error rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 2, - "nullPointMode": "connected", - "options": { - "dataLinks": [] + { + "collapse": false, + "collapsed": false, 
+ "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 10, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Disk Performance Statistics", + "titleSize": "h6", + "type": "row" }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}}({{ceph_daemon}})", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$ceph_hosts Disk utilization", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 12 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) writes", + "refId": "A" + }, + { + "expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) reads", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts 
Disk IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 12 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*read/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) write", + "refId": "A" + }, + { + "expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "yaxes": [ - { - "format": "percent", - "label": "%Util", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": 
"10s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with it's corresponding OSD id", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 21 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Hostname", - "multi": false, - "name": "ceph_hosts", - "options": [], - "query": "label_values(node_scrape_collector_success, instance) ", - "refresh": 1, - "regex": "([^.:]*).*", - "skipUrlSync": false, - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 21 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": "%Util", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Host Details", - "uid": "rtOg0AiWz", - "version": 4 + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Hostname", + "multi": false, + "name": "ceph_hosts", + "options": [ ], + "query": "label_values(node_scrape_collector_success, instance) ", + "refresh": 1, + "regex": "([^.:]*).*", + "sort": 3, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Host Details", + "uid": "rtOg0AiWz", + "version": 0 } diff --git a/monitoring/grafana/dashboards/hosts-overview.json b/monitoring/grafana/dashboards/hosts-overview.json index b179d5717d536..b1e2d857dd142 100644 --- a/monitoring/grafana/dashboards/hosts-overview.json +++ b/monitoring/grafana/dashboards/hosts-overview.json @@ -1,852 +1,844 @@ { - "__inputs": [], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.3.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "panel", - 
"id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ + "__inputs": [ ], + "__requires": [ { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1557393917915, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(sum by (hostname) (ceph_osd_metadata))", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "OSD Hosts", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", - "decimals": 2, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "AVG CPU Busy", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - 
"colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", - "decimals": 2, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "id": 9, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "AVG RAM Utilization", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "description": "IOPS Load at the device as reported by the OS on all OSD hosts", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "id": 2, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { 
- "expr": "sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Physical IOPS", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "id": 20, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr" : "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device, ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "AVG Disk Utilization", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 0, - "description": "Total send/receive network load across all hosts in the ceph cluster", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 0 + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" }, - "id": 18, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - 
"show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\nsum (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n )", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Network Load", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Show the top 10 busiest hosts by cpu", - "fill": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 5 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,100 * ( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Busy - Top 10 Hosts", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" }, - "yaxes": [ - { - "decimals": 1, - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Top 10 hosts by network load", - "fill": 0, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 5 - }, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "cacheTimeout": null, + 
"colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (hostname) (ceph_osd_metadata))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10, (sum by(instance) (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Load - Top 10 Hosts", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + 
"expr": "avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG CPU Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 4, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG RAM Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, - "yaxes": [ - { - "decimals": 1, - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "10s", - "schemaVersion": 16, 
- "style": "dark", - "tags": [], - "templating": { - "list": [ { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "IOPS Load at the device as reported by the OS on all OSD hosts", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 5, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Physical IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, { - "allValue": "", - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "osd_hosts", - "options": [], - "query": "label_values(ceph_disk_occupation, exported_instance)", - "refresh": 1, - "regex": "([^.]*).*", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "ceph", - "type": "query", - "useTags": false + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average Disk utilization for all OSD data devices (i.e. 
excludes journal/WAL)", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device, ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG Disk Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "mon_hosts", - "options": [], - "query": "label_values(ceph_mon_metadata, ceph_daemon)", - "refresh": 1, - "regex": "mon.(.*)", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Total send/receive network load across all hosts in the ceph cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\nsum (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n 
irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n )", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Network Load", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "mds_hosts", - "options": [], - "query": "label_values(ceph_mds_inodes, ceph_daemon)", - "refresh": 1, - "regex": "mds.(.*)", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the top 10 busiest hosts by cpu", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,100 * ( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "rgw_hosts", - "options": [], - "query": "label_values(ceph_rgw_qlen, ceph_daemon)", - "refresh": 1, - "regex": "rgw.(.*)", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Top 10 hosts by network load", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": 
[ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by(instance) (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network Load - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Host Overview", - "uid": "y0KGL0iZz", - "version": 3 + ], + "refresh": "10s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_hosts", + "options": [ ], + "query": "label_values(ceph_disk_occupation, exported_instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "mon_hosts", + "options": [ ], + "query": "label_values(ceph_mon_metadata, ceph_daemon)", + "refresh": 1, + "regex": "mon.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "mds_hosts", + "options": [ ], + "query": "label_values(ceph_mds_inodes, ceph_daemon)", + "refresh": 1, + "regex": "mds.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_hosts", + "options": [ ], + "query": "label_values(ceph_rgw_qlen, ceph_daemon)", + "refresh": 1, + "regex": "rgw.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + 
"tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Host Overview", + "uid": "y0KGL0iZz", + "version": 0 } diff --git a/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet b/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet index 11c89b212552f..6594cfe415fa2 100644 --- a/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet +++ b/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet @@ -1,54 +1,184 @@ local g = import 'grafana.libsonnet'; -local dashboardSchema(title, uid, time_from, refresh, schemaVersion, tags,timezone, timepicker) = - g.dashboard.new(title=title, uid=uid, time_from=time_from, refresh=refresh, schemaVersion=schemaVersion, tags=tags, timezone=timezone, timepicker=timepicker); +local dashboardSchema(title, description, uid, time_from, refresh, schemaVersion, tags, timezone, timepicker) = + g.dashboard.new(title=title, description=description, uid=uid, time_from=time_from, refresh=refresh, schemaVersion=schemaVersion, tags=tags, timezone=timezone, timepicker=timepicker); -local graphPanelSchema(title, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource) = - g.graphPanel.new(title=title, nullPointMode=nullPointMode, stack=stack, formatY1=formatY1, formatY2=formatY2, labelY1=labelY1, labelY2=labelY2, min=min, fill=fill, datasource=datasource); +local graphPanelSchema(aliasColors, title, description, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource) = + g.graphPanel.new(aliasColors=aliasColors, title=title, description=description, nullPointMode=nullPointMode, stack=stack, formatY1=formatY1, formatY2=formatY2, labelY1=labelY1, labelY2=labelY2, min=min, fill=fill, datasource=datasource); local addTargetSchema(expr, intervalFactor, format, legendFormat) = g.prometheus.target(expr=expr, intervalFactor=intervalFactor, format=format, legendFormat=legendFormat); -local addTemplateSchema(name, datasource, query, refresh, hide, includeAll, sort) = - g.template.new(name=name, datasource=datasource, query=query, refresh=refresh, hide=hide, includeAll=includeAll, sort=sort); +local addTemplateSchema(name, datasource, query, refresh, includeAll, sort, label, regex) = + g.template.new(name=name, datasource=datasource, query=query, refresh=refresh, includeAll=includeAll, sort=sort, label=label, regex=regex); local addAnnotationSchema(builtIn, datasource, enable, hide, iconColor, name, type) = g.annotation.datasource(builtIn=builtIn, datasource=datasource, enable=enable, hide=hide, iconColor=iconColor, name=name, type=type); +local addRowSchema(collapse, showTitle, title) = + g.row.new(collapse=collapse, showTitle=showTitle, title=title); + +local addSingelStatSchema(datasource, format, title, description, valueName, colorValue, gaugeMaxValue, gaugeShow, sparklineShow, thresholds) = + g.singlestat.new(datasource=datasource, format=format, title=title, description=description, valueName=valueName, colorValue=colorValue, gaugeMaxValue=gaugeMaxValue, gaugeShow=gaugeShow, sparklineShow=sparklineShow, thresholds=thresholds); + +local addPieChartSchema(aliasColors, datasource, description, legendType, pieType, title, valueName) = + 
g.pieChartPanel.new(aliasColors=aliasColors, datasource=datasource, description=description, legendType=legendType, pieType=pieType, title=title, valueName=valueName); + +local addTableSchema(datasource, description, sort, styles, title, transform) = + g.tablePanel.new(datasource=datasource, description=description, sort=sort, styles=styles, title=title, transform=transform); + +local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, pattern, thresholds, type, unit, valueMaps) = + {'alias': alias, 'colorMode': colorMode, 'colors':colors, 'dateFormat':dateFormat, 'decimals':decimals, 'mappingType':mappingType, 'pattern':pattern, 'thresholds':thresholds, 'type':type, 'unit':unit, 'valueMaps':valueMaps}; + { - "radosgw-sync-overview.json": - local RgwSyncOverviewPanel(title, formatY1, labelY1, rgwMetric, x, y, w, h) = - graphPanelSchema(title, 'null as zero', true, formatY1, 'short', labelY1, null, 0, 1, '$datasource') + "hosts-overview.json": + local HostsOverviewSingleStatPanel(format, title, description, valueName, expr, targetFormat, x, y, w, h) = + addSingelStatSchema('$datasource', format, title, description, valueName, false, 100, false, false, '') + .addTarget(addTargetSchema(expr, 1, targetFormat, '')) + {gridPos: {x: x, y: y, w: w, h: h}}; + + local HostsOverviewGraphPanel(title, description, formatY1, expr, legendFormat, x, y, w, h) = + graphPanelSchema({}, title, description, 'null', false, formatY1, 'short', null, null, 0, 1, '$datasource') .addTargets( - [addTargetSchema('sum by (source_zone) (rate(%s[30s]))' % rgwMetric, 1, 'time_series', '{{source_zone}}')]) + {gridPos: {x: x, y: y, w: w, h: h}}; + [addTargetSchema(expr, 1, 'time_series', legendFormat)]) + {gridPos: {x: x, y: y, w: w, h: h}}; dashboardSchema( - 'RGW Sync Overview', 'rgw-sync-overview', 'now-1h', '15s', 16, ["overview"], '', {refresh_intervals:['5s','10s','15s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']} + 'Host Overview', '', 'y0KGL0iZz', 'now-1h', '10s', 16, [], '', {refresh_intervals:['5s','10s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']} + ) + .addRequired( + type='grafana', id='grafana', name='Grafana', version='5.3.2' + ) + .addRequired( + type='panel', id='graph', name='Graph', version='5.0.0' + ) + .addRequired( + type='panel', id='singlestat', name='Singlestat', version='5.0.0' ) .addAnnotation( addAnnotationSchema( 1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard') ) + .addTemplate( + g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') + ) + .addTemplate( + addTemplateSchema('osd_hosts', '$datasource', 'label_values(ceph_disk_occupation, exported_instance)', 1, true, 1, null, '([^.]*).*') + ) + .addTemplate( + addTemplateSchema('mon_hosts', '$datasource', 'label_values(ceph_mon_metadata, ceph_daemon)', 1, true, 1, null, 'mon.(.*)') + ) + .addTemplate( + addTemplateSchema('mds_hosts', '$datasource', 'label_values(ceph_mds_inodes, ceph_daemon)', 1, true, 1, null, 'mds.(.*)') + ) + .addTemplate( + addTemplateSchema('rgw_hosts', '$datasource', 'label_values(ceph_rgw_qlen, ceph_daemon)', 1, true, 1, null, 'rgw.(.*)') + ) + .addPanels([ + HostsOverviewSingleStatPanel( + 'none', 'OSD Hosts', '', 'current', 'count(sum by (hostname) (ceph_osd_metadata))', 'time_series', 0, 0, 4, 5), + HostsOverviewSingleStatPanel( + 'percentunit', 'AVG CPU Busy', 'Average CPU busy across all hosts (OSD, RGW, 
MON etc) within the cluster', 'current', 'avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )', 'time_series', 4, 0, 4, 5), + HostsOverviewSingleStatPanel( + 'percentunit', 'AVG RAM Utilization', 'Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)', 'current', 'avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))', 'time_series', 8, 0, 4, 5), + HostsOverviewSingleStatPanel( + 'none', 'Physical IOPS', 'IOPS Load at the device as reported by the OS on all OSD hosts', 'current', 'sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))', 'time_series', 12, 0, 4, 5), + HostsOverviewSingleStatPanel( + 'percent', 'AVG Disk Utilization', 'Average Disk utilization for all OSD data devices (i.e. 
excludes journal/WAL)', 'current', 'avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device, ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)', 'time_series', 16, 0, 4, 5), + HostsOverviewSingleStatPanel( + 'bytes', 'Network Load', 'Total send/receive network load across all hosts in the ceph cluster', 'current', 'sum (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\nsum (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n )', 'time_series', 20, 0, 4, 5), + HostsOverviewGraphPanel( + 'CPU Busy - Top 10 Hosts', 'Show the top 10 busiest hosts by cpu', 'percent', 'topk(10,100 * ( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)', '{{instance}}', 0, 5, 12, 9), + HostsOverviewGraphPanel( + 'Network Load - Top 10 Hosts', 'Top 10 hosts by network load', 'Bps', 'topk(10, (sum by(instance) (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)', '{{instance}}', 12, 5, 12, 9), + ]) +} +{ + "host-details.json": + local HostDetailsSingleStatPanel(format, title, description, valueName, expr, targetFormat, x, y, w, h) = + addSingelStatSchema('$datasource', format, title, description, valueName, false, 100, false, false, '') + .addTarget(addTargetSchema(expr, 1, targetFormat, '')) + {gridPos: {x: x, y: y, w: w, h: h}}; + + local HostDetailsGraphPanel(alias, title, description, nullPointMode, formatY1, labelY1, expr, legendFormat, x, y, w, h) = + graphPanelSchema(alias, title, description, nullPointMode, false, formatY1, 'short', labelY1, null, null, 1, '$datasource') + .addTargets( + [addTargetSchema(expr, 1, 'time_series', legendFormat)]) + {gridPos: {x: x, y: y, w: w, h: h}}; + + dashboardSchema( + 'Host Details', '', 'rtOg0AiWz', 'now-1h', '10s', 16, ['overview'], '', {refresh_intervals:['5s','10s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']} + ) .addRequired( - type='grafana', id='grafana', name='Grafana', version='5.0.0' + type='grafana', id='grafana', name='Grafana', version='5.3.2' ) .addRequired( type='panel', id='graph', name='Graph', version='5.0.0' ) - .addTemplate( - addTemplateSchema('rgw_servers', '$datasource', 'prometehus', 1, 2, true, 1) + .addRequired( + type='panel', id='singlestat', 
name='Singlestat', version='5.0.0' + ) + .addAnnotation( + addAnnotationSchema( + 1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard') ) .addTemplate( g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') ) + .addTemplate( + addTemplateSchema('ceph_hosts', '$datasource', 'label_values(node_scrape_collector_success, instance) ', 1, false, 3, 'Hostname', '([^.:]*).*') + ) .addPanels([ - RgwSyncOverviewPanel( - 'Replication (throughput) from Source Zone', 'Bps', null, 'ceph_data_sync_from_zone_fetch_bytes_sum', 0, 0, 8, 7), - RgwSyncOverviewPanel( - 'Replication (objects) from Source Zone', 'short', 'Objects/s', 'ceph_data_sync_from_zone_fetch_bytes_count', 8, 0, 8, 7), - RgwSyncOverviewPanel( - 'Polling Request Latency from Source Zone', 'ms', null, 'ceph_data_sync_from_zone_poll_latency_sum', 16, 0, 8, 7), - RgwSyncOverviewPanel( - 'Unsuccessful Object Replications from Source Zone', 'short', 'Count/s', 'ceph_data_sync_from_zone_fetch_errors', 0, 7, 8, 7) + addRowSchema(false, true, '$ceph_hosts System Overview') + {gridPos: {x: 0, y: 0, w: 24, h: 1}}, + HostDetailsSingleStatPanel( + 'none', 'OSDs', '', 'current', 'count(sum by (ceph_daemon) (ceph_osd_metadata{hostname=\'$ceph_hosts\'}))', 'time_series', 0, 1, 3, 5 + ), + HostDetailsGraphPanel( + {"interrupt": "#447EBC","steal": "#6D1F62","system": "#890F02","user": "#3F6833","wait": "#C15C17"},'CPU Utilization', 'Shows the CPU breakdown. When multiple servers are selected, only the first host\'s cpu data is shown', 'null', 'percent', '% Utilization', 'sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100', '{{mode}}', 3, 1, 6, 10 + ), + HostDetailsGraphPanel( + {"Available": "#508642","Free": "#508642","Total": "#bf1b00","Used": "#bf1b00","total": "#bf1b00","used": "#0a50a1"},'RAM Usage', '', 'null', 'bytes', 'RAM used', 'node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ', 'Free', 9, 1, 6, 10) + .addTargets( + [addTargetSchema('node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ', 1, 'time_series', 'total'), + addTargetSchema('(node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) \n', 1, 'time_series', 'buffers/cache'), + addTargetSchema('(node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or 
node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})\n )\n \n', 1, 'time_series', 'used')]) + .addSeriesOverride({"alias": "total","color": "#bf1b00","fill": 0,"linewidth": 2,"stack": false} + ), + HostDetailsGraphPanel( + {},'Network Load', 'Show the network load (rx,tx) across all interfaces (excluding loopback \'lo\')', 'null', 'decbytes', 'Send (-) / Receive (+)', 'sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)', '{{device}}.rx', 15, 1, 6, 10) + .addTargets( + [addTargetSchema('sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)', 1, 'time_series', '{{device}}.tx')]) + .addSeriesOverride({"alias": "/.*tx/","transform": "negative-Y"} + ), + HostDetailsGraphPanel( + {},'Network drop rate', '', 'null', 'pps', 'Send (-) / Receive (+)', 'irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])', '{{device}}.rx', 21, 1, 3, 5) + .addTargets( + [addTargetSchema('irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])', 1, 'time_series', '{{device}}.tx')]) + .addSeriesOverride({"alias": "/.*tx/","transform": "negative-Y"} + ), + HostDetailsSingleStatPanel( + 'bytes', 'Raw Capacity', 'Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.', 'current', 'sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})', 'time_series', 0, 6, 3, 5 + ), + HostDetailsGraphPanel( + {},'Network error rate', '', 'null', 'pps', 'Send (-) / Receive (+)', 'irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])', '{{device}}.rx', 21, 6, 3, 5) + .addTargets( + [addTargetSchema('irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])', 1, 'time_series', '{{device}}.tx')]) + .addSeriesOverride({"alias": "/.*tx/","transform": "negative-Y"} + ), + addRowSchema(false, true, 'OSD Disk Performance Statistics') + {gridPos: {x: 0, y: 11, w: 24, h: 1}}, + HostDetailsGraphPanel( + {},'$ceph_hosts Disk IOPS', 'For any OSD devices on the host, this chart shows the iops per physical device. 
Each device is shown by it\'s name and corresponding OSD id value', 'connected', 'ops', 'Read (-) / Write (+)', 'label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )', '{{device}}({{ceph_daemon}}) writes', 0, 12, 11, 9) + .addTargets( + [addTargetSchema('label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )', 1, 'time_series', '{{device}}({{ceph_daemon}}) reads')]) + .addSeriesOverride({"alias": "/.*reads/","transform": "negative-Y"} + ), + HostDetailsGraphPanel( + {},'$ceph_hosts Throughput by Disk', 'For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id', 'connected', 'Bps', 'Read (-) / Write (+)', 'label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")', '{{device}}({{ceph_daemon}}) write', 12, 12, 11, 9) + .addTargets( + [addTargetSchema('label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")', 1, 'time_series', '{{device}}({{ceph_daemon}}) read')]) + .addSeriesOverride({"alias": "/.*read/","transform": "negative-Y"} + ), + HostDetailsGraphPanel( + {},'$ceph_hosts Disk Latency', 'For OSD hosts, this chart shows the latency at the physical drive. 
Each drive is shown by device name, with it\'s corresponding OSD id', 'null as zero', 's', '', 'max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")', '{{device}}({{ceph_daemon}})', 0, 21, 11, 9 + ), + HostDetailsGraphPanel( + {},'$ceph_hosts Disk utilization', 'Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.', 'connected', 'percent', '%Util', 'label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")', '{{device}}({{ceph_daemon}})', 12, 21, 11, 9 + ) ]) -} +} \ No newline at end of file From f41eae16af9fa6725d5b714f068fab3f946d2927 Mon Sep 17 00:00:00 2001 From: Avan Thakkar Date: Fri, 11 Jun 2021 16:37:10 +0530 Subject: [PATCH 0105/2866] cephadm: expose gather-facts api method Fixes: https://tracker.ceph.com/issues/51209 This PR intends to expose host metadata(gather-facts) api method Signed-off-by: Avan Thakkar Signed-off-by: Aashish Sharma --- src/pybind/mgr/cephadm/module.py | 13 +++++++++++++ src/pybind/mgr/cephadm/tests/test_facts.py | 12 ++++++++++++ src/pybind/mgr/dashboard/services/orchestrator.py | 4 ++++ src/pybind/mgr/orchestrator/_interface.py | 6 ++++++ 4 files changed, 35 insertions(+) create mode 100644 src/pybind/mgr/cephadm/tests/test_facts.py diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 41476c1c99207..fcf2ac174c9b3 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1434,6 +1434,19 @@ def get_hosts(self): """ return list(self.inventory.all_specs()) + @handle_orch_error + def get_facts(self, hostname: Optional[str] = None) -> List[Dict[str, Any]]: + """ + Return a list of hosts metadata(gather_facts) managed by the orchestrator. + + Notes: + - skip async: manager reads from cache. 
+ """ + if hostname: + return [self.cache.get_facts(hostname)] + + return [self.cache.get_facts(hostname) for hostname in self.cache.get_hosts()] + @handle_orch_error def add_host_label(self, host: str, label: str) -> str: self.inventory.add_label(host, label) diff --git a/src/pybind/mgr/cephadm/tests/test_facts.py b/src/pybind/mgr/cephadm/tests/test_facts.py new file mode 100644 index 0000000000000..79e6db00ce83c --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_facts.py @@ -0,0 +1,12 @@ +import pytest + +from ..inventory import HostCache +from ..import CephadmOrchestrator + + +@pytest.fixture() +def test_facts(): + facts = {'node-1.ceph.com', {'bios_version': 'F2', 'cpu_cores': 16}} + HostCache.facts = facts + ret_facts = CephadmOrchestrator.get_facts('node-1.ceph.com') + assert ret_facts == [{'bios_version': 'F2', 'cpu_cores': 16}] diff --git a/src/pybind/mgr/dashboard/services/orchestrator.py b/src/pybind/mgr/dashboard/services/orchestrator.py index 44309a8493917..88a6cb3a86085 100644 --- a/src/pybind/mgr/dashboard/services/orchestrator.py +++ b/src/pybind/mgr/dashboard/services/orchestrator.py @@ -66,6 +66,10 @@ def get(self, hostname: str) -> Optional[HostSpec]: def add(self, hostname: str, addr: str, labels: List[str]): return self.api.add_host(HostSpec(hostname, addr=addr, labels=labels)) + @wait_api_result + def get_facts(self, hostname: Optional[str] = None) -> List[Dict[str, Any]]: + return self.api.get_facts(hostname) + @wait_api_result def remove(self, hostname: str): return self.api.remove_host(hostname) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 158299885b1cb..91925a59e6843 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -376,6 +376,12 @@ def get_hosts(self) -> OrchResult[List[HostSpec]]: """ raise NotImplementedError() + def get_facts(self, hostname: Optional[str] = None) -> OrchResult[List[Dict[str, Any]]]: + """ + Return hosts metadata(gather_facts). + """ + raise NotImplementedError() + def add_host_label(self, host: str, label: str) -> OrchResult[str]: """ Add a host label From ed8dd300a88173b1e5efafb6bb061a15ea296c29 Mon Sep 17 00:00:00 2001 From: dheart Date: Tue, 12 Oct 2021 16:46:34 +0800 Subject: [PATCH 0106/2866] os/bluestore: _do_write_small fix head_pad Signed-off-by: dheart --- src/os/bluestore/BlueStore.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index c7406b995424b..9c8aa6d39814b 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -13654,7 +13654,7 @@ void BlueStore::_do_write_small( end_offs - offset + head_pad + tail_pad); } if (head_pad && - o->extent_map.has_any_lextents(offset - head_pad, chunk_size)) { + o->extent_map.has_any_lextents(offset - head_pad, head_pad)) { head_pad = 0; } if (tail_pad && o->extent_map.has_any_lextents(end_offs, tail_pad)) { From e3ad392fd04e9ade3d0fb3fd69f07c3d6bc6c366 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 12 Oct 2021 09:38:23 +0000 Subject: [PATCH 0107/2866] crimson: osd operations respect interruptor's InterruptCondition. For the sake of DRY. 
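The whole refactor hinges on one nested alias. A minimal, compile-checkable sketch of the idea (names simplified and assumed here, not the actual crimson headers) looks like this:

    // Sketch only: exposing the interrupt condition as a nested alias lets call
    // sites name it through the interruptor instead of repeating the concrete type.
    #include <type_traits>

    struct IOInterruptCondition {};      // stand-in for the real condition type

    template <typename InterruptCond>
    struct interruptor {
      using condition = InterruptCond;   // the alias this patch introduces
    };

    static_assert(std::is_same_v<interruptor<IOInterruptCondition>::condition,
                                 IOInterruptCondition>);

    int main() {}

With the alias in place, call sites such as with_blocking_future_interruptible can spell the condition as interruptor::condition, which is the DRY win referred to above.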
Signed-off-by: Radoslaw Zarzynski --- src/crimson/common/interruptible_future.h | 2 ++ .../osd/osd_operations/background_recovery.cc | 6 +++--- .../osd/osd_operations/client_request.cc | 18 +++++++++--------- .../osd_operations/internal_client_request.cc | 10 +++++----- .../osd/osd_operations/peering_event.cc | 8 ++++---- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/crimson/common/interruptible_future.h b/src/crimson/common/interruptible_future.h index f41bc4f3ededd..b490945f0f76d 100644 --- a/src/crimson/common/interruptible_future.h +++ b/src/crimson/common/interruptible_future.h @@ -1034,6 +1034,8 @@ template struct interruptor { public: + using condition = InterruptCond; + template [[gnu::always_inline]] static interruptible_future_detail diff --git a/src/crimson/osd/osd_operations/background_recovery.cc b/src/crimson/osd/osd_operations/background_recovery.cc index 7614cc9102a6c..50089dbeda684 100644 --- a/src/crimson/osd/osd_operations/background_recovery.cc +++ b/src/crimson/osd/osd_operations/background_recovery.cc @@ -70,7 +70,7 @@ UrgentRecovery::do_recovery() { logger().debug("{}: {}", __func__, *this); if (!pg->has_reset_since(epoch_started)) { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( pg->get_recovery_handler()->recover_missing(soid, need) ).then_interruptible([] { return seastar::make_ready_future(false); @@ -113,7 +113,7 @@ PglogBasedRecovery::do_recovery() { if (pg->has_reset_since(epoch_started)) return seastar::make_ready_future(false); - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( pg->get_recovery_handler()->start_recovery_ops( crimson::common::local_conf()->osd_recovery_max_single_start)); } @@ -134,7 +134,7 @@ BackfillRecovery::do_recovery() return seastar::make_ready_future(false); } // TODO: limits - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( // process_event() of our boost::statechart machine is non-reentrant. // with the backfill_pipeline we protect it from a second entry from // the implementation of BackfillListener. 
diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index d5d90f4a2061f..3a5111ce0d417 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -104,16 +104,16 @@ seastar::future<> ClientRequest::start() return interruptor::now(); }); } - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(pg).await_map) ).then_interruptible([this, &pg] { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( pg.osdmap_gate.wait_for_map(m->get_min_epoch())); }).then_interruptible([this, &pg](auto map) { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(pg).wait_for_active)); }).then_interruptible([this, &pg]() { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( pg.wait_for_active_blocker.wait()); }).then_interruptible([this, pgref=std::move(pgref)]() mutable { if (is_pg_op()) { @@ -157,7 +157,7 @@ ClientRequest::process_pg_op( ClientRequest::interruptible_future<> ClientRequest::process_op(Ref &pg) { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(*pg).recover_missing)) .then_interruptible( [this, pg]() mutable { @@ -172,14 +172,14 @@ ClientRequest::process_op(Ref &pg) CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, false); return conn->send(std::move(reply)); } else { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(*pg).get_obc)).then_interruptible( [this, pg]() mutable -> PG::load_obc_iertr::future<> { logger().debug("{}: got obc lock", *this); op_info.set_from_op(&*m, *pg->get_osdmap()); return pg->with_locked_obc(m->get_hobj(), op_info, [this, pg](auto obc) mutable { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(*pg).process) ).then_interruptible([this, pg, obc]() mutable { return do_process(pg, obc); @@ -217,13 +217,13 @@ ClientRequest::do_process(Ref& pg, crimson::osd::ObjectContextRef obc) [this, pg](auto submitted, auto all_completed) mutable { return submitted.then_interruptible( [this, pg] { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(*pg).wait_repop)); }).then_interruptible( [this, pg, all_completed=std::move(all_completed)]() mutable { return all_completed.safe_then_interruptible( [this, pg](MURef reply) { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(*pg).send_reply)).then_interruptible( [this, reply=std::move(reply)]() mutable{ return conn->send(std::move(reply)); diff --git a/src/crimson/osd/osd_operations/internal_client_request.cc b/src/crimson/osd/osd_operations/internal_client_request.cc index 3859b388a8cb2..8b15005ef3d20 100644 --- a/src/crimson/osd/osd_operations/internal_client_request.cc +++ b/src/crimson/osd/osd_operations/internal_client_request.cc @@ -43,18 +43,18 @@ seastar::future<> InternalClientRequest::start() return seastar::repeat([this] { logger().debug("{}: in repeat", *this); return interruptor::with_interruption([this]() mutable { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp().wait_for_active) ).then_interruptible([this] { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( 
pg->wait_for_active_blocker.wait()); }).then_interruptible([this] { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp().recover_missing) ).then_interruptible([this] { return do_recover_missing(pg, {}); }).then_interruptible([this] { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp().get_obc) ).then_interruptible([this] () -> PG::load_obc_iertr::future<> { logger().debug("{}: getting obc lock", *this); @@ -67,7 +67,7 @@ seastar::future<> InternalClientRequest::start() assert(ret == 0); return pg->with_locked_obc(get_target_oid(), op_info, [&osd_ops, this](auto obc) { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp().process) ).then_interruptible( [obc=std::move(obc), &osd_ops, this] { diff --git a/src/crimson/osd/osd_operations/peering_event.cc b/src/crimson/osd/osd_operations/peering_event.cc index 194876b1d5e13..398fc945a099f 100644 --- a/src/crimson/osd/osd_operations/peering_event.cc +++ b/src/crimson/osd/osd_operations/peering_event.cc @@ -70,18 +70,18 @@ seastar::future<> PeeringEvent::start() return complete_rctx(pg); } logger().debug("{}: pg present", *this); - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(*pg).await_map) ).then_interruptible([this, pg] { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( pg->osdmap_gate.wait_for_map(evt.get_epoch_sent())); }).then_interruptible([this, pg](auto) { - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(pp(*pg).process)); }).then_interruptible([this, pg] { // TODO: likely we should synchronize also with the pg log-based // recovery. - return with_blocking_future_interruptible( + return with_blocking_future_interruptible( handle.enter(BackfillRecovery::bp(*pg).process)); }).then_interruptible([this, pg] { pg->do_peering_event(evt, ctx); From f41676bee53f0ed0562faea9d358d2b873e20bfa Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 12 Oct 2021 11:23:45 +0000 Subject: [PATCH 0108/2866] crimson/osd: rectify a coding style issue in PglogBasedRecovery::do_recovery(). Signed-off-by: Radoslaw Zarzynski --- src/crimson/osd/osd_operations/background_recovery.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crimson/osd/osd_operations/background_recovery.cc b/src/crimson/osd/osd_operations/background_recovery.cc index 50089dbeda684..a8ac12cfc495c 100644 --- a/src/crimson/osd/osd_operations/background_recovery.cc +++ b/src/crimson/osd/osd_operations/background_recovery.cc @@ -111,8 +111,9 @@ PglogBasedRecovery::PglogBasedRecovery( PglogBasedRecovery::interruptible_future PglogBasedRecovery::do_recovery() { - if (pg->has_reset_since(epoch_started)) + if (pg->has_reset_since(epoch_started)) { return seastar::make_ready_future(false); + } return with_blocking_future_interruptible( pg->get_recovery_handler()->start_recovery_ops( crimson::common::local_conf()->osd_recovery_max_single_start)); From 7b75040d257f599d760d830c0431e55615654026 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 12 Oct 2021 11:25:20 +0000 Subject: [PATCH 0109/2866] crimson: respect the CEPH_ARGS environment variable. Rook uses `CEPH_ARGS` to convey the `mon_host` configurable. Lack of support for it was the root cause of inability to reach out to monitors. 
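A hedged sketch of the intent (the helper below is invented for illustration; the real wiring is ConfigProxy::parse_env() called from crimson-osd's main() between parse_config_files() and parse_argv(), as the diff below shows):

    // Standalone illustration: an operator (or Rook) exports command-line style
    // options in the CEPH_ARGS environment variable, e.g.
    //   CEPH_ARGS="--mon-host 10.0.0.1:6789"   (example value, not from the patch)
    // and the daemon picks them up at startup in addition to conf file and argv.
    #include <cstdlib>
    #include <iostream>
    #include <string>

    int main() {
      if (const char* env = std::getenv("CEPH_ARGS")) {
        std::cout << "applying env-provided options: " << std::string(env) << "\n";
      } else {
        std::cout << "CEPH_ARGS not set; relying on config file and argv only\n";
      }
      return 0;
    }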
See: https://gist.github.com/rzarzynski/95746aa73a48e811749a3b0aaeb31680#gistcomment-3924233. Signed-off-by: Radoslaw Zarzynski --- src/crimson/common/config_proxy.h | 8 ++++++++ src/crimson/osd/main.cc | 1 + 2 files changed, 9 insertions(+) diff --git a/src/crimson/common/config_proxy.h b/src/crimson/common/config_proxy.h index e56a6825f41c3..e27db9b5bedf3 100644 --- a/src/crimson/common/config_proxy.h +++ b/src/crimson/common/config_proxy.h @@ -179,6 +179,14 @@ class ConfigProxy : public seastar::peering_sharded_service }); } + seastar::future<> parse_env() { + return do_change([this](ConfigValues& values) { + get_config().parse_env(CEPH_ENTITY_TYPE_OSD, + values, + obs_mgr); + }); + } + seastar::future<> parse_config_files(const std::string& conf_files); using ShardedConfig = seastar::sharded; diff --git a/src/crimson/osd/main.cc b/src/crimson/osd/main.cc index 596dea0b1f4e4..18b7d520b8892 100644 --- a/src/crimson/osd/main.cc +++ b/src/crimson/osd/main.cc @@ -262,6 +262,7 @@ int main(int argc, char* argv[]) sharded_perf_coll().stop().get(); }); local_conf().parse_config_files(conf_file_list).get(); + local_conf().parse_env().get(); local_conf().parse_argv(ceph_args).get(); if (const auto ret = pidfile_write(local_conf()->pid_file); ret == -EACCES || ret == -EAGAIN) { From ad0b6094cebd2460795ac739ea5d6fddf16e9a79 Mon Sep 17 00:00:00 2001 From: Adam King Date: Tue, 12 Oct 2021 07:22:57 -0400 Subject: [PATCH 0110/2866] mgr/cephadm: remove host addr from agent deps Had no need to be a dependency anyway and actually was causing issues in clusters with FQDNs due to the assumption that the daemon-id would match the hostname (which isn't necessarily true if it is an FQDN). Signed-off-by: Adam King --- src/pybind/mgr/cephadm/agent.py | 3 ++- src/pybind/mgr/cephadm/module.py | 4 ++-- src/pybind/mgr/cephadm/services/cephadmservice.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index ead4c0a3bee5a..e987add4ee26f 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -205,7 +205,7 @@ class AgentMessageThread(threading.Thread): def __init__(self, host: str, port: int, data: Dict[Any, Any], mgr: "CephadmOrchestrator") -> None: self.mgr = mgr self.host = host - self.addr = self.mgr.inventory.get_addr(host) + self.addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host self.port = port self.data: str = json.dumps(data) super(AgentMessageThread, self).__init__(target=self.run) @@ -261,6 +261,7 @@ def run(self) -> None: secure_agent_socket.sendall(msg.encode('utf-8')) agent_response = secure_agent_socket.recv(1024).decode() self.mgr.log.info(f'Received "{agent_response}" from agent on host {self.host}') + self.mgr.cache.sending_agent_message[self.host] = False return except ConnectionError as e: # if it's a connection error, possibly try to connect again.
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 41476c1c99207..8364696c9a95a 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -2119,8 +2119,8 @@ def _calc_daemon_deps(self, root_cert = self.cherrypy_thread.ssl_certs.get_root_cert() except Exception: pass - deps = sorted([self.get_mgr_ip(), self.inventory.get_addr(daemon_id), str(self.endpoint_port), - root_cert, str(self.get_module_option('device_enhanced_scan'))]) + deps = sorted([self.get_mgr_ip(), str(self.endpoint_port), root_cert, + str(self.get_module_option('device_enhanced_scan'))]) elif daemon_type == 'iscsi': deps = [self.get_mgr_ip()] else: diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index f80bf34669d25..da353bd5257ee 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1043,6 +1043,6 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st 'listener.key': listener_key, } - return config, sorted([str(self.mgr.get_mgr_ip()), self.mgr.inventory.get_addr(daemon_spec.host), - str(self.mgr.endpoint_port), self.mgr.cherrypy_thread.ssl_certs.get_root_cert(), + return config, sorted([str(self.mgr.get_mgr_ip()), str(self.mgr.endpoint_port), + self.mgr.cherrypy_thread.ssl_certs.get_root_cert(), str(self.mgr.get_module_option('device_enhanced_scan'))]) From db5cfb15e55dadf7bd5c381f53a4ea548fcea152 Mon Sep 17 00:00:00 2001 From: Nizamudeen A Date: Thu, 7 Oct 2021 21:06:29 +0530 Subject: [PATCH 0111/2866] mgr/dashboard: Fix orchestrator/01-hosts.e2e-spec.ts failure The test is failing on deleting a host because the agent daemon is present in that host. It's not possible to simply delete a host. We need to drain it first and then delete it. Fixes: https://tracker.ceph.com/issues/52764 Signed-off-by: Nizamudeen A --- .../integration/orchestrator/01-hosts.e2e-spec.ts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts index cf85642a1b1d2..db72ddbd396cd 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts @@ -21,8 +21,20 @@ describe('Hosts page', () => { hosts.add(hostname, true); }); - it('should delete a host and add it back', function () { + it('should drain and delete a host and then add it back', function () { const host = Cypress._.last(this.hosts)['name']; + + // should drain the host first before deleting + hosts.editLabels(host, ['_no_schedule'], true); + hosts.clickHostTab(host, 'Daemons'); + cy.get('cd-host-details').within(() => { + // draining will take some time to complete.
+ // since we don't know how many daemons will be + // running in this host in future putting the wait + // to 15s + cy.wait(15000); + hosts.getTableCount('total').should('be.eq', 0); + }); hosts.delete(host); // add it back From 631360a41ab2ad93445d35a9730200beef2bebab Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Tue, 12 Oct 2021 14:00:50 -0400 Subject: [PATCH 0112/2866] mon/MDSMonitor: avoid crash when decoding old FSMap epochs Fixes: https://tracker.ceph.com/issues/52820 Signed-off-by: Patrick Donnelly --- src/mds/FSMap.cc | 1 + src/mon/MDSMonitor.cc | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index d82f8c439cc57..9fe50244ef138 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -651,6 +651,7 @@ void FSMap::encode(bufferlist& bl, uint64_t features) const void FSMap::decode(bufferlist::const_iterator& p) { + struct_version = 0; DECODE_START(STRUCT_VERSION, p); DECODE_OLDEST(7); struct_version = struct_v; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index c3a3d0046e4a0..b64671176f5ee 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -2335,7 +2335,12 @@ void MDSMonitor::tick() derr << "could not get version " << v << dendl; ceph_abort(); } - fsmap.decode(bl); + try { + fsmap.decode(bl); + } catch (const ceph::buffer::malformed_input& e) { + dout(5) << "flushing old fsmap struct because unable to decode FSMap: " << e.what() << dendl; + } + /* N.B. FSMap::is_struct_old is also true for undecoded (failed to decode) FSMap */ if (fsmap.is_struct_old()) { dout(5) << "fsmap struct is too old; proposing to flush out old versions" << dendl; do_propose = true; From eb3142710f17cbeec6393031fbb63ff6da7beb51 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Tue, 12 Oct 2021 14:22:10 -0400 Subject: [PATCH 0113/2866] rgw: raise log level of realm/zonegroup/zone output on startup this output shows up in all radosgw-admin commands, and isn't relevant to most of them Signed-off-by: Casey Bodley --- src/rgw/services/svc_zone.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc index a3502b3b5abe2..3bcdbfd0eea83 100644 --- a/src/rgw/services/svc_zone.cc +++ b/src/rgw/services/svc_zone.cc @@ -271,12 +271,12 @@ int RGWSI_Zone::do_start(optional_yield y, const DoutPrefixProvider *dpp) *zone_public_config = zone_iter->second; ldout(cct, 20) << "zone " << zone_params->get_name() << " found" << dendl; - ldpp_dout(dpp, 1) << "Realm: " << std::left << setw(20) << realm->get_name() << " (" << realm->get_id() << ")" << dendl; - ldpp_dout(dpp, 1) << "ZoneGroup: " << std::left << setw(20) << zonegroup->get_name() << " (" << zonegroup->get_id() << ")" << dendl; - ldpp_dout(dpp, 1) << "Zone: " << std::left << setw(20) << zone_params->get_name() << " (" << zone_params->get_id() << ")" << dendl; + ldpp_dout(dpp, 4) << "Realm: " << std::left << setw(20) << realm->get_name() << " (" << realm->get_id() << ")" << dendl; + ldpp_dout(dpp, 4) << "ZoneGroup: " << std::left << setw(20) << zonegroup->get_name() << " (" << zonegroup->get_id() << ")" << dendl; + ldpp_dout(dpp, 4) << "Zone: " << std::left << setw(20) << zone_params->get_name() << " (" << zone_params->get_id() << ")" << dendl; if (init_from_period) { - ldpp_dout(dpp, 1) << "using period configuration: " << current_period->get_id() << ":" << current_period->get_epoch() << dendl; + ldpp_dout(dpp, 4) << "using period configuration: " << current_period->get_id() << ":" << 
current_period->get_epoch() << dendl; ret = init_zg_from_period(dpp, y); if (ret < 0) { return ret; From 845d21505713b8c1f48fb530d225e47a4c76f842 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Tue, 12 Oct 2021 14:30:28 -0400 Subject: [PATCH 0114/2866] rgw: raise log level of http client headers these messages are especially spammy because the http client's dout prefix includes the entire request path: 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]receive_http_header 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]received header:HTTP/1.1 501 Not Implemented 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]receive_http_header 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]received header:Content-Length: 108 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]receive_http_header 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]received header:x-amz-request-id: tx00000000000000027e9ba-00616514d3-70a3-east 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]receive_http_header 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]received header:Accept-Ranges: bytes 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]receive_http_header 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]received header:Content-Type: application/json 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]receive_http_header 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]received header:Date: Tue, 12 Oct 2021 04:53:39 GMT 2021-10-12T04:53:39.281+0000 7f66a0ff9700 10 http_client[POST/http://example.com/admin/log?type=data¬ify2&source-zone=3a571642-9f5e-46d8-8186-9eca8cc79ac6&rgwx-zonegroup=8f3b29b1-ffc6-4c90-9d0c-9bd258028cd8]receive_http_header raise to 30 so 
they're only enabled for super-verbose logging Signed-off-by: Casey Bodley --- src/rgw/rgw_rest_client.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc index 54d185516eda7..68b40324ce268 100644 --- a/src/rgw/rgw_rest_client.cc +++ b/src/rgw/rgw_rest_client.cc @@ -49,7 +49,7 @@ int RGWHTTPSimpleRequest::receive_header(void *ptr, size_t len) char *s = (char *)ptr, *end = (char *)ptr + len; char *p = line; - ldpp_dout(this, 10) << "receive_http_header" << dendl; + ldpp_dout(this, 30) << "receive_http_header" << dendl; while (s != end) { if (*s == '\r') { @@ -58,7 +58,7 @@ int RGWHTTPSimpleRequest::receive_header(void *ptr, size_t len) } if (*s == '\n') { *p = '\0'; - ldpp_dout(this, 10) << "received header:" << line << dendl; + ldpp_dout(this, 30) << "received header:" << line << dendl; // TODO: fill whatever data required here char *l = line; char *tok = strsep(&l, " \t:"); From 5655329b3ad6a46514ca89ef1ce8436b50d09b94 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Tue, 12 Oct 2021 14:39:42 -0400 Subject: [PATCH 0115/2866] mds/FSMap: do not assert allow_standby_replay on old FSMaps Octopus and older may have standby-replay daemons with allow_standby_replay==false. Fixes: https://tracker.ceph.com/issues/52874 Signed-off-by: Patrick Donnelly --- src/mds/FSMap.cc | 8 ++++++-- src/mds/FSMap.h | 2 +- src/mon/MDSMonitor.cc | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index d82f8c439cc57..1c9d3f52bccdc 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -850,8 +850,12 @@ const MDSMap::mds_info_t* FSMap::find_replacement_for(mds_role_t role) const return get_available_standby(*fs); } -void FSMap::sanity() const +void FSMap::sanity(bool pending) const { + /* Only do some sanity checks on **new** FSMaps. Older versions may not be + * compliant. + */ + if (legacy_client_fscid != FS_CLUSTER_ID_NONE) { ceph_assert(filesystems.count(legacy_client_fscid) == 1); } @@ -868,7 +872,7 @@ void FSMap::sanity() const ceph_assert(fs->mds_map.failed.count(info.rank) == 0); ceph_assert(fs->mds_map.damaged.count(info.rank) == 0); } else { - ceph_assert(fs->mds_map.allows_standby_replay()); + ceph_assert(!pending || fs->mds_map.allows_standby_replay()); } ceph_assert(info.compat.writeable(fs->mds_map.compat)); } diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h index 62fa0a414f934..197d5141c17d5 100644 --- a/src/mds/FSMap.h +++ b/src/mds/FSMap.h @@ -568,7 +568,7 @@ class FSMap { * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are * all self-consistent. */ - void sanity() const; + void sanity(bool pending=false) const; void encode(ceph::buffer::list& bl, uint64_t features) const; void decode(ceph::buffer::list::const_iterator& p); diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index c3a3d0046e4a0..7818e4258c6bc 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -190,7 +190,7 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t) // print map iff 'debug mon = 30' or higher print_map<30>(pending); if (!g_conf()->mon_mds_skip_sanity) { - pending.sanity(); + pending.sanity(true); } // Set 'modified' on maps modified this epoch From b57539dc947fd8a9f7164b6b4044288adcac2d8d Mon Sep 17 00:00:00 2001 From: Zack Cerza Date: Tue, 12 Oct 2021 12:43:34 -0600 Subject: [PATCH 0116/2866] Revert "qa: support isal ec test for aarch64" This commit has been causing scheduled jobs to request e.g. 
aarch64 smithi machines, which don't exist. The dispatcher then tries to find them forever, requiring the dispatcher to be killed and restarted. The queue will sit idle until someone notices the problem. Signed-off-by: Zack Cerza --- qa/standalone/erasure-code/test-erasure-code-plugins.sh | 2 +- qa/suites/rados/thrash-erasure-code-isa/arch/aarch64.yaml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 qa/suites/rados/thrash-erasure-code-isa/arch/aarch64.yaml diff --git a/qa/standalone/erasure-code/test-erasure-code-plugins.sh b/qa/standalone/erasure-code/test-erasure-code-plugins.sh index 677a766863939..b5648d472b973 100755 --- a/qa/standalone/erasure-code/test-erasure-code-plugins.sh +++ b/qa/standalone/erasure-code/test-erasure-code-plugins.sh @@ -14,7 +14,7 @@ case $arch in aarch64*|arm*) legacy_jerasure_plugins=(jerasure_generic jerasure_neon) legacy_shec_plugins=(shec_generic shec_neon) - plugins=(jerasure shec lrc isa) + plugins=(jerasure shec lrc) ;; *) echo "unsupported platform ${arch}." diff --git a/qa/suites/rados/thrash-erasure-code-isa/arch/aarch64.yaml b/qa/suites/rados/thrash-erasure-code-isa/arch/aarch64.yaml deleted file mode 100644 index 6399b9959b595..0000000000000 --- a/qa/suites/rados/thrash-erasure-code-isa/arch/aarch64.yaml +++ /dev/null @@ -1 +0,0 @@ -arch: aarch64 From 869d1a5d9d6c3ebd56586b8e8b854ea242ea5e0d Mon Sep 17 00:00:00 2001 From: Jingya Su Date: Mon, 11 Oct 2021 22:04:23 -0400 Subject: [PATCH 0117/2866] mgr/volumes/fs/operations/versions: fix various flake8 issues Fixes: https://tracker.ceph.com/issues/51402 Signed-off-by: Jingya Su --- .../fs/operations/versions/subvolume_base.py | 170 ++++++++++++------ 1 file changed, 119 insertions(+), 51 deletions(-) diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py index 2840a9f2ea354..346751b221092 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py @@ -1,6 +1,6 @@ import os import stat -import uuid + import errno import logging from hashlib import md5 @@ -9,16 +9,16 @@ import cephfs from ..pin_util import pin -from .subvolume_attrs import SubvolumeTypes, SubvolumeStates +from .subvolume_attrs import SubvolumeTypes from .metadata_manager import MetadataManager from ..trash import create_trashcan, open_trashcan from ...fs_util import get_ancestor_xattr from ...exception import MetadataMgrException, VolumeException -from .op_sm import SubvolumeOpSm from .auth_metadata import AuthMetadataManager log = logging.getLogger(__name__) + class SubvolumeBase(object): LEGACY_CONF_DIR = "_legacy" @@ -69,7 +69,8 @@ def config_path(self): @property def legacy_dir(self): - return os.path.join(self.vol_spec.base_dir.encode('utf-8'), SubvolumeBase.LEGACY_CONF_DIR.encode('utf-8')) + return (os.path.join(self.vol_spec.base_dir.encode('utf-8'), + SubvolumeBase.LEGACY_CONF_DIR.encode('utf-8'))) @property def legacy_config_path(self): @@ -105,7 +106,10 @@ def path(self): @property def features(self): - """ List of features supported by the subvolume, containing items from SubvolumeFeatures """ + """ + List of features supported by the subvolume, + containing items from SubvolumeFeatures + """ raise NotImplementedError @property @@ -115,7 +119,8 @@ def state(self): @property def subvol_type(self): - return SubvolumeTypes.from_value(self.metadata_mgr.get_global_option(MetadataManager.GLOBAL_META_KEY_TYPE)) + return 
(SubvolumeTypes.from_value(self.metadata_mgr.get_global_option + (MetadataManager.GLOBAL_META_KEY_TYPE))) @property def purgeable(self): @@ -124,15 +129,19 @@ def purgeable(self): def load_config(self): if self.legacy_mode: - self.metadata_mgr = MetadataManager(self.fs, self.legacy_config_path, 0o640) + self.metadata_mgr = MetadataManager(self.fs, + self.legacy_config_path, + 0o640) else: - self.metadata_mgr = MetadataManager(self.fs, self.config_path, 0o640) + self.metadata_mgr = MetadataManager(self.fs, + self.config_path, 0o640) def get_attrs(self, pathname): # get subvolume attributes - attrs = {} # type: Dict[str, Union[int, str, None]] + attrs = {} # type: Dict[str, Union[int, str, None]] stx = self.fs.statx(pathname, - cephfs.CEPH_STATX_UID | cephfs.CEPH_STATX_GID | cephfs.CEPH_STATX_MODE, + cephfs.CEPH_STATX_UID | cephfs.CEPH_STATX_GID + | cephfs.CEPH_STATX_MODE, cephfs.AT_SYMLINK_NOFOLLOW) attrs["uid"] = int(stx["uid"]) @@ -140,17 +149,24 @@ def get_attrs(self, pathname): attrs["mode"] = int(int(stx["mode"]) & ~stat.S_IFMT(stx["mode"])) try: - attrs["data_pool"] = self.fs.getxattr(pathname, 'ceph.dir.layout.pool').decode('utf-8') + attrs["data_pool"] = self.fs.getxattr(pathname, + 'ceph.dir.layout.pool' + ).decode('utf-8') except cephfs.NoData: attrs["data_pool"] = None try: - attrs["pool_namespace"] = self.fs.getxattr(pathname, 'ceph.dir.layout.pool_namespace').decode('utf-8') + attrs["pool_namespace"] = self.fs.getxattr(pathname, + 'ceph.dir.layout' + '.pool_namespace' + ).decode('utf-8') except cephfs.NoData: attrs["pool_namespace"] = None try: - attrs["quota"] = int(self.fs.getxattr(pathname, 'ceph.quota.max_bytes').decode('utf-8')) + attrs["quota"] = int(self.fs.getxattr(pathname, + 'ceph.quota.max_bytes' + ).decode('utf-8')) except cephfs.NoData: attrs["quota"] = None @@ -162,9 +178,11 @@ def set_attrs(self, path, attrs): quota = attrs.get("quota") if quota is not None: try: - self.fs.setxattr(path, 'ceph.quota.max_bytes', str(quota).encode('utf-8'), 0) - except cephfs.InvalidValue as e: - raise VolumeException(-errno.EINVAL, "invalid size specified: '{0}'".format(quota)) + self.fs.setxattr(path, 'ceph.quota.max_bytes', + str(quota).encode('utf-8'), 0) + except cephfs.InvalidValue: + raise VolumeException(-errno.EINVAL, + "invalid size specified: '{0}'".format(quota)) except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) @@ -172,10 +190,13 @@ def set_attrs(self, path, attrs): data_pool = attrs.get("data_pool") if data_pool is not None: try: - self.fs.setxattr(path, 'ceph.dir.layout.pool', data_pool.encode('utf-8'), 0) + self.fs.setxattr(path, 'ceph.dir.layout.pool', + data_pool.encode('utf-8'), 0) except cephfs.InvalidValue: raise VolumeException(-errno.EINVAL, - "invalid pool layout '{0}' -- need a valid data pool".format(data_pool)) + "invalid pool layout '{0}'" + "--need a valid data pool" + .format(data_pool)) except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) @@ -183,19 +204,22 @@ def set_attrs(self, path, attrs): xattr_key = xattr_val = None pool_namespace = attrs.get("pool_namespace") if pool_namespace is not None: - # enforce security isolation, use separate namespace for this subvolume + # enforce security isolation, use separate namespace + # for this subvolume xattr_key = 'ceph.dir.layout.pool_namespace' xattr_val = pool_namespace elif not data_pool: - # If subvolume's namespace layout is not set, then the subvolume's pool + # If subvolume's namespace layout is not set, + # then the subvolume's pool # layout remains unset and 
will undesirably change with ancestor's # pool layout changes. xattr_key = 'ceph.dir.layout.pool' xattr_val = None try: self.fs.getxattr(path, 'ceph.dir.layout.pool').decode('utf-8') - except cephfs.NoData as e: - xattr_val = get_ancestor_xattr(self.fs, os.path.split(path)[0], "ceph.dir.layout.pool") + except cephfs.NoData: + xattr_val = get_ancestor_xattr(self.fs, os.path.split(path)[0], + "ceph.dir.layout.pool") if xattr_key and xattr_val: try: self.fs.setxattr(path, xattr_key, xattr_val.encode('utf-8'), 0) @@ -230,16 +254,21 @@ def _resize(self, path, newsize, noshrink): try: newsize = int(newsize) if newsize <= 0: - raise VolumeException(-errno.EINVAL, "Invalid subvolume size") + raise VolumeException(-errno.EINVAL, + "Invalid subvolume size") except ValueError: newsize = newsize.lower() if not (newsize == "inf" or newsize == "infinite"): - raise VolumeException(-errno.EINVAL, "invalid size option '{0}'".format(newsize)) + raise (VolumeException(-errno.EINVAL, + "invalid size option '{0}'" + .format(newsize))) newsize = 0 noshrink = False try: - maxbytes = int(self.fs.getxattr(path, 'ceph.quota.max_bytes').decode('utf-8')) + maxbytes = int(self.fs.getxattr(path, + 'ceph.quota.max_bytes' + ).decode('utf-8')) except cephfs.NoData: maxbytes = 0 except cephfs.Error as e: @@ -248,25 +277,38 @@ def _resize(self, path, newsize, noshrink): subvolstat = self.fs.stat(path) if newsize > 0 and newsize < subvolstat.st_size: if noshrink: - raise VolumeException(-errno.EINVAL, "Can't resize the subvolume. The new size '{0}' would be lesser than the current " - "used size '{1}'".format(newsize, subvolstat.st_size)) + raise VolumeException(-errno.EINVAL, + "Can't resize the subvolume. " + "The new size '{0}' would be " + "lesser than the current " + "used size '{1}'" + .format(newsize, + subvolstat.st_size)) if not newsize == maxbytes: try: - self.fs.setxattr(path, 'ceph.quota.max_bytes', str(newsize).encode('utf-8'), 0) + self.fs.setxattr(path, 'ceph.quota.max_bytes', + str(newsize).encode('utf-8'), 0) except cephfs.Error as e: - raise VolumeException(-e.args[0], "Cannot set new size for the subvolume. '{0}'".format(e.args[1])) + raise (VolumeException(-e.args[0], + "Cannot set new size" + "for the subvolume. 
'{0}'" + .format(e.args[1]))) return newsize, subvolstat.st_size def pin(self, pin_type, pin_setting): return pin(self.fs, self.base_path, pin_type, pin_setting) - def init_config(self, version, subvolume_type, subvolume_path, subvolume_state): - self.metadata_mgr.init(version, subvolume_type.value, subvolume_path, subvolume_state.value) + def init_config(self, version, subvolume_type, + subvolume_path, subvolume_state): + self.metadata_mgr.init(version, subvolume_type.value, + subvolume_path, subvolume_state.value) self.metadata_mgr.flush() def discover(self): - log.debug("discovering subvolume '{0}' [mode: {1}]".format(self.subvolname, "legacy" if self.legacy_mode else "new")) + log.debug("discovering subvolume " + "'{0}' [mode: {1}]".format(self.subvolname, "legacy" + if self.legacy_mode else "new")) try: self.fs.stat(self.base_path) self.metadata_mgr.refresh() @@ -280,8 +322,13 @@ def discover(self): raise except cephfs.Error as e: if e.args[0] == errno.ENOENT: - raise VolumeException(-errno.ENOENT, "subvolume '{0}' does not exist".format(self.subvolname)) - raise VolumeException(-e.args[0], "error accessing subvolume '{0}'".format(self.subvolname)) + raise (VolumeException(-errno.ENOENT, + "subvolume '{0}' " + "does not exist" + .format(self.subvolname))) + raise VolumeException(-e.args[0], + "error accessing subvolume '{0}'" + .format(self.subvolname)) def _trash_dir(self, path): create_trashcan(self.fs, self.vol_spec) @@ -293,7 +340,8 @@ def _link_dir(self, path, bname): create_trashcan(self.fs, self.vol_spec) with open_trashcan(self.fs, self.vol_spec) as trashcan: trashcan.link(path, bname) - log.info("subvolume path '{0}' linked in trashcan bname {1}".format(path, bname)) + log.info("subvolume path '{0}' " + "linked in trashcan bname {1}".format(path, bname)) def trash_base_dir(self): if self.legacy_mode: @@ -306,29 +354,49 @@ def create_base_dir(self, mode): except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) - def info (self): - subvolpath = self.metadata_mgr.get_global_option(MetadataManager.GLOBAL_META_KEY_PATH) + def info(self): + subvolpath = (self.metadata_mgr.get_global_option( + MetadataManager.GLOBAL_META_KEY_PATH)) etype = self.subvol_type - st = self.fs.statx(subvolpath, cephfs.CEPH_STATX_BTIME | cephfs.CEPH_STATX_SIZE | - cephfs.CEPH_STATX_UID | cephfs.CEPH_STATX_GID | - cephfs.CEPH_STATX_MODE | cephfs.CEPH_STATX_ATIME | - cephfs.CEPH_STATX_MTIME | cephfs.CEPH_STATX_CTIME, - cephfs.AT_SYMLINK_NOFOLLOW) + st = self.fs.statx(subvolpath, cephfs.CEPH_STATX_BTIME + | cephfs.CEPH_STATX_SIZE + | cephfs.CEPH_STATX_UID | cephfs.CEPH_STATX_GID + | cephfs.CEPH_STATX_MODE | cephfs.CEPH_STATX_ATIME + | cephfs.CEPH_STATX_MTIME + | cephfs.CEPH_STATX_CTIME, + cephfs.AT_SYMLINK_NOFOLLOW) usedbytes = st["size"] try: - nsize = int(self.fs.getxattr(subvolpath, 'ceph.quota.max_bytes').decode('utf-8')) + nsize = int(self.fs.getxattr(subvolpath, + 'ceph.quota.max_bytes' + ).decode('utf-8')) except cephfs.NoData: nsize = 0 try: - data_pool = self.fs.getxattr(subvolpath, 'ceph.dir.layout.pool').decode('utf-8') - pool_namespace = self.fs.getxattr(subvolpath, 'ceph.dir.layout.pool_namespace').decode('utf-8') + data_pool = self.fs.getxattr(subvolpath, + 'ceph.dir.layout.pool' + ).decode('utf-8') + pool_namespace = self.fs.getxattr(subvolpath, + 'ceph.dir.layout.pool_namespace' + ).decode('utf-8') except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) - return {'path': subvolpath, 'type': etype.value, 'uid': int(st["uid"]), 'gid': int(st["gid"]), - 'atime': 
str(st["atime"]), 'mtime': str(st["mtime"]), 'ctime': str(st["ctime"]), - 'mode': int(st["mode"]), 'data_pool': data_pool, 'created_at': str(st["btime"]), - 'bytes_quota': "infinite" if nsize == 0 else nsize, 'bytes_used': int(usedbytes), - 'bytes_pcent': "undefined" if nsize == 0 else '{0:.2f}'.format((float(usedbytes) / nsize) * 100.0), - 'pool_namespace': pool_namespace, 'features': self.features, 'state': self.state.value} + return {'path': subvolpath, + 'type': etype.value, + 'uid': int(st["uid"]), + 'gid': int(st["gid"]), + 'atime': str(st["atime"]), + 'mtime': str(st["mtime"]), + 'ctime': str(st["ctime"]), + 'mode': int(st["mode"]), + 'data_pool': data_pool, + 'created_at': str(st["btime"]), + 'bytes_quota': "infinite" if nsize == 0 else nsize, + 'bytes_used': int(usedbytes), + 'bytes_pcent': "undefined" + if nsize == 0 + else '{0:.2f}'.format((float(usedbytes) / nsize) * 100.0), + 'pool_namespace': pool_namespace, + 'features': self.features, 'state': self.state.value} From 70f1c33707fe0c3bbf00a3ded5d6678b445e4e92 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Wed, 13 Oct 2021 08:35:24 +0800 Subject: [PATCH 0118/2866] test/librbd: re-enable BlockGuard test. This patch partly reverts PR#16255. Signed-off-by: Jianpeng Ma --- src/test/librbd/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/librbd/CMakeLists.txt b/src/test/librbd/CMakeLists.txt index a110016efd2eb..df6e8c5e92da8 100644 --- a/src/test/librbd/CMakeLists.txt +++ b/src/test/librbd/CMakeLists.txt @@ -11,7 +11,6 @@ set(librbd_test test_ImageWatcher.cc test_internal.cc test_mirroring.cc - test_BlockGuard.cc test_DeepCopy.cc test_Groups.cc test_Migration.cc @@ -41,6 +40,7 @@ target_link_libraries(rbd_test_mock PUBLIC # unittest_librbd # doesn't use add_ceph_test because it is called by run-rbd-unit-tests.sh set(unittest_librbd_srcs + test_BlockGuard.cc test_main.cc test_mock_fixture.cc test_mock_ConfigWatcher.cc From 82bcf9b57fc4939677ef1af20a1a3fa258243a37 Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Tue, 12 Oct 2021 12:09:22 +0900 Subject: [PATCH 0119/2866] test: increase retry duration when calculating manifest ref. count In situations where the object is degraded and delayed, the retry time can expire before the object is recovered; it takes almost 6 minutes to be recovered according to the log.
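The shape of the fix, reduced to a hedged standalone sketch (the convergence helper is invented here; the real loop polls cls_cas_references_chunk() in tier_cxx.cc):

    // Sketch only: hoist the retry counter out of the loop and double the budget,
    // so the test can assert that it stopped because the refcount converged and
    // not merely because the retry budget ran out.
    #include <cassert>

    bool refcount_converged(int attempt) { return attempt >= 5; }  // stand-in

    int main() {
      int tries = 0;
      for (; tries < 30; ++tries) {   // budget raised from 15 to 30 attempts
        if (refcount_converged(tries)) {
          break;                      // left the loop because the state is correct
        }
      }
      assert(tries < 30);             // fail loudly if only the budget stopped us
      return 0;
    }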
Fixes: https://tracker.ceph.com/issues/52872 Signed-off-by: Myoungwon Oh --- src/test/librados/tier_cxx.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc index 4239084b6c714..287c6fa2ae6ac 100644 --- a/src/test/librados/tier_cxx.cc +++ b/src/test/librados/tier_cxx.cc @@ -177,7 +177,8 @@ void is_intended_refcount_state(librados::IoCtx& src_ioctx, } dst_refcount = refs.count(); } - for (int tries = 0; tries < 15; ++tries) { + int tries = 0; + for (; tries < 30; ++tries) { r = cls_cas_references_chunk(src_ioctx, src_oid, dst_oid); if (r == -ENOENT || r == -ENOLINK) { src_refcount = 0; @@ -189,6 +190,7 @@ void is_intended_refcount_state(librados::IoCtx& src_ioctx, } break; } + ASSERT_TRUE(tries < 30); ASSERT_TRUE(src_refcount >= 0); ASSERT_TRUE(src_refcount == expected_refcount); ASSERT_TRUE(src_refcount <= dst_refcount); From 4e6a50baeb37831bbd09cea7cc71428c47c5bbaa Mon Sep 17 00:00:00 2001 From: Michael Fritch Date: Mon, 11 Oct 2021 12:10:17 -0600 Subject: [PATCH 0120/2866] cephadm: use the current cephadm binary for the agent 78983ad0d0cc introduced passing the cephadm binary via config json, when the currently deployed cephadm binary could be used in the agent's unit.run file instead Signed-off-by: Michael Fritch --- src/cephadm/cephadm | 6 +++--- src/pybind/mgr/cephadm/services/cephadmservice.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index ed392bd187d75..95d9fbc246135 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -3505,7 +3505,7 @@ class CephadmAgent(): loop_interval = 30 stop = False - required_files = ['cephadm', 'agent.json', 'keyring'] + required_files = ['agent.json', 'keyring'] def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''): self.ctx = ctx @@ -3515,7 +3515,6 @@ class CephadmAgent(): self.target_port = '' self.host = '' self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}') - self.binary_path = os.path.join(self.daemon_dir, 'cephadm') self.config_path = os.path.join(self.daemon_dir, 'agent.json') self.keyring_path = os.path.join(self.daemon_dir, 'keyring') self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem') @@ -3559,7 +3558,8 @@ class CephadmAgent(): def unit_run(self) -> str: py3 = shutil.which('python3') - return ('set -e\n' + f'{py3} {self.binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n') + binary_path = os.path.realpath(sys.argv[0]) + return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n') def unit_file(self) -> str: return """#generated by cephadm diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index f80bf34669d25..0cf78cb82a61b 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1036,7 +1036,6 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st self.mgr.inventory.get_addr(daemon_spec.host)) config = { 'agent.json': json.dumps(cfg), - 'cephadm': self.mgr._cephadm, 'keyring': daemon_spec.keyring, 'root_cert.pem': self.mgr.cherrypy_thread.ssl_certs.get_root_cert(), 'listener.crt': listener_cert, From ffe7de88530dda661fca5fd0075434d4c8a349ac Mon Sep 17 00:00:00 2001 From: Kalpesh Pandya Date: Wed, 22 Sep 2021 15:56:21 +0530 Subject: [PATCH 0121/2866] src/test: HTTP POST object upload This 
solves the tracker: https://tracker.ceph.com/issues/51724 Basically it is using 'generate_presigned_post()' boto3 API. This is verified under AMQP endpoint. Signed-off-by: Kalpesh Pandya --- src/test/rgw/bucket_notification/test_bn.py | 73 ++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/src/test/rgw/bucket_notification/test_bn.py b/src/test/rgw/bucket_notification/test_bn.py index ccecfbaf9ce22..ec6b74d661714 100644 --- a/src/test/rgw/bucket_notification/test_bn.py +++ b/src/test/rgw/bucket_notification/test_bn.py @@ -1728,6 +1728,77 @@ def test_ps_s3_creation_triggers_on_master_ssl(): del os.environ['RABBITMQ_CONFIG_FILE'] +@attr('amqp_test') +def test_http_post_object_upload(): + """ test that uploads object using HTTP POST """ + + import boto3 + from collections import OrderedDict + import requests + + hostname = get_ip() + zonegroup = 'default' + conn = connection() + + endpoint = "http://%s:%d" % (get_config_host(), get_config_port()) + + conn1 = boto3.client(service_name='s3', + aws_access_key_id=get_access_key(), + aws_secret_access_key=get_secret_key(), + endpoint_url=endpoint, + ) + + bucket_name = gen_bucket_name() + topic_name = bucket_name + TOPIC_SUFFIX + + key_name = 'foo.txt' + + resp = conn1.generate_presigned_post(Bucket=bucket_name, Key=key_name,) + + url = resp['url'] + + bucket = conn1.create_bucket(ACL='public-read-write', Bucket=bucket_name) + + # start amqp receivers + exchange = 'ex1' + task1, receiver1 = create_amqp_receiver_thread(exchange, topic_name+'_1') + task1.start() + + # create s3 topics + endpoint_address = 'amqp://' + hostname + endpoint_args = 'push-endpoint=' + endpoint_address + '&amqp-exchange=' + exchange + '&amqp-ack-level=broker' + topic_conf1 = PSTopicS3(conn, topic_name+'_1', zonegroup, endpoint_args=endpoint_args) + topic_arn1 = topic_conf1.set_config() + + # create s3 notifications + notification_name = bucket_name + NOTIFICATION_SUFFIX + topic_conf_list = [{'Id': notification_name+'_1', 'TopicArn': topic_arn1, + 'Events': ['s3:ObjectCreated:Post'] + }] + s3_notification_conf = PSNotificationS3(conn, bucket_name, topic_conf_list) + response, status = s3_notification_conf.set_config() + assert_equal(status/100, 2) + + payload = OrderedDict([("key" , "foo.txt"),("acl" , "public-read"),\ + ("Content-Type" , "text/plain"),('file', ('bar'))]) + + # POST upload + r = requests.post(url, files=payload, verify=True) + assert_equal(r.status_code, 204) + + # check amqp receiver + events = receiver1.get_and_reset_events() + assert_equal(len(events), 1) + + # cleanup + stop_amqp_receiver(receiver1, task1) + s3_notification_conf.del_config() + topic_conf1.del_config() + conn1.delete_object(Bucket=bucket_name, Key=key_name) + # delete the bucket + conn1.delete_bucket(Bucket=bucket_name) + + @attr('amqp_test') def test_ps_s3_multipart_on_master(): """ test multipart object upload on master""" @@ -1898,7 +1969,7 @@ def test_ps_s3_metadata_on_master(): time.sleep(5) # check amqp receiver events = receiver.get_and_reset_events() - assert_equal(len(events), 3) # PUT, COPY, Multipart start, Multipart End + assert_equal(len(events), 3) # PUT, COPY, Multipart Complete for event in events: assert(event['Records'][0]['s3']['object']['key'] in expected_keys) From 29fa2cec790b5a68d9a8edf48cb47ca69d08c6cc Mon Sep 17 00:00:00 2001 From: Yongseok Oh Date: Fri, 24 Sep 2021 06:04:55 +0000 Subject: [PATCH 0122/2866] mds: improve mds_bal_fragment_size_max config option mds_bal_fragment_size_max is maintained as bal_fragment_size_max cached 
variable. Fixes: https://tracker.ceph.com/issues/52723 Signed-off-by: Yongseok Oh --- src/mds/MDSRank.cc | 1 + src/mds/Server.cc | 8 +++++++- src/mds/Server.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 190a53706455d..f870c61fe5d91 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -3672,6 +3672,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const "host", "mds_bal_fragment_dirs", "mds_bal_fragment_interval", + "mds_bal_fragment_size_max", "mds_cache_memory_limit", "mds_cache_mid", "mds_cache_reservation", diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 095001076220f..1ed15937013c7 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -263,6 +263,7 @@ Server::Server(MDSRank *m, MetricsHandler *metrics_handler) : max_caps_throttle_ratio = g_conf().get_val("mds_session_max_caps_throttle_ratio"); caps_throttle_retry_request_timeout = g_conf().get_val("mds_cap_acquisition_throttle_retry_request_timeout"); dir_max_entries = g_conf().get_val("mds_dir_max_entries"); + bal_fragment_size_max = g_conf().get_val("mds_bal_fragment_size_max"); supported_features = feature_bitset_t(CEPHFS_FEATURES_MDS_SUPPORTED); } @@ -1263,6 +1264,11 @@ void Server::handle_conf_change(const std::set& changed) { dout(20) << __func__ << " max entries per directory changed to " << dir_max_entries << dendl; } + if (changed.count("mds_bal_fragment_size_max")) { + bal_fragment_size_max = g_conf().get_val("mds_bal_fragment_size_max"); + dout(20) << __func__ << " max fragment size changed to " + << bal_fragment_size_max << dendl; + } } /* @@ -3205,7 +3211,7 @@ bool Server::check_access(MDRequestRef& mdr, CInode *in, unsigned mask) bool Server::check_fragment_space(MDRequestRef &mdr, CDir *dir) { const auto size = dir->get_frag_size(); - const auto max = g_conf()->mds_bal_fragment_size_max; + const auto max = bal_fragment_size_max; if (size >= max) { dout(10) << "fragment " << *dir << " size exceeds " << max << " (CEPHFS_ENOSPC)" << dendl; respond_to_request(mdr, -CEPHFS_ENOSPC); diff --git a/src/mds/Server.h b/src/mds/Server.h index 31ed68c45aeb9..76f38717c7265 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -461,6 +461,7 @@ class Server { uint64_t max_snaps_per_dir = 100; unsigned delegate_inos_pct = 0; uint64_t dir_max_entries = 0; + int64_t bal_fragment_size_max = 0; DecayCounter recall_throttle; time last_recall_state; From b9f38cadc46338c9f1527beeaaba3d42639badf9 Mon Sep 17 00:00:00 2001 From: Avan Thakkar Date: Tue, 1 Jun 2021 18:25:15 +0530 Subject: [PATCH 0123/2866] mgr/dashboard: Create Cluster Workflow welcome screen and e2e tests A module option called CLUSTER_STATUS has two options: INSTALLED and POST_INSTALLED. When CLUSTER_STATUS is INSTALLED, the create-cluster wizard is shown after the initial login. 
After the cluster creation is succesfull this option is set to POST_INSTALLED Also has the e2e codes for the Review Section Fixes: https://tracker.ceph.com/issues/50336 Signed-off-by: Avan Thakkar Signed-off-by: Nizamudeen A --- qa/tasks/mgr/dashboard/test_auth.py | 6 +- qa/tasks/mgr/dashboard/test_cluster.py | 23 +++++++ src/pybind/mgr/dashboard/controllers/auth.py | 2 + .../mgr/dashboard/controllers/cluster.py | 20 ++++++ .../cluster/cluster-welcome-page.po.ts | 22 +++++++ .../cluster/create-cluster-review.po.ts | 11 ++++ .../06-cluster-welcome-page.e2e-spec.ts | 19 ++++++ .../08-create-cluster-review.e2e-spec.ts | 61 ++++++++++++++++++ .../frontend/src/app/app-routing.module.ts | 14 +++++ .../src/app/ceph/cluster/cluster.module.ts | 4 +- .../create-cluster.component.html | 29 +++++++++ .../create-cluster.component.scss | 0 .../create-cluster.component.spec.ts | 45 +++++++++++++ .../create-cluster.component.ts | 44 +++++++++++++ .../core/auth/login/login.component.spec.ts | 26 ++++++++ .../app/core/auth/login/login.component.ts | 8 ++- .../app/shared/api/cluster.service.spec.ts | 42 +++++++++++++ .../src/app/shared/api/cluster.service.ts | 27 ++++++++ .../src/app/shared/constants/app.constants.ts | 1 + .../module-status-guard.service.spec.ts | 19 +++++- .../services/module-status-guard.service.ts | 17 ++++- src/pybind/mgr/dashboard/openapi.yaml | 63 +++++++++++++++++++ src/pybind/mgr/dashboard/services/cluster.py | 26 ++++++++ 23 files changed, 520 insertions(+), 9 deletions(-) create mode 100644 qa/tasks/mgr/dashboard/test_cluster.py create mode 100644 src/pybind/mgr/dashboard/controllers/cluster.py create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/06-cluster-welcome-page.e2e-spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.ts create mode 100644 src/pybind/mgr/dashboard/services/cluster.py diff --git a/qa/tasks/mgr/dashboard/test_auth.py b/qa/tasks/mgr/dashboard/test_auth.py index 8fc7cd1992e6d..98566344444f7 100644 --- a/qa/tasks/mgr/dashboard/test_auth.py +++ b/qa/tasks/mgr/dashboard/test_auth.py @@ -335,7 +335,8 @@ def test_check_wo_token(self): self.assertStatus(200) data = self.jsonBody() self.assertSchema(data, JObj(sub_elems={ - "login_url": JLeaf(str) + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) }, allow_unknown=False)) self.logout() @@ -345,6 +346,7 @@ def test_check_wo_token(self): self.assertStatus(200) data = self.jsonBody() self.assertSchema(data, JObj(sub_elems={ - "login_url": JLeaf(str) + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) }, allow_unknown=False)) 
self.logout(set_cookies=True) diff --git a/qa/tasks/mgr/dashboard/test_cluster.py b/qa/tasks/mgr/dashboard/test_cluster.py new file mode 100644 index 0000000000000..14f8542796c55 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cluster.py @@ -0,0 +1,23 @@ +from .helper import DashboardTestCase, JLeaf, JObj + + +class ClusterTest(DashboardTestCase): + + def setUp(self): + super().setUp() + self.reset_session() + + def test_get_status(self): + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + "status": JLeaf(str) + }, allow_unknown=False)) + + def test_update_status(self): + req = {'status': 'POST_INSTALLED'} + self._put('/api/cluster', req, version='0.1') + self.assertStatus(200) + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertEqual(data, req) diff --git a/src/pybind/mgr/dashboard/controllers/auth.py b/src/pybind/mgr/dashboard/controllers/auth.py index 353d5d72bb9e8..196f027b293ee 100644 --- a/src/pybind/mgr/dashboard/controllers/auth.py +++ b/src/pybind/mgr/dashboard/controllers/auth.py @@ -7,6 +7,7 @@ from .. import mgr from ..exceptions import InvalidCredentialsError, UserDoesNotExist from ..services.auth import AuthManager, JwtManager +from ..services.cluster import ClusterModel from ..settings import Settings from . import APIDoc, APIRouter, ControllerAuthMixin, EndpointDoc, RESTController, allow_empty_body @@ -117,4 +118,5 @@ def check(self, token): } return { 'login_url': self._get_login_url(), + 'cluster_status': ClusterModel.from_db().dict()['status'] } diff --git a/src/pybind/mgr/dashboard/controllers/cluster.py b/src/pybind/mgr/dashboard/controllers/cluster.py new file mode 100644 index 0000000000000..5ec49e39b1c22 --- /dev/null +++ b/src/pybind/mgr/dashboard/controllers/cluster.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +from ..security import Scope +from ..services.cluster import ClusterModel +from . 
import ApiController, ControllerDoc, EndpointDoc, RESTController + + +@ApiController('/cluster', Scope.CONFIG_OPT) +@ControllerDoc("Get Cluster Details", "Cluster") +class Cluster(RESTController): + @RESTController.MethodMap(version='0.1') + @EndpointDoc("Get the cluster status") + def list(self): + return ClusterModel.from_db().dict() + + @RESTController.MethodMap(version='0.1') + @EndpointDoc("Update the cluster status", + parameters={'status': (str, 'Cluster Status')}) + def singleton_set(self, status: str): + ClusterModel(status).to_db() diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts new file mode 100644 index 0000000000000..5615b0369d025 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts @@ -0,0 +1,22 @@ +import { PageHelper } from '../page-helper.po'; +import { NotificationSidebarPageHelper } from '../ui/notification.po'; + +export class CreateClusterWelcomePageHelper extends PageHelper { + pages = { + index: { url: '#/create-cluster', id: 'cd-create-cluster' } + }; + + createCluster() { + cy.get('cd-create-cluster').should('contain.text', 'Welcome to Ceph'); + cy.get('[name=create-cluster]').click(); + } + + doSkip() { + cy.get('[name=skip-cluster-creation]').click(); + + cy.get('cd-dashboard').should('exist'); + const notification = new NotificationSidebarPageHelper(); + notification.open(); + notification.getNotifications().should('contain', 'Cluster creation skipped by user'); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts new file mode 100644 index 0000000000000..58844e39afec8 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts @@ -0,0 +1,11 @@ +import { PageHelper } from '../page-helper.po'; + +export class CreateClusterReviewPageHelper extends PageHelper { + pages = { + index: { url: '#/create-cluster', id: 'cd-create-cluster-review' } + }; + + checkDefaultHostName() { + this.getTableCell(1, 'ceph-node-00.cephlab.com').should('exist'); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/06-cluster-welcome-page.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/06-cluster-welcome-page.e2e-spec.ts new file mode 100644 index 0000000000000..bd0470b86700a --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/06-cluster-welcome-page.e2e-spec.ts @@ -0,0 +1,19 @@ +import { CreateClusterWelcomePageHelper } from '../cluster/cluster-welcome-page.po'; + +describe('Create cluster page', () => { + const createCluster = new CreateClusterWelcomePageHelper(); + + beforeEach(() => { + cy.login(); + Cypress.Cookies.preserveOnce('token'); + createCluster.navigateTo(); + }); + + it('should fail to create cluster', () => { + createCluster.createCluster(); + }); + + it('should skip to dashboard landing page', () => { + createCluster.doSkip(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts new file mode 100644 index 0000000000000..a472810e6e6c5 --- /dev/null +++ 
b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts @@ -0,0 +1,61 @@ +import { CreateClusterWelcomePageHelper } from 'cypress/integration/cluster/cluster-welcome-page.po'; +import { CreateClusterReviewPageHelper } from 'cypress/integration/cluster/create-cluster-review.po'; + +describe('Create Cluster Review page', () => { + const reviewPage = new CreateClusterReviewPageHelper(); + const createCluster = new CreateClusterWelcomePageHelper(); + + beforeEach(() => { + cy.login(); + Cypress.Cookies.preserveOnce('token'); + createCluster.navigateTo(); + createCluster.createCluster(); + + cy.get('button[aria-label="Next"]').click(); + }); + + describe('navigation link and title test', () => { + it('should check if nav-link and title contains Review', () => { + cy.get('.nav-link').should('contain.text', 'Review'); + + cy.get('.title').should('contain.text', 'Review'); + }); + }); + + describe('fields check', () => { + it('should check cluster resources table is present', () => { + // check for table header 'Status' + reviewPage.getLegends().its(0).should('have.text', 'Cluster Resources'); + + // check for fields in table + reviewPage.getStatusTables().should('contain.text', 'Hosts'); + }); + + it('should check Hosts Per Label and Host Details tables are present', () => { + // check for there to be two tables + reviewPage.getDataTables().should('have.length', 2); + + // check for table header 'Hosts Per Label' + reviewPage.getLegends().its(1).should('have.text', 'Hosts Per Label'); + + // check for table header 'Host Details' + reviewPage.getLegends().its(2).should('have.text', 'Host Details'); + + // verify correct columns on Hosts Per Label table + reviewPage.getDataTableHeaders(0).contains('Label'); + + reviewPage.getDataTableHeaders(0).contains('Number of Hosts'); + + // verify correct columns on Host Details table + reviewPage.getDataTableHeaders(1).contains('Host Name'); + + reviewPage.getDataTableHeaders(1).contains('Labels'); + }); + + it('should check hosts count and default host name are present', () => { + reviewPage.getStatusTables().should('contain.text', '1'); + + reviewPage.checkDefaultHostName(); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts index ebbe6f6651c1c..099b31efbda50 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts @@ -6,6 +6,7 @@ import _ from 'lodash'; import { CephfsListComponent } from './ceph/cephfs/cephfs-list/cephfs-list.component'; import { ConfigurationFormComponent } from './ceph/cluster/configuration/configuration-form/configuration-form.component'; import { ConfigurationComponent } from './ceph/cluster/configuration/configuration.component'; +import { CreateClusterComponent } from './ceph/cluster/create-cluster/create-cluster.component'; import { CrushmapComponent } from './ceph/cluster/crushmap/crushmap.component'; import { HostFormComponent } from './ceph/cluster/hosts/host-form/host-form.component'; import { HostsComponent } from './ceph/cluster/hosts/hosts.component'; @@ -89,6 +90,19 @@ const routes: Routes = [ { path: 'error', component: ErrorComponent }, // Cluster + { + path: 'create-cluster', + component: CreateClusterComponent, + canActivate: [ModuleStatusGuardService], + data: { + moduleStatusGuardConfig: { + apiPath: 'orchestrator', + redirectTo: 'dashboard', + backend: 'cephadm' 
+ }, + breadcrumbs: 'Create Cluster' + } + }, { path: 'hosts', data: { breadcrumbs: 'Cluster/Hosts' }, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts index cc58c38b8dc9c..a2c1e6d2f89ec 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts @@ -21,6 +21,7 @@ import { CephSharedModule } from '../shared/ceph-shared.module'; import { ConfigurationDetailsComponent } from './configuration/configuration-details/configuration-details.component'; import { ConfigurationFormComponent } from './configuration/configuration-form/configuration-form.component'; import { ConfigurationComponent } from './configuration/configuration.component'; +import { CreateClusterComponent } from './create-cluster/create-cluster.component'; import { CrushmapComponent } from './crushmap/crushmap.component'; import { HostDetailsComponent } from './hosts/host-details/host-details.component'; import { HostFormComponent } from './hosts/host-form/host-form.component'; @@ -112,7 +113,8 @@ import { TelemetryComponent } from './telemetry/telemetry.component'; PrometheusTabsComponent, ServiceFormComponent, OsdFlagsIndivModalComponent, - PlacementPipe + PlacementPipe, + CreateClusterComponent ] }) export class ClusterModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html new file mode 100644 index 0000000000000..661c13fc931c9 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html @@ -0,0 +1,29 @@ +
+
+
+ + Ceph +

Welcome to {{ projectConstants.projectName }}

+ +
+

Please proceed to complete the cluster creation

+
+ + +
+
+
+
+
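The cluster status endpoint introduced above (controllers/cluster.py, exercised by test_cluster.py) is what the login redirect and the wizard's skip button ultimately rely on; the ClusterService added further down wraps the same calls and sends the versioned Accept header, since the controller publishes the resource at version 0.1. A minimal sketch of driving the endpoint directly with requests — the dashboard URL and auth token below are hypothetical placeholders, not part of this patch:

    import requests

    BASE_URL = 'https://localhost:8443'    # hypothetical dashboard address
    TOKEN = 'REPLACE_WITH_AUTH_TOKEN'      # bearer token obtained from the dashboard auth endpoint beforehand
    HEADERS = {
        'Accept': 'application/vnd.ceph.api.v0.1+json',  # endpoint is published as version 0.1
        'Authorization': 'Bearer ' + TOKEN,
    }

    # Read the current installation phase: INSTALLED or POST_INSTALLED.
    resp = requests.get(BASE_URL + '/api/cluster', headers=HEADERS, verify=False)
    print(resp.json())  # e.g. {'status': 'INSTALLED'}

    # Mark the cluster as installed; this mirrors what skipping the wizard does through ClusterService.
    requests.put(BASE_URL + '/api/cluster',
                 json={'status': 'POST_INSTALLED'},
                 headers=HEADERS, verify=False)

Once the status is POST_INSTALLED, the login flow routes straight to the dashboard landing page instead of the create-cluster wizard.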
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts new file mode 100644 index 0000000000000..7e061b2e25c9a --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts @@ -0,0 +1,45 @@ +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { ComponentFixture, TestBed } from '@angular/core/testing'; +import { By } from '@angular/platform-browser'; +import { RouterTestingModule } from '@angular/router/testing'; + +import { ToastrModule } from 'ngx-toastr'; + +import { ClusterService } from '~/app/shared/api/cluster.service'; +import { SharedModule } from '~/app/shared/shared.module'; +import { configureTestBed } from '~/testing/unit-test-helper'; +import { CreateClusterComponent } from './create-cluster.component'; + +describe('CreateClusterComponent', () => { + let component: CreateClusterComponent; + let fixture: ComponentFixture; + let clusterService: ClusterService; + + configureTestBed({ + declarations: [CreateClusterComponent], + imports: [HttpClientTestingModule, RouterTestingModule, ToastrModule.forRoot(), SharedModule] + }); + + beforeEach(() => { + fixture = TestBed.createComponent(CreateClusterComponent); + component = fixture.componentInstance; + clusterService = TestBed.inject(ClusterService); + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should have the heading "Welcome to Ceph Dashboard"', () => { + const heading = fixture.debugElement.query(By.css('h3')).nativeElement; + expect(heading.innerHTML).toBe('Welcome to Ceph Dashboard'); + }); + + it('should call updateStatus when cluster creation is skipped', () => { + const clusterServiceSpy = spyOn(clusterService, 'updateStatus').and.callThrough(); + expect(clusterServiceSpy).not.toHaveBeenCalled(); + component.skipClusterCreation(); + expect(clusterServiceSpy).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts new file mode 100644 index 0000000000000..239a4f13ca7f0 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts @@ -0,0 +1,44 @@ +import { Component } from '@angular/core'; + +import { ClusterService } from '~/app/shared/api/cluster.service'; +import { AppConstants } from '~/app/shared/constants/app.constants'; +import { NotificationType } from '~/app/shared/enum/notification-type.enum'; +import { Permission } from '~/app/shared/models/permissions'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { NotificationService } from '~/app/shared/services/notification.service'; + +@Component({ + selector: 'cd-create-cluster', + templateUrl: './create-cluster.component.html', + styleUrls: ['./create-cluster.component.scss'] +}) +export class CreateClusterComponent { + permission: Permission; + orchStatus = false; + featureAvailable = false; + 
projectConstants: typeof AppConstants = AppConstants; + + constructor( + private authStorageService: AuthStorageService, + private clusterService: ClusterService, + private notificationService: NotificationService + ) { + this.permission = this.authStorageService.getPermissions().configOpt; + } + + createCluster() { + this.notificationService.show( + NotificationType.error, + $localize`Cluster creation feature not implemented` + ); + } + + skipClusterCreation() { + this.clusterService.updateStatus('POST_INSTALLED').subscribe(() => { + this.notificationService.show( + NotificationType.info, + $localize`Cluster creation skipped by user` + ); + }); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts index 15a7275739f36..3cbfab4ebaac3 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts @@ -1,7 +1,11 @@ import { HttpClientTestingModule } from '@angular/common/http/testing'; import { ComponentFixture, TestBed } from '@angular/core/testing'; +import { Router } from '@angular/router'; import { RouterTestingModule } from '@angular/router/testing'; +import { of } from 'rxjs'; + +import { AuthService } from '~/app/shared/api/auth.service'; import { configureTestBed } from '~/testing/unit-test-helper'; import { AuthModule } from '../auth.module'; import { LoginComponent } from './login.component'; @@ -9,6 +13,8 @@ import { LoginComponent } from './login.component'; describe('LoginComponent', () => { let component: LoginComponent; let fixture: ComponentFixture; + let routerNavigateSpy: jasmine.Spy; + let authServiceLoginSpy: jasmine.Spy; configureTestBed({ imports: [RouterTestingModule, HttpClientTestingModule, AuthModule] @@ -17,6 +23,10 @@ describe('LoginComponent', () => { beforeEach(() => { fixture = TestBed.createComponent(LoginComponent); component = fixture.componentInstance; + routerNavigateSpy = spyOn(TestBed.inject(Router), 'navigate'); + routerNavigateSpy.and.returnValue(true); + authServiceLoginSpy = spyOn(TestBed.inject(AuthService), 'login'); + authServiceLoginSpy.and.returnValue(of(null)); fixture.detectChanges(); }); @@ -29,4 +39,20 @@ describe('LoginComponent', () => { component.ngOnInit(); expect(component['modalService'].hasOpenModals()).toBeFalsy(); }); + + it('should not show create cluster wizard if cluster creation was successful', () => { + component.postInstalled = true; + component.login(); + + expect(routerNavigateSpy).toHaveBeenCalledTimes(1); + expect(routerNavigateSpy).toHaveBeenCalledWith(['/']); + }); + + it('should show create cluster wizard if cluster creation was failed', () => { + component.postInstalled = false; + component.login(); + + expect(routerNavigateSpy).toHaveBeenCalledTimes(1); + expect(routerNavigateSpy).toHaveBeenCalledWith(['/create-cluster']); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts index 868ba66a00248..77bafd99c82e0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts @@ -17,6 +17,7 @@ export class LoginComponent implements OnInit { model = new Credentials(); isLoginActive = false; returnUrl: string; + postInstalled = false; constructor( 
private authService: AuthService, @@ -43,6 +44,7 @@ export class LoginComponent implements OnInit { } this.authService.check(token).subscribe((login: any) => { if (login.login_url) { + this.postInstalled = login.cluster_status === 'POST_INSTALLED'; if (login.login_url === '#/login') { this.isLoginActive = true; } else { @@ -63,7 +65,11 @@ export class LoginComponent implements OnInit { login() { this.authService.login(this.model).subscribe(() => { - const url = _.get(this.route.snapshot.queryParams, 'returnUrl', '/'); + const urlPath = this.postInstalled ? '/' : '/create-cluster'; + let url = _.get(this.route.snapshot.queryParams, 'returnUrl', urlPath); + if (!this.postInstalled && this.route.snapshot.queryParams['returnUrl'] === '/dashboard') { + url = '/create-cluster'; + } this.router.navigate([url]); }); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.spec.ts new file mode 100644 index 0000000000000..758f670eec66b --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.spec.ts @@ -0,0 +1,42 @@ +import { HttpClientTestingModule, HttpTestingController } from '@angular/common/http/testing'; +import { fakeAsync, TestBed } from '@angular/core/testing'; + +import { configureTestBed } from '~/testing/unit-test-helper'; +import { ClusterService } from './cluster.service'; + +describe('ClusterService', () => { + let service: ClusterService; + let httpTesting: HttpTestingController; + + configureTestBed({ + imports: [HttpClientTestingModule], + providers: [ClusterService] + }); + + beforeEach(() => { + TestBed.configureTestingModule({}); + service = TestBed.inject(ClusterService); + httpTesting = TestBed.inject(HttpTestingController); + }); + + afterEach(() => { + httpTesting.verify(); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); + + it('should call getStatus', () => { + service.getStatus().subscribe(); + const req = httpTesting.expectOne('api/cluster'); + expect(req.request.method).toBe('GET'); + }); + + it('should update cluster status', fakeAsync(() => { + service.updateStatus('fakeStatus').subscribe(); + const req = httpTesting.expectOne('api/cluster'); + expect(req.request.method).toBe('PUT'); + expect(req.request.body).toEqual({ status: 'fakeStatus' }); + })); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.ts new file mode 100644 index 0000000000000..6b435d6ffed1d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cluster.service.ts @@ -0,0 +1,27 @@ +import { HttpClient } from '@angular/common/http'; +import { Injectable } from '@angular/core'; + +import { Observable } from 'rxjs'; + +@Injectable({ + providedIn: 'root' +}) +export class ClusterService { + baseURL = 'api/cluster'; + + constructor(private http: HttpClient) {} + + getStatus(): Observable { + return this.http.get(`${this.baseURL}`, { + headers: { Accept: 'application/vnd.ceph.api.v0.1+json' } + }); + } + + updateStatus(status: string) { + return this.http.put( + `${this.baseURL}`, + { status: status }, + { headers: { Accept: 'application/vnd.ceph.api.v0.1+json' } } + ); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts index 05d6b5c53a9ad..5b668ad9000de 100644 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts @@ -7,6 +7,7 @@ export class AppConstants { public static readonly projectName = 'Ceph Dashboard'; public static readonly license = 'Free software (LGPL 2.1).'; public static readonly copyright = 'Copyright(c) ' + environment.year + ' Ceph contributors.'; + public static readonly cephLogo = 'assets/Ceph_Logo.svg'; } export enum URLVerbs { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.spec.ts index 0948fc878a9e0..ebacc06c15192 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.spec.ts @@ -7,6 +7,7 @@ import { RouterTestingModule } from '@angular/router/testing'; import { of as observableOf } from 'rxjs'; import { configureTestBed } from '~/testing/unit-test-helper'; +import { MgrModuleService } from '../api/mgr-module.service'; import { ModuleStatusGuardService } from './module-status-guard.service'; describe('ModuleStatusGuardService', () => { @@ -15,6 +16,7 @@ describe('ModuleStatusGuardService', () => { let router: Router; let route: ActivatedRouteSnapshot; let ngZone: NgZone; + let mgrModuleService: MgrModuleService; @Component({ selector: 'cd-foo', template: '' }) class FooComponent {} @@ -25,9 +27,16 @@ describe('ModuleStatusGuardService', () => { const routes: Routes = [{ path: '**', component: FooComponent }]; - const testCanActivate = (getResult: {}, activateResult: boolean, urlResult: string) => { + const testCanActivate = ( + getResult: {}, + activateResult: boolean, + urlResult: string, + backend = 'cephadm' + ) => { let result: boolean; spyOn(httpClient, 'get').and.returnValue(observableOf(getResult)); + const test = { orchestrator: backend }; + spyOn(mgrModuleService, 'getConfig').and.returnValue(observableOf(test)); ngZone.run(() => { service.canActivateChild(route).subscribe((resp) => { result = resp; @@ -48,13 +57,15 @@ describe('ModuleStatusGuardService', () => { beforeEach(() => { service = TestBed.inject(ModuleStatusGuardService); httpClient = TestBed.inject(HttpClient); + mgrModuleService = TestBed.inject(MgrModuleService); router = TestBed.inject(Router); route = new ActivatedRouteSnapshot(); route.url = []; route.data = { moduleStatusGuardConfig: { apiPath: 'bar', - redirectTo: '/foo' + redirectTo: '/foo', + backend: 'rook' } }; ngZone = TestBed.inject(NgZone); @@ -76,4 +87,8 @@ describe('ModuleStatusGuardService', () => { it('should test canActivateChild with status unavailable', fakeAsync(() => { testCanActivate(null, false, '/foo'); })); + + it('should redirect normally if the backend provided matches the current backend', fakeAsync(() => { + testCanActivate({ available: true, message: 'foo' }, true, '/', 'rook'); + })); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.ts index 171f34adfe6b4..3162afd232933 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/module-status-guard.service.ts @@ -5,7 +5,8 @@ import { ActivatedRouteSnapshot, CanActivate, 
CanActivateChild, Router } from '@ import { of as observableOf } from 'rxjs'; import { catchError, map } from 'rxjs/operators'; -import { Icons } from '../enum/icons.enum'; +import { MgrModuleService } from '~/app/shared/api/mgr-module.service'; +import { Icons } from '~/app/shared/enum/icons.enum'; /** * This service checks if a route can be activated by executing a @@ -39,7 +40,11 @@ export class ModuleStatusGuardService implements CanActivate, CanActivateChild { // TODO: Hotfix - remove ALLOWLIST'ing when a generic ErrorComponent is implemented static readonly ALLOWLIST: string[] = ['501']; - constructor(private http: HttpClient, private router: Router) {} + constructor( + private http: HttpClient, + private router: Router, + private mgrModuleService: MgrModuleService + ) {} canActivate(route: ActivatedRouteSnapshot) { return this.doCheck(route); @@ -54,9 +59,15 @@ export class ModuleStatusGuardService implements CanActivate, CanActivateChild { return observableOf(true); } const config = route.data['moduleStatusGuardConfig']; + let backendCheck = false; + if (config.backend) { + this.mgrModuleService.getConfig('orchestrator').subscribe((resp) => { + backendCheck = config.backend === resp['orchestrator']; + }); + } return this.http.get(`api/${config.apiPath}/status`).pipe( map((resp: any) => { - if (!resp.available) { + if (!resp.available && !backendCheck) { this.router.navigate([config.redirectTo || ''], { state: { header: config.header, diff --git a/src/pybind/mgr/dashboard/openapi.yaml b/src/pybind/mgr/dashboard/openapi.yaml index 844457b0a72d3..f03102599b0df 100644 --- a/src/pybind/mgr/dashboard/openapi.yaml +++ b/src/pybind/mgr/dashboard/openapi.yaml @@ -2038,6 +2038,67 @@ paths: - jwt: [] tags: - Cephfs + /api/cluster: + get: + parameters: [] + responses: + '200': + content: + application/vnd.ceph.api.v0.1+json: + type: object + description: OK + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + summary: Get the cluster status + tags: + - Cluster + put: + parameters: [] + requestBody: + content: + application/json: + schema: + properties: + status: + description: Cluster Status + type: string + required: + - status + type: object + responses: + '200': + content: + application/vnd.ceph.api.v0.1+json: + type: object + description: Resource updated. + '202': + content: + application/vnd.ceph.api.v0.1+json: + type: object + description: Operation is still executing. Please check the task queue. + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. 
+ security: + - jwt: [] + summary: Update the cluster status + tags: + - Cluster /api/cluster_conf: get: parameters: [] @@ -10388,6 +10449,8 @@ tags: name: Auth - description: Cephfs Management API name: Cephfs +- description: Get Cluster Details + name: Cluster - description: Manage Cluster Configurations name: ClusterConfiguration - description: Crush Rule Management API diff --git a/src/pybind/mgr/dashboard/services/cluster.py b/src/pybind/mgr/dashboard/services/cluster.py new file mode 100644 index 0000000000000..aad517a21d68a --- /dev/null +++ b/src/pybind/mgr/dashboard/services/cluster.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +from enum import Enum + +from .. import mgr + + +class ClusterModel: + + class Status(Enum): + INSTALLED = 0 + POST_INSTALLED = 1 + + status: Status + + def __init__(self, status=Status.INSTALLED.name): + self.status = self.Status[status] + + def dict(self): + return {'status': self.status.name} + + def to_db(self): + mgr.set_store('cluster/status', self.status.name) + + @classmethod + def from_db(cls): + return cls(status=mgr.get_store('cluster/status', cls.Status.INSTALLED.name)) From 59cbf97e6ceedee0f9d682a94a749fbb01e9f787 Mon Sep 17 00:00:00 2001 From: Nizamudeen A Date: Sun, 4 Jul 2021 18:46:45 +0530 Subject: [PATCH 0124/2866] mgr/dashboard: Cluster Creation Add Host Section and e2es Add host section of the cluster creation workflow. 1. Fix bug in the modal where going forward one step on the wizard and coming back opens up the add host modal. 2. Rename Create Cluster to Expand Cluster as per the discussions 3. A skip confirmation modal to warn the user when he tries to skip the cluster creation 4. Adapted all the tests 5. Did some UI improvements like fixing and aligning the styles, colors.. - Used routed modal for host Additon form - Renamed the Create to Add in Host Form Fixes: https://tracker.ceph.com/issues/51517 Fixes: https://tracker.ceph.com/issues/51640 Fixes: https://tracker.ceph.com/issues/50336 Fixes: https://tracker.ceph.com/issues/50565 Signed-off-by: Avan Thakkar Signed-off-by: Aashish Sharma Signed-off-by: Nizamudeen A --- .../rados/dashboard/tasks/dashboard.yaml | 1 + src/pybind/mgr/dashboard/controllers/host.py | 2 +- .../cluster/cluster-welcome-page.po.ts | 22 --- .../cluster/create-cluster-review.po.ts | 11 -- .../integration/cluster/create-cluster.po.ts | 113 +++++++++++++ .../cypress/integration/cluster/hosts.po.ts | 17 +- .../orchestrator/01-hosts.e2e-spec.ts | 4 +- ...1-create-cluster-welcome-page.e2e-spec.ts} | 4 +- .../workflow/01-hosts.e2e-spec.ts | 4 +- .../02-create-cluster-add-host.e2e-spec.ts | 48 ++++++ .../03-create-cluster-review.e2e-spec.ts | 59 +++++++ .../workflow/04-cluster-check.e2e-spec.ts | 49 ++++++ .../08-create-cluster-review.e2e-spec.ts | 61 ------- .../frontend/src/app/app-routing.module.ts | 10 +- .../src/app/ceph/cluster/cluster.module.ts | 5 +- .../create-cluster.component.html | 60 ++++++- .../create-cluster.component.scss | 26 +++ .../create-cluster.component.spec.ts | 92 ++++++++-- .../create-cluster.component.ts | 128 +++++++++++--- .../hosts/host-form/host-form.component.html | 160 +++++++++--------- .../host-form/host-form.component.spec.ts | 10 +- .../hosts/host-form/host-form.component.ts | 22 ++- .../ceph/cluster/hosts/hosts.component.html | 8 +- .../cluster/hosts/hosts.component.spec.ts | 19 ++- .../app/ceph/cluster/hosts/hosts.component.ts | 43 +++-- .../osd-flags-indiv-modal.component.spec.ts | 4 +- .../core/auth/login/login.component.spec.ts | 2 +- .../app/core/auth/login/login.component.ts 
| 4 +- .../shared/components/components.module.ts | 7 +- .../components/modal/modal.component.html | 30 ++-- .../components/modal/modal.component.spec.ts | 15 +- .../components/modal/modal.component.ts | 9 +- .../components/wizard/wizard.component.html | 19 +++ .../components/wizard/wizard.component.scss | 30 ++++ .../wizard/wizard.component.spec.ts | 25 +++ .../components/wizard/wizard.component.ts | 39 +++++ .../src/app/shared/models/wizard-steps.ts | 4 + .../shared/services/task-message.service.ts | 4 +- .../services/wizard-steps.service.spec.ts | 16 ++ .../shared/services/wizard-steps.service.ts | 58 +++++++ .../src/styles/ceph-custom/_forms.scss | 11 ++ 41 files changed, 966 insertions(+), 289 deletions(-) delete mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts delete mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts rename src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/{06-cluster-welcome-page.e2e-spec.ts => workflow/01-create-cluster-welcome-page.e2e-spec.ts} (69%) create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts delete mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.html create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.scss create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/models/wizard-steps.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.ts diff --git a/qa/suites/rados/dashboard/tasks/dashboard.yaml b/qa/suites/rados/dashboard/tasks/dashboard.yaml index 1d0ec7395eae3..db28999ece922 100644 --- a/qa/suites/rados/dashboard/tasks/dashboard.yaml +++ b/qa/suites/rados/dashboard/tasks/dashboard.yaml @@ -39,6 +39,7 @@ tasks: - tasks.mgr.dashboard.test_auth - tasks.mgr.dashboard.test_cephfs - tasks.mgr.dashboard.test_cluster_configuration + - tasks.mgr.dashboard.test_cluster - tasks.mgr.dashboard.test_crush_rule - tasks.mgr.dashboard.test_erasure_code_profile - tasks.mgr.dashboard.test_ganesha diff --git a/src/pybind/mgr/dashboard/controllers/host.py b/src/pybind/mgr/dashboard/controllers/host.py index a72c4b34052d5..6ba1a5e0f1cbb 100644 --- a/src/pybind/mgr/dashboard/controllers/host.py +++ b/src/pybind/mgr/dashboard/controllers/host.py @@ -285,7 +285,7 @@ def list(self, sources=None): @raise_if_no_orchestrator([OrchFeature.HOST_LIST, OrchFeature.HOST_CREATE]) @handle_orchestrator_error('host') - @host_task('create', {'hostname': '{hostname}'}) + @host_task('add', {'hostname': '{hostname}'}) @EndpointDoc('', 
parameters={ 'hostname': (str, 'Hostname'), diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts deleted file mode 100644 index 5615b0369d025..0000000000000 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/cluster-welcome-page.po.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { PageHelper } from '../page-helper.po'; -import { NotificationSidebarPageHelper } from '../ui/notification.po'; - -export class CreateClusterWelcomePageHelper extends PageHelper { - pages = { - index: { url: '#/create-cluster', id: 'cd-create-cluster' } - }; - - createCluster() { - cy.get('cd-create-cluster').should('contain.text', 'Welcome to Ceph'); - cy.get('[name=create-cluster]').click(); - } - - doSkip() { - cy.get('[name=skip-cluster-creation]').click(); - - cy.get('cd-dashboard').should('exist'); - const notification = new NotificationSidebarPageHelper(); - notification.open(); - notification.getNotifications().should('contain', 'Cluster creation skipped by user'); - } -} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts deleted file mode 100644 index 58844e39afec8..0000000000000 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster-review.po.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { PageHelper } from '../page-helper.po'; - -export class CreateClusterReviewPageHelper extends PageHelper { - pages = { - index: { url: '#/create-cluster', id: 'cd-create-cluster-review' } - }; - - checkDefaultHostName() { - this.getTableCell(1, 'ceph-node-00.cephlab.com').should('exist'); - } -} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts new file mode 100644 index 0000000000000..22941b28e7cb7 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts @@ -0,0 +1,113 @@ +import { PageHelper } from '../page-helper.po'; +import { NotificationSidebarPageHelper } from '../ui/notification.po'; + +const pages = { + index: { url: '#/expand-cluster', id: 'cd-create-cluster' } +}; + +export class CreateClusterWizardHelper extends PageHelper { + pages = pages; + columnIndex = { + hostname: 1, + labels: 2, + status: 3 + }; + + createCluster() { + cy.get('cd-create-cluster').should('contain.text', 'Please expand your cluster first'); + cy.get('[name=expand-cluster]').click(); + } + + doSkip() { + cy.get('[name=skip-cluster-creation]').click(); + cy.contains('cd-modal button', 'Continue').click(); + + cy.get('cd-dashboard').should('exist'); + const notification = new NotificationSidebarPageHelper(); + notification.open(); + notification.getNotifications().should('contain', 'Cluster expansion skipped by user'); + } + + check_for_host() { + this.getTableCount('total').should('not.be.eq', 0); + } + + clickHostTab(hostname: string, tabName: string) { + this.getExpandCollapseElement(hostname).click(); + cy.get('cd-host-details').within(() => { + this.getTab(tabName).click(); + }); + } + + add(hostname: string, exist?: boolean, maintenance?: boolean) { + cy.get('.btn.btn-accent').first().click({ force: true }); + + cy.get('cd-modal').should('exist'); + cy.get('cd-modal').within(() => { + cy.get('#hostname').type(hostname); + if (maintenance) { + 
cy.get('label[for=maintenance]').click(); + } + if (exist) { + cy.get('#hostname').should('have.class', 'ng-invalid'); + } + cy.get('cd-submit-button').click(); + }); + // back to host list + cy.get(`${this.pages.index.id}`); + } + + checkExist(hostname: string, exist: boolean) { + this.clearTableSearchInput(); + this.getTableCell(this.columnIndex.hostname, hostname).should(($elements) => { + const hosts = $elements.map((_, el) => el.textContent).get(); + if (exist) { + expect(hosts).to.include(hostname); + } else { + expect(hosts).to.not.include(hostname); + } + }); + } + + delete(hostname: string) { + super.delete(hostname, this.columnIndex.hostname); + } + + // Add or remove labels on a host, then verify labels in the table + editLabels(hostname: string, labels: string[], add: boolean) { + this.getTableCell(this.columnIndex.hostname, hostname).click(); + this.clickActionButton('edit'); + + // add or remove label badges + if (add) { + cy.get('cd-modal').find('.select-menu-edit').click(); + for (const label of labels) { + cy.contains('cd-modal .badge', new RegExp(`^${label}$`)).should('not.exist'); + cy.get('.popover-body input').type(`${label}{enter}`); + } + } else { + for (const label of labels) { + cy.contains('cd-modal .badge', new RegExp(`^${label}$`)) + .find('.badge-remove') + .click(); + } + } + cy.get('cd-modal cd-submit-button').click(); + + // Verify labels are added or removed from Labels column + // First find row with hostname, then find labels in the row + this.getTableCell(this.columnIndex.hostname, hostname) + .parent() + .find(`datatable-body-cell:nth-child(${this.columnIndex.labels}) .badge`) + .should(($ele) => { + const newLabels = $ele.toArray().map((v) => v.innerText); + for (const label of labels) { + if (add) { + expect(newLabels).to.include(label); + } else { + expect(newLabels).to.not.include(label); + } + } + }); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/hosts.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/hosts.po.ts index 6752fe9e7870c..7a7e00d6648ac 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/hosts.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/hosts.po.ts @@ -2,7 +2,7 @@ import { PageHelper } from '../page-helper.po'; const pages = { index: { url: '#/hosts', id: 'cd-hosts' }, - create: { url: '#/hosts/create', id: 'cd-host-form' } + add: { url: '#/hosts/(modal:add)', id: 'cd-host-form' } }; export class HostsPageHelper extends PageHelper { @@ -49,21 +49,20 @@ export class HostsPageHelper extends PageHelper { }); } - @PageHelper.restrictTo(pages.create.url) + @PageHelper.restrictTo(pages.add.url) add(hostname: string, exist?: boolean, maintenance?: boolean) { - cy.get(`${this.pages.create.id}`).within(() => { + cy.get(`${this.pages.add.id}`).within(() => { cy.get('#hostname').type(hostname); if (maintenance) { cy.get('label[for=maintenance]').click(); } + if (exist) { + cy.get('#hostname').should('have.class', 'ng-invalid'); + } cy.get('cd-submit-button').click(); }); - if (exist) { - cy.get('#hostname').should('have.class', 'ng-invalid'); - } else { - // back to host list - cy.get(`${this.pages.index.id}`); - } + // back to host list + cy.get(`${this.pages.index.id}`); } @PageHelper.restrictTo(pages.index.url) diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts index cf85642a1b1d2..6c79a74662dff 
100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/01-hosts.e2e-spec.ts @@ -17,7 +17,7 @@ describe('Hosts page', () => { it('should not add an exsiting host', function () { const hostname = Cypress._.sample(this.hosts).name; - hosts.navigateTo('create'); + hosts.navigateTo('add'); hosts.add(hostname, true); }); @@ -26,7 +26,7 @@ describe('Hosts page', () => { hosts.delete(host); // add it back - hosts.navigateTo('create'); + hosts.navigateTo('add'); hosts.add(host); hosts.checkExist(host, true); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/06-cluster-welcome-page.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/01-create-cluster-welcome-page.e2e-spec.ts similarity index 69% rename from src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/06-cluster-welcome-page.e2e-spec.ts rename to src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/01-create-cluster-welcome-page.e2e-spec.ts index bd0470b86700a..d776a210a57ee 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/06-cluster-welcome-page.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/01-create-cluster-welcome-page.e2e-spec.ts @@ -1,7 +1,7 @@ -import { CreateClusterWelcomePageHelper } from '../cluster/cluster-welcome-page.po'; +import { CreateClusterWizardHelper } from 'cypress/integration/cluster/create-cluster.po'; describe('Create cluster page', () => { - const createCluster = new CreateClusterWelcomePageHelper(); + const createCluster = new CreateClusterWizardHelper(); beforeEach(() => { cy.login(); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/01-hosts.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/01-hosts.e2e-spec.ts index 7dd0c1104334c..e8dea8e30080f 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/01-hosts.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/01-hosts.e2e-spec.ts @@ -12,7 +12,7 @@ describe('Hosts page', () => { 'ceph-node-02.cephlab.com' ]; const addHost = (hostname: string, exist?: boolean, maintenance?: boolean) => { - hosts.navigateTo('create'); + hosts.navigateTo('add'); hosts.add(hostname, exist, maintenance); hosts.checkExist(hostname, true); }; @@ -49,7 +49,7 @@ describe('Hosts page', () => { }); it('should not add an existing host', function () { - hosts.navigateTo('create'); + hosts.navigateTo('add'); hosts.add(hostnames[0], true); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts new file mode 100644 index 0000000000000..7bf5b5be3d496 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts @@ -0,0 +1,48 @@ +import { CreateClusterWizardHelper } from 'cypress/integration/cluster/create-cluster.po'; + +describe('Create cluster add host page', () => { + const createCluster = new CreateClusterWizardHelper(); + const hostnames = [ + 'ceph-node-00.cephlab.com', + 'ceph-node-01.cephlab.com', + 'ceph-node-02.cephlab.com' + ]; + const addHost = (hostname: string, exist?: 
boolean) => { + createCluster.add(hostname, exist, true); + createCluster.checkExist(hostname, true); + }; + + beforeEach(() => { + cy.login(); + Cypress.Cookies.preserveOnce('token'); + createCluster.navigateTo(); + createCluster.createCluster(); + }); + + it('should check if nav-link and title contains Add Hosts', () => { + cy.get('.nav-link').should('contain.text', 'Add Hosts'); + + cy.get('.title').should('contain.text', 'Add Hosts'); + }); + + it('should check existing host and add new hosts into maintenance mode', () => { + createCluster.checkExist(hostnames[0], true); + + addHost(hostnames[1], false); + addHost(hostnames[2], false); + }); + + it('should not add an existing host', () => { + createCluster.add(hostnames[0], true); + }); + + it('should edit host labels', () => { + const labels = ['foo', 'bar']; + createCluster.editLabels(hostnames[0], labels, true); + createCluster.editLabels(hostnames[0], labels, false); + }); + + it('should delete a host', () => { + createCluster.delete(hostnames[1]); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts new file mode 100644 index 0000000000000..17dd84994d60e --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts @@ -0,0 +1,59 @@ +import { CreateClusterWizardHelper } from 'cypress/integration/cluster/create-cluster.po'; + +describe('Create Cluster Review page', () => { + const createCluster = new CreateClusterWizardHelper(); + + beforeEach(() => { + cy.login(); + Cypress.Cookies.preserveOnce('token'); + createCluster.navigateTo(); + createCluster.createCluster(); + + cy.get('button[aria-label="Next"]').click(); + }); + + describe('navigation link and title test', () => { + it('should check if nav-link and title contains Review', () => { + cy.get('.nav-link').should('contain.text', 'Review'); + + cy.get('.title').should('contain.text', 'Review'); + }); + }); + + describe('fields check', () => { + it('should check cluster resources table is present', () => { + // check for table header 'Cluster Resources' + createCluster.getLegends().its(0).should('have.text', 'Cluster Resources'); + + // check for fields in table + createCluster.getStatusTables().should('contain.text', 'Hosts'); + }); + + it('should check Hosts by Label and Host Details tables are present', () => { + // check for there to be two tables + createCluster.getDataTables().should('have.length', 2); + + // check for table header 'Hosts by Label' + createCluster.getLegends().its(1).should('have.text', 'Hosts by Label'); + + // check for table header 'Host Details' + createCluster.getLegends().its(2).should('have.text', 'Host Details'); + + // verify correct columns on Hosts by Label table + createCluster.getDataTableHeaders(0).contains('Label'); + + createCluster.getDataTableHeaders(0).contains('Number of Hosts'); + + // verify correct columns on Host Details table + createCluster.getDataTableHeaders(1).contains('Host Name'); + + createCluster.getDataTableHeaders(1).contains('Labels'); + }); + + it('should check hosts count and default host name are present', () => { + createCluster.getStatusTables().contains(2); + + createCluster.check_for_host(); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts 
b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts new file mode 100644 index 0000000000000..9717dd8b7dbf8 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts @@ -0,0 +1,49 @@ +import { CreateClusterWizardHelper } from 'cypress/integration/cluster/create-cluster.po'; +import { HostsPageHelper } from 'cypress/integration/cluster/hosts.po'; + +describe('when cluster creation is completed', () => { + const createCluster = new CreateClusterWizardHelper(); + + beforeEach(() => { + cy.login(); + Cypress.Cookies.preserveOnce('token'); + }); + + it('should redirect to dashboard landing page after cluster creation', () => { + createCluster.navigateTo(); + createCluster.createCluster(); + + cy.get('button[aria-label="Next"]').click(); + cy.get('button[aria-label="Next"]').click(); + + cy.get('cd-dashboard').should('exist'); + }); + + describe('Hosts page', () => { + const hosts = new HostsPageHelper(); + const hostnames = ['ceph-node-00.cephlab.com', 'ceph-node-02.cephlab.com']; + + beforeEach(() => { + hosts.navigateTo(); + }); + it('should have already exited from maintenance', () => { + for (let host = 0; host < hostnames.length; host++) { + cy.get('datatable-row-wrapper').should('not.have.text', 'maintenance'); + } + }); + + it('should display inventory', () => { + hosts.clickHostTab(hostnames[1], 'Physical Disks'); + cy.get('cd-host-details').within(() => { + hosts.getTableCount('total').should('be.gte', 0); + }); + }); + + it('should display daemons', () => { + hosts.clickHostTab(hostnames[1], 'Daemons'); + cy.get('cd-host-details').within(() => { + hosts.getTableCount('total').should('be.gte', 0); + }); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts deleted file mode 100644 index a472810e6e6c5..0000000000000 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/08-create-cluster-review.e2e-spec.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { CreateClusterWelcomePageHelper } from 'cypress/integration/cluster/cluster-welcome-page.po'; -import { CreateClusterReviewPageHelper } from 'cypress/integration/cluster/create-cluster-review.po'; - -describe('Create Cluster Review page', () => { - const reviewPage = new CreateClusterReviewPageHelper(); - const createCluster = new CreateClusterWelcomePageHelper(); - - beforeEach(() => { - cy.login(); - Cypress.Cookies.preserveOnce('token'); - createCluster.navigateTo(); - createCluster.createCluster(); - - cy.get('button[aria-label="Next"]').click(); - }); - - describe('navigation link and title test', () => { - it('should check if nav-link and title contains Review', () => { - cy.get('.nav-link').should('contain.text', 'Review'); - - cy.get('.title').should('contain.text', 'Review'); - }); - }); - - describe('fields check', () => { - it('should check cluster resources table is present', () => { - // check for table header 'Status' - reviewPage.getLegends().its(0).should('have.text', 'Cluster Resources'); - - // check for fields in table - reviewPage.getStatusTables().should('contain.text', 'Hosts'); - }); - - it('should check Hosts Per Label and Host Details tables are present', () => { - // check for there to be two tables - reviewPage.getDataTables().should('have.length', 2); - - // check for 
table header 'Hosts Per Label' - reviewPage.getLegends().its(1).should('have.text', 'Hosts Per Label'); - - // check for table header 'Host Details' - reviewPage.getLegends().its(2).should('have.text', 'Host Details'); - - // verify correct columns on Hosts Per Label table - reviewPage.getDataTableHeaders(0).contains('Label'); - - reviewPage.getDataTableHeaders(0).contains('Number of Hosts'); - - // verify correct columns on Host Details table - reviewPage.getDataTableHeaders(1).contains('Host Name'); - - reviewPage.getDataTableHeaders(1).contains('Labels'); - }); - - it('should check hosts count and default host name are present', () => { - reviewPage.getStatusTables().should('contain.text', '1'); - - reviewPage.checkDefaultHostName(); - }); - }); -}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts index 099b31efbda50..bdd2cb8978234 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts @@ -91,7 +91,7 @@ const routes: Routes = [ // Cluster { - path: 'create-cluster', + path: 'expand-cluster', component: CreateClusterComponent, canActivate: [ModuleStatusGuardService], data: { @@ -100,18 +100,18 @@ const routes: Routes = [ redirectTo: 'dashboard', backend: 'cephadm' }, - breadcrumbs: 'Create Cluster' + breadcrumbs: 'Expand Cluster' } }, { path: 'hosts', + component: HostsComponent, data: { breadcrumbs: 'Cluster/Hosts' }, children: [ - { path: '', component: HostsComponent }, { - path: URLVerbs.CREATE, + path: URLVerbs.ADD, component: HostFormComponent, - data: { breadcrumbs: ActionLabels.CREATE } + outlet: 'modal' } ] }, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts index a2c1e6d2f89ec..185c34b27504d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts @@ -5,6 +5,7 @@ import { RouterModule } from '@angular/router'; import { TreeModule } from '@circlon/angular-tree-component'; import { + NgbActiveModal, NgbDatepickerModule, NgbDropdownModule, NgbNavModule, @@ -106,7 +107,6 @@ import { TelemetryComponent } from './telemetry/telemetry.component'; OsdCreationPreviewModalComponent, RulesListComponent, ActiveAlertListComponent, - HostFormComponent, ServiceDetailsComponent, ServiceDaemonListComponent, TelemetryComponent, @@ -115,6 +115,7 @@ import { TelemetryComponent } from './telemetry/telemetry.component'; OsdFlagsIndivModalComponent, PlacementPipe, CreateClusterComponent - ] + ], + providers: [NgbActiveModal] }) export class ClusterModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html index 661c13fc931c9..fb006ec1beb10 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html @@ -1,4 +1,5 @@ -
+
@@ -10,16 +11,14 @@

Please proceed to complete the cluster creation

-
- + i18n>Expand Cluster
@@ -27,3 +26,48 @@
+ +
+
Expand Cluster
+
+ +
+ +
+

Add Hosts

+
+ +
+
+

Review

+
+

To be implemented

+
+
+
+
+ +
+ + + You are about to skip the cluster expansion process. + You’ll need to navigate through the menu to add hosts and services. + +
Are you sure you want to continue?
+
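[Editor's note — illustrative sketch, not part of the patch] The routing change earlier in this patch moves the add-host form from a standalone create page into a named 'modal' router outlet under the 'hosts' route, and the modal is later dismissed by clearing that outlet. The minimal TypeScript sketch below shows how that auxiliary-outlet pattern is typically wired in Angular; the 'add' path segment, the component names, and the assumed <router-outlet name="modal"> in the hosts template come from or are implied by the diffs above, while everything else is a simplified reconstruction rather than the dashboard's exact code.

    import { Routes } from '@angular/router';

    // Child route renders HostFormComponent into the auxiliary 'modal' outlet
    // while HostsComponent (the parent) stays on screen behind it.
    const routes: Routes = [
      {
        path: 'hosts',
        component: HostsComponent,
        children: [{ path: 'add', component: HostFormComponent, outlet: 'modal' }]
      }
    ];

    // The hosts template is assumed to declare the matching outlet:
    //   <router-outlet name="modal"></router-outlet>

    // Inside a component with `private router: Router` injected:
    // open the routed modal (URL becomes /hosts/(modal:add)) ...
    this.router.navigate(['hosts', { outlets: { modal: ['add'] } }]);
    // ... and dismiss it again, as ModalComponent.close() does when pageURL is set.
    this.router.navigate(['hosts', { outlets: { modal: null } }]);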
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss index e69de29bb2d1d..580c5219e9f2c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss @@ -0,0 +1,26 @@ +@use './src/styles/vendor/variables' as vv; + +.container-fluid { + align-items: flex-start; + display: flex; + padding-left: 0; + width: 100%; +} + +.card-body { + max-width: 85%; +} + +.vertical-line { + border-left: 1px solid vv.$gray-400; +} + +cd-wizard { + width: 15%; +} + +cd-hosts { + ::ng-deep .nav { + display: none; + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts index 7e061b2e25c9a..1ebdfb3a59d2f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts @@ -5,7 +5,13 @@ import { RouterTestingModule } from '@angular/router/testing'; import { ToastrModule } from 'ngx-toastr'; -import { ClusterService } from '~/app/shared/api/cluster.service'; +import { CephModule } from '~/app/ceph/ceph.module'; +import { CoreModule } from '~/app/core/core.module'; +import { HostService } from '~/app/shared/api/host.service'; +import { ConfirmationModalComponent } from '~/app/shared/components/confirmation-modal/confirmation-modal.component'; +import { AppConstants } from '~/app/shared/constants/app.constants'; +import { ModalService } from '~/app/shared/services/modal.service'; +import { WizardStepsService } from '~/app/shared/services/wizard-steps.service'; import { SharedModule } from '~/app/shared/shared.module'; import { configureTestBed } from '~/testing/unit-test-helper'; import { CreateClusterComponent } from './create-cluster.component'; @@ -13,17 +19,31 @@ import { CreateClusterComponent } from './create-cluster.component'; describe('CreateClusterComponent', () => { let component: CreateClusterComponent; let fixture: ComponentFixture; - let clusterService: ClusterService; + let wizardStepService: WizardStepsService; + let hostService: HostService; + let modalServiceShowSpy: jasmine.Spy; + const projectConstants: typeof AppConstants = AppConstants; configureTestBed({ - declarations: [CreateClusterComponent], - imports: [HttpClientTestingModule, RouterTestingModule, ToastrModule.forRoot(), SharedModule] + imports: [ + HttpClientTestingModule, + RouterTestingModule, + ToastrModule.forRoot(), + SharedModule, + CoreModule, + CephModule + ] }); beforeEach(() => { fixture = TestBed.createComponent(CreateClusterComponent); component = fixture.componentInstance; - clusterService = TestBed.inject(ClusterService); + wizardStepService = TestBed.inject(WizardStepsService); + hostService = TestBed.inject(HostService); + modalServiceShowSpy = spyOn(TestBed.inject(ModalService), 'show').and.returnValue({ + // mock the close function, it might be called if there are async tests. 
+ close: jest.fn() + }); fixture.detectChanges(); }); @@ -31,15 +51,65 @@ describe('CreateClusterComponent', () => { expect(component).toBeTruthy(); }); - it('should have the heading "Welcome to Ceph Dashboard"', () => { + it('should have project name as heading in welcome screen', () => { const heading = fixture.debugElement.query(By.css('h3')).nativeElement; - expect(heading.innerHTML).toBe('Welcome to Ceph Dashboard'); + expect(heading.innerHTML).toBe(`Welcome to ${projectConstants.projectName}`); }); - it('should call updateStatus when cluster creation is skipped', () => { - const clusterServiceSpy = spyOn(clusterService, 'updateStatus').and.callThrough(); - expect(clusterServiceSpy).not.toHaveBeenCalled(); + it('should show confirmation modal when cluster creation is skipped', () => { component.skipClusterCreation(); - expect(clusterServiceSpy).toHaveBeenCalledTimes(1); + expect(modalServiceShowSpy.calls.any()).toBeTruthy(); + expect(modalServiceShowSpy.calls.first().args[0]).toBe(ConfirmationModalComponent); + }); + + it('should show the wizard when cluster creation is started', () => { + component.createCluster(); + fixture.detectChanges(); + const nativeEl = fixture.debugElement.nativeElement; + expect(nativeEl.querySelector('cd-wizard')).not.toBe(null); + }); + + it('should have title Add Hosts', () => { + component.createCluster(); + fixture.detectChanges(); + const heading = fixture.debugElement.query(By.css('.title')).nativeElement; + expect(heading.innerHTML).toBe('Add Hosts'); + }); + + it('should show the host list when cluster creation as first step', () => { + component.createCluster(); + fixture.detectChanges(); + const nativeEl = fixture.debugElement.nativeElement; + expect(nativeEl.querySelector('cd-hosts')).not.toBe(null); + }); + + it('should move to next step and show the second page', () => { + const wizardStepServiceSpy = spyOn(wizardStepService, 'moveToNextStep').and.callThrough(); + const hostServiceSpy = spyOn(hostService, 'list').and.callThrough(); + component.createCluster(); + fixture.detectChanges(); + component.onNextStep(); + fixture.detectChanges(); + const heading = fixture.debugElement.query(By.css('.title')).nativeElement; + expect(wizardStepServiceSpy).toHaveBeenCalledTimes(1); + expect(hostServiceSpy).toBeCalledTimes(1); + expect(heading.innerHTML).toBe('Review'); + }); + + it('should show the button labels correctly', () => { + component.createCluster(); + fixture.detectChanges(); + let submitBtnLabel = component.showSubmitButtonLabel(); + expect(submitBtnLabel).toEqual('Next'); + let cancelBtnLabel = component.showCancelButtonLabel(); + expect(cancelBtnLabel).toEqual('Cancel'); + + // Last page of the wizard + component.onNextStep(); + fixture.detectChanges(); + submitBtnLabel = component.showSubmitButtonLabel(); + expect(submitBtnLabel).toEqual('Expand Cluster'); + cancelBtnLabel = component.showCancelButtonLabel(); + expect(cancelBtnLabel).toEqual('Back'); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts index 239a4f13ca7f0..b47a63e8cec00 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts @@ -1,44 +1,134 @@ -import { Component } from '@angular/core'; +import { Component, OnDestroy, TemplateRef, ViewChild } from 
'@angular/core'; +import { Router } from '@angular/router'; + +import { NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; +import { forkJoin, Subscription } from 'rxjs'; +import { finalize } from 'rxjs/operators'; import { ClusterService } from '~/app/shared/api/cluster.service'; -import { AppConstants } from '~/app/shared/constants/app.constants'; +import { HostService } from '~/app/shared/api/host.service'; +import { ConfirmationModalComponent } from '~/app/shared/components/confirmation-modal/confirmation-modal.component'; +import { ActionLabelsI18n, AppConstants } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; -import { Permission } from '~/app/shared/models/permissions'; +import { Permissions } from '~/app/shared/models/permissions'; +import { WizardStepModel } from '~/app/shared/models/wizard-steps'; import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { ModalService } from '~/app/shared/services/modal.service'; import { NotificationService } from '~/app/shared/services/notification.service'; +import { WizardStepsService } from '~/app/shared/services/wizard-steps.service'; @Component({ selector: 'cd-create-cluster', templateUrl: './create-cluster.component.html', styleUrls: ['./create-cluster.component.scss'] }) -export class CreateClusterComponent { - permission: Permission; - orchStatus = false; - featureAvailable = false; +export class CreateClusterComponent implements OnDestroy { + @ViewChild('skipConfirmTpl', { static: true }) + skipConfirmTpl: TemplateRef; + currentStep: WizardStepModel; + currentStepSub: Subscription; + permissions: Permissions; projectConstants: typeof AppConstants = AppConstants; + stepTitles = ['Add Hosts', 'Review']; + startClusterCreation = false; + observables: any = []; + modalRef: NgbModalRef; constructor( private authStorageService: AuthStorageService, + private stepsService: WizardStepsService, + private router: Router, + private hostService: HostService, + private notificationService: NotificationService, + private actionLabels: ActionLabelsI18n, private clusterService: ClusterService, - private notificationService: NotificationService + private modalService: ModalService ) { - this.permission = this.authStorageService.getPermissions().configOpt; + this.permissions = this.authStorageService.getPermissions(); + this.currentStepSub = this.stepsService.getCurrentStep().subscribe((step: WizardStepModel) => { + this.currentStep = step; + }); + this.currentStep.stepIndex = 1; } createCluster() { - this.notificationService.show( - NotificationType.error, - $localize`Cluster creation feature not implemented` - ); + this.startClusterCreation = true; } skipClusterCreation() { - this.clusterService.updateStatus('POST_INSTALLED').subscribe(() => { - this.notificationService.show( - NotificationType.info, - $localize`Cluster creation skipped by user` - ); - }); + const modalVariables = { + titleText: $localize`Warning`, + buttonText: $localize`Continue`, + warning: true, + bodyTpl: this.skipConfirmTpl, + showSubmit: true, + onSubmit: () => { + this.clusterService.updateStatus('POST_INSTALLED').subscribe({ + error: () => this.modalRef.close(), + complete: () => { + this.notificationService.show( + NotificationType.info, + $localize`Cluster expansion skipped by user` + ); + this.router.navigate(['/dashboard']); + this.modalRef.close(); + } + }); + } + }; + this.modalRef = this.modalService.show(ConfirmationModalComponent, modalVariables); + } + + 
onSubmit() { + forkJoin(this.observables) + .pipe( + finalize(() => + this.clusterService.updateStatus('POST_INSTALLED').subscribe(() => { + this.notificationService.show( + NotificationType.success, + $localize`Cluster expansion was successful` + ); + this.router.navigate(['/dashboard']); + }) + ) + ) + .subscribe({ + error: (error) => error.preventDefault() + }); + } + + onNextStep() { + if (!this.stepsService.isLastStep()) { + this.hostService.list().subscribe((hosts) => { + hosts.forEach((host) => { + if (host['status'] === 'maintenance') { + this.observables.push(this.hostService.update(host['hostname'], false, [], true)); + } + }); + }); + this.stepsService.moveToNextStep(); + } else { + this.onSubmit(); + } + } + + onPreviousStep() { + if (!this.stepsService.isFirstStep()) { + this.stepsService.moveToPreviousStep(); + } else { + this.router.navigate(['/dashboard']); + } + } + + showSubmitButtonLabel() { + return !this.stepsService.isLastStep() ? this.actionLabels.NEXT : $localize`Expand Cluster`; + } + + showCancelButtonLabel() { + return !this.stepsService.isFirstStep() ? this.actionLabels.BACK : this.actionLabels.CANCEL; + } + + ngOnDestroy(): void { + this.currentStepSub.unsubscribe(); } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html index 487caf85b12de..a3477b9bd4102 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html @@ -1,91 +1,95 @@ -
-
-
-
{{ action | titlecase }} {{ resource | upperFirst }}
+ + {{ action | titlecase }} {{ resource | upperFirst }} -
+ - -
- -
- - This field is required. - The chosen hostname is already in use. +
+ + + - -
- -
- - The value is not a valid IP address. + +
+ +
+ + The value is not a valid IP address. +
-
- -
- -
- - + +
+ +
+ + +
-
- -
-
-
- - + +
+
+
+ + +
-
- + +
- -
+ + diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.spec.ts index dbb834ea8c82c..ed3daf1e4b49b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.spec.ts @@ -3,6 +3,7 @@ import { ComponentFixture, fakeAsync, TestBed, tick } from '@angular/core/testin import { ReactiveFormsModule } from '@angular/forms'; import { RouterTestingModule } from '@angular/router/testing'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { ToastrModule } from 'ngx-toastr'; import { LoadingPanelComponent } from '~/app/shared/components/loading-panel/loading-panel.component'; @@ -24,7 +25,8 @@ describe('HostFormComponent', () => { ReactiveFormsModule, ToastrModule.forRoot() ], - declarations: [HostFormComponent] + declarations: [HostFormComponent], + providers: [NgbActiveModal] }, [LoadingPanelComponent] ); @@ -32,6 +34,7 @@ describe('HostFormComponent', () => { beforeEach(() => { fixture = TestBed.createComponent(HostFormComponent); component = fixture.componentInstance; + component.ngOnInit(); formHelper = new FormHelper(component.hostForm); fixture.detectChanges(); }); @@ -40,6 +43,11 @@ describe('HostFormComponent', () => { expect(component).toBeTruthy(); }); + it('should open the form in a modal', () => { + const nativeEl = fixture.debugElement.nativeElement; + expect(nativeEl.querySelector('cd-modal')).not.toBe(null); + }); + it('should validate the network address is valid', fakeAsync(() => { formHelper.setValue('addr', '115.42.150.37', true); tick(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts index b90312ff855f2..2fc8b13b540e6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts @@ -2,6 +2,8 @@ import { Component, OnInit } from '@angular/core'; import { FormControl, Validators } from '@angular/forms'; import { Router } from '@angular/router'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; + import { HostService } from '~/app/shared/api/host.service'; import { SelectMessages } from '~/app/shared/components/select/select-messages.model'; import { ActionLabelsI18n, URLVerbs } from '~/app/shared/constants/app.constants'; @@ -24,6 +26,7 @@ export class HostFormComponent extends CdForm implements OnInit { addr: string; status: string; allLabels: any; + pageURL: string; messages = new SelectMessages({ empty: $localize`There are no labels.`, @@ -35,15 +38,19 @@ export class HostFormComponent extends CdForm implements OnInit { private router: Router, private actionLabels: ActionLabelsI18n, private hostService: HostService, - private taskWrapper: TaskWrapperService + private taskWrapper: TaskWrapperService, + public activeModal: NgbActiveModal ) { super(); this.resource = $localize`host`; - this.action = this.actionLabels.CREATE; - this.createForm(); + this.action = this.actionLabels.ADD; } ngOnInit() { + if (this.router.url.includes('hosts')) { + this.pageURL = 'hosts'; + } + this.createForm(); this.hostService.list().subscribe((resp: any[]) => { this.hostnames = 
resp.map((host) => { return host['hostname']; @@ -53,6 +60,7 @@ export class HostFormComponent extends CdForm implements OnInit { } private createForm() { + const disableMaintenance = this.pageURL !== 'hosts'; this.hostForm = new CdFormGroup({ hostname: new FormControl('', { validators: [ @@ -66,7 +74,7 @@ export class HostFormComponent extends CdForm implements OnInit { validators: [CdValidators.ip()] }), labels: new FormControl([]), - maintenance: new FormControl(false) + maintenance: new FormControl({ value: disableMaintenance, disabled: disableMaintenance }) }); } @@ -77,7 +85,7 @@ export class HostFormComponent extends CdForm implements OnInit { this.allLabels = this.hostForm.get('labels').value; this.taskWrapper .wrapTaskAroundCall({ - task: new FinishedTask('host/' + URLVerbs.CREATE, { + task: new FinishedTask('host/' + URLVerbs.ADD, { hostname: hostname }), call: this.hostService.create(hostname, this.addr, this.allLabels, this.status) @@ -87,7 +95,9 @@ export class HostFormComponent extends CdForm implements OnInit { this.hostForm.setErrors({ cdSubmitButton: true }); }, complete: () => { - this.router.navigate(['/hosts']); + this.pageURL === 'hosts' + ? this.router.navigate([this.pageURL, { outlets: { modal: null } }]) + : this.activeModal.close(); } }); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html index f31adf9e5c0e7..59bab46d72d23 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html @@ -5,12 +5,13 @@ Hosts List -
@@ -29,7 +30,7 @@
  • + *ngIf="permissions.grafana.read && !clusterCreation"> Overall Performance @@ -66,3 +67,4 @@ Are you sure you want to continue? + diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.spec.ts index f4d400f3230c8..049aceba0994a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.spec.ts @@ -72,6 +72,7 @@ describe('HostsComponent', () => { showForceMaintenanceModal = new MockShowForceMaintenanceModal(); fixture = TestBed.createComponent(HostsComponent); component = fixture.componentInstance; + component.clusterCreation = false; hostListSpy = spyOn(TestBed.inject(HostService), 'list'); orchService = TestBed.inject(OrchestratorService); }); @@ -182,7 +183,7 @@ describe('HostsComponent', () => { const tests = [ { expectResults: { - Create: { disabled: false, disableDesc: '' }, + Add: { disabled: false, disableDesc: '' }, Edit: { disabled: true, disableDesc: '' }, Delete: { disabled: true, disableDesc: '' } } @@ -190,7 +191,7 @@ describe('HostsComponent', () => { { selectRow: fakeHosts[0], // non-orchestrator host expectResults: { - Create: { disabled: false, disableDesc: '' }, + Add: { disabled: false, disableDesc: '' }, Edit: { disabled: true, disableDesc: component.messages.nonOrchHost }, Delete: { disabled: true, disableDesc: component.messages.nonOrchHost } } @@ -198,7 +199,7 @@ describe('HostsComponent', () => { { selectRow: fakeHosts[1], // orchestrator host expectResults: { - Create: { disabled: false, disableDesc: '' }, + Add: { disabled: false, disableDesc: '' }, Edit: { disabled: false, disableDesc: '' }, Delete: { disabled: false, disableDesc: '' } } @@ -222,7 +223,7 @@ describe('HostsComponent', () => { const tests = [ { expectResults: { - Create: resultNoOrchestrator, + Add: resultNoOrchestrator, Edit: { disabled: true, disableDesc: '' }, Delete: { disabled: true, disableDesc: '' } } @@ -230,7 +231,7 @@ describe('HostsComponent', () => { { selectRow: fakeHosts[0], // non-orchestrator host expectResults: { - Create: resultNoOrchestrator, + Add: resultNoOrchestrator, Edit: { disabled: true, disableDesc: component.messages.nonOrchHost }, Delete: { disabled: true, disableDesc: component.messages.nonOrchHost } } @@ -238,7 +239,7 @@ describe('HostsComponent', () => { { selectRow: fakeHosts[1], // orchestrator host expectResults: { - Create: resultNoOrchestrator, + Add: resultNoOrchestrator, Edit: resultNoOrchestrator, Delete: resultNoOrchestrator } @@ -255,7 +256,7 @@ describe('HostsComponent', () => { const tests = [ { expectResults: { - Create: resultMissingFeatures, + Add: resultMissingFeatures, Edit: { disabled: true, disableDesc: '' }, Delete: { disabled: true, disableDesc: '' } } @@ -263,7 +264,7 @@ describe('HostsComponent', () => { { selectRow: fakeHosts[0], // non-orchestrator host expectResults: { - Create: resultMissingFeatures, + Add: resultMissingFeatures, Edit: { disabled: true, disableDesc: component.messages.nonOrchHost }, Delete: { disabled: true, disableDesc: component.messages.nonOrchHost } } @@ -271,7 +272,7 @@ describe('HostsComponent', () => { { selectRow: fakeHosts[1], // orchestrator host expectResults: { - Create: resultMissingFeatures, + Add: resultMissingFeatures, Edit: resultMissingFeatures, Delete: resultMissingFeatures } diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts index 8c559a748b05d..1a3798b8ff9c0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts @@ -1,4 +1,4 @@ -import { Component, OnInit, TemplateRef, ViewChild } from '@angular/core'; +import { Component, Input, OnInit, TemplateRef, ViewChild } from '@angular/core'; import { Router } from '@angular/router'; import { NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; @@ -11,7 +11,7 @@ import { ConfirmationModalComponent } from '~/app/shared/components/confirmation import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; import { FormModalComponent } from '~/app/shared/components/form-modal/form-modal.component'; import { SelectMessages } from '~/app/shared/components/select/select-messages.model'; -import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; +import { ActionLabelsI18n, URLVerbs } from '~/app/shared/constants/app.constants'; import { TableComponent } from '~/app/shared/datatable/table/table.component'; import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; import { Icons } from '~/app/shared/enum/icons.enum'; @@ -30,6 +30,7 @@ import { ModalService } from '~/app/shared/services/modal.service'; import { NotificationService } from '~/app/shared/services/notification.service'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; import { URLBuilderService } from '~/app/shared/services/url-builder.service'; +import { HostFormComponent } from './host-form/host-form.component'; const BASE_URL = 'hosts'; @@ -46,6 +47,8 @@ export class HostsComponent extends ListWithDetails implements OnInit { public servicesTpl: TemplateRef; @ViewChild('maintenanceConfirmTpl', { static: true }) maintenanceConfirmTpl: TemplateRef; + @Input() + clusterCreation = false; permissions: Permissions; columns: Array = []; @@ -58,6 +61,8 @@ export class HostsComponent extends ListWithDetails implements OnInit { isExecuting = false; errorMessage: string; enableButton: boolean; + pageURL: string; + bsModalRef: NgbModalRef; icons = Icons; @@ -67,7 +72,7 @@ export class HostsComponent extends ListWithDetails implements OnInit { orchStatus: OrchestratorStatus; actionOrchFeatures = { - create: [OrchestratorFeature.HOST_CREATE], + add: [OrchestratorFeature.HOST_CREATE], edit: [OrchestratorFeature.HOST_LABEL_ADD, OrchestratorFeature.HOST_LABEL_REMOVE], delete: [OrchestratorFeature.HOST_DELETE], maintenance: [ @@ -80,7 +85,6 @@ export class HostsComponent extends ListWithDetails implements OnInit { private authStorageService: AuthStorageService, private hostService: HostService, private cephShortVersionPipe: CephShortVersionPipe, - private urlBuilder: URLBuilderService, private actionLabels: ActionLabelsI18n, private modalService: ModalService, private taskWrapper: TaskWrapperService, @@ -91,13 +95,6 @@ export class HostsComponent extends ListWithDetails implements OnInit { super(); this.permissions = this.authStorageService.getPermissions(); this.tableActions = [ - { - name: this.actionLabels.CREATE, - permission: 'create', - icon: Icons.add, - click: () => this.router.navigate([this.urlBuilder.getCreate()]), - disable: (selection: CdTableSelection) => this.getDisable('create', selection) - }, { 
name: this.actionLabels.EDIT, permission: 'update', @@ -118,7 +115,10 @@ export class HostsComponent extends ListWithDetails implements OnInit { icon: Icons.enter, click: () => this.hostMaintenance(), disable: (selection: CdTableSelection) => - this.getDisable('maintenance', selection) || this.isExecuting || this.enableButton + this.getDisable('maintenance', selection) || + this.isExecuting || + this.enableButton || + this.clusterCreation }, { name: this.actionLabels.EXIT_MAINTENANCE, @@ -126,12 +126,25 @@ export class HostsComponent extends ListWithDetails implements OnInit { icon: Icons.exit, click: () => this.hostMaintenance(), disable: (selection: CdTableSelection) => - this.getDisable('maintenance', selection) || this.isExecuting || !this.enableButton + this.getDisable('maintenance', selection) || + this.isExecuting || + !this.enableButton || + this.clusterCreation } ]; } ngOnInit() { + this.tableActions.unshift({ + name: this.actionLabels.ADD, + permission: 'create', + icon: Icons.add, + click: () => + this.clusterCreation + ? (this.bsModalRef = this.modalService.show(HostFormComponent)) + : this.router.navigate([BASE_URL, { outlets: { modal: [URLVerbs.ADD] } }]), + disable: (selection: CdTableSelection) => this.getDisable('add', selection) + }); this.columns = [ { name: $localize`Hostname`, @@ -141,6 +154,7 @@ export class HostsComponent extends ListWithDetails implements OnInit { { name: $localize`Services`, prop: 'services', + isHidden: this.clusterCreation, flexGrow: 3, cellTemplate: this.servicesTpl }, @@ -167,6 +181,7 @@ export class HostsComponent extends ListWithDetails implements OnInit { { name: $localize`Version`, prop: 'ceph_version', + isHidden: this.clusterCreation, flexGrow: 1, pipe: this.cephShortVersionPipe } @@ -287,7 +302,7 @@ export class HostsComponent extends ListWithDetails implements OnInit { } getDisable( - action: 'create' | 'edit' | 'delete' | 'maintenance', + action: 'add' | 'edit' | 'delete' | 'maintenance', selection: CdTableSelection ): boolean | string { if (action === 'delete' || action === 'edit' || action === 'maintenance') { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.spec.ts index e85223c80e39c..93c9e9adcbbf0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.spec.ts @@ -1,6 +1,7 @@ import { HttpClientTestingModule, HttpTestingController } from '@angular/common/http/testing'; import { ComponentFixture, TestBed } from '@angular/core/testing'; import { ReactiveFormsModule } from '@angular/forms'; +import { RouterTestingModule } from '@angular/router/testing'; import { NgbActiveModal, NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'; import { ToastrModule } from 'ngx-toastr'; @@ -26,7 +27,8 @@ describe('OsdFlagsIndivModalComponent', () => { ReactiveFormsModule, SharedModule, ToastrModule.forRoot(), - NgbTooltipModule + NgbTooltipModule, + RouterTestingModule ], declarations: [OsdFlagsIndivModalComponent], providers: [NgbActiveModal] diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts index 3cbfab4ebaac3..fc02e9bdeeefb 
100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts @@ -53,6 +53,6 @@ describe('LoginComponent', () => { component.login(); expect(routerNavigateSpy).toHaveBeenCalledTimes(1); - expect(routerNavigateSpy).toHaveBeenCalledWith(['/create-cluster']); + expect(routerNavigateSpy).toHaveBeenCalledWith(['/expand-cluster']); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts index 77bafd99c82e0..a98548f94c766 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts @@ -65,10 +65,10 @@ export class LoginComponent implements OnInit { login() { this.authService.login(this.model).subscribe(() => { - const urlPath = this.postInstalled ? '/' : '/create-cluster'; + const urlPath = this.postInstalled ? '/' : '/expand-cluster'; let url = _.get(this.route.snapshot.queryParams, 'returnUrl', urlPath); if (!this.postInstalled && this.route.snapshot.queryParams['returnUrl'] === '/dashboard') { - url = '/create-cluster'; + url = '/expand-cluster'; } this.router.navigate([url]); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts index ef8b423a3a70c..a6d0624d31885 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts @@ -45,6 +45,7 @@ import { SparklineComponent } from './sparkline/sparkline.component'; import { SubmitButtonComponent } from './submit-button/submit-button.component'; import { TelemetryNotificationComponent } from './telemetry-notification/telemetry-notification.component'; import { UsageBarComponent } from './usage-bar/usage-bar.component'; +import { WizardComponent } from './wizard/wizard.component'; @NgModule({ imports: [ @@ -93,7 +94,8 @@ import { UsageBarComponent } from './usage-bar/usage-bar.component'; Copy2ClipboardButtonComponent, DownloadButtonComponent, FormButtonPanelComponent, - MotdComponent + MotdComponent, + WizardComponent ], providers: [], exports: [ @@ -120,7 +122,8 @@ import { UsageBarComponent } from './usage-bar/usage-bar.component'; Copy2ClipboardButtonComponent, DownloadButtonComponent, FormButtonPanelComponent, - MotdComponent + MotdComponent, + WizardComponent ] }) export class ComponentsModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.html index 5cbd4f58c52fa..657e0d6053f89 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.html @@ -1,13 +1,19 @@ - +
    +
    + +
    +
    diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.spec.ts index d3ee1ca2abd19..cf08bef10090d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.spec.ts @@ -1,4 +1,6 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'; +import { Router } from '@angular/router'; +import { RouterTestingModule } from '@angular/router/testing'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; @@ -8,14 +10,18 @@ import { ModalComponent } from './modal.component'; describe('ModalComponent', () => { let component: ModalComponent; let fixture: ComponentFixture; + let routerNavigateSpy: jasmine.Spy; configureTestBed({ - declarations: [ModalComponent] + declarations: [ModalComponent], + imports: [RouterTestingModule] }); beforeEach(() => { fixture = TestBed.createComponent(ModalComponent); component = fixture.componentInstance; + routerNavigateSpy = spyOn(TestBed.inject(Router), 'navigate'); + routerNavigateSpy.and.returnValue(true); fixture.detectChanges(); }); @@ -38,4 +44,11 @@ describe('ModalComponent', () => { component.close(); expect(component.modalRef.close).toHaveBeenCalled(); }); + + it('should hide the routed modal', () => { + component.pageURL = 'hosts'; + component.close(); + expect(routerNavigateSpy).toHaveBeenCalledTimes(1); + expect(routerNavigateSpy).toHaveBeenCalledWith(['hosts', { outlets: { modal: null } }]); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.ts index 730da6d62527b..25e06e62af188 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/modal/modal.component.ts @@ -1,4 +1,5 @@ import { Component, EventEmitter, Input, Output } from '@angular/core'; +import { Router } from '@angular/router'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; @@ -10,6 +11,8 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; export class ModalComponent { @Input() modalRef: NgbActiveModal; + @Input() + pageURL: string; /** * Should be a function that is triggered when the modal is hidden. @@ -17,8 +20,12 @@ export class ModalComponent { @Output() hide = new EventEmitter(); + constructor(private router: Router) {} + close() { - this.modalRef?.close(); + this.pageURL + ? 
this.router.navigate([this.pageURL, { outlets: { modal: null } }]) + : this.modalRef?.close(); this.hide.emit(); } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.html new file mode 100644 index 0000000000000..25aa3e1df855e --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.html @@ -0,0 +1,19 @@ + diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.scss new file mode 100644 index 0000000000000..80e3550cd68a5 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.scss @@ -0,0 +1,30 @@ +@use './src/styles/vendor/variables' as vv; + +.card-body { + padding-left: 0; +} + +span.circle-step { + background: vv.$gray-500; + border-radius: 0.8em; + color: vv.$white; + display: inline-block; + font-weight: bold; + line-height: 1.6em; + margin-right: 5px; + text-align: center; + width: 1.6em; + + &.active { + background-color: vv.$primary; + } +} + +.nav-pills .nav-link { + background-color: vv.$white; + color: vv.$gray-800; + + &.active { + color: vv.$primary; + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.spec.ts new file mode 100644 index 0000000000000..b42578fb71193 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { SharedModule } from '~/app/shared/shared.module'; +import { configureTestBed } from '~/testing/unit-test-helper'; +import { WizardComponent } from './wizard.component'; + +describe('WizardComponent', () => { + let component: WizardComponent; + let fixture: ComponentFixture; + + configureTestBed({ + imports: [SharedModule] + }); + + beforeEach(() => { + fixture = TestBed.createComponent(WizardComponent); + component = fixture.componentInstance; + component.stepsTitle = ['Add Hosts', 'Review']; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.ts new file mode 100644 index 0000000000000..d46aa480e7918 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/wizard/wizard.component.ts @@ -0,0 +1,39 @@ +import { Component, Input, OnDestroy, OnInit } from '@angular/core'; + +import * as _ from 'lodash'; +import { Observable, Subscription } from 'rxjs'; + +import { WizardStepModel } from '~/app/shared/models/wizard-steps'; +import { WizardStepsService } from '~/app/shared/services/wizard-steps.service'; + +@Component({ + selector: 'cd-wizard', + templateUrl: './wizard.component.html', + styleUrls: ['./wizard.component.scss'] +}) +export class WizardComponent implements OnInit, OnDestroy { + @Input() + stepsTitle: string[]; + + steps: Observable; + currentStep: WizardStepModel; + currentStepSub: Subscription; + + constructor(private stepsService: WizardStepsService) {} + + ngOnInit(): void { + this.stepsService.setTotalSteps(this.stepsTitle.length); + 
this.steps = this.stepsService.getSteps(); + this.currentStepSub = this.stepsService.getCurrentStep().subscribe((step: WizardStepModel) => { + this.currentStep = step; + }); + } + + onStepClick(step: WizardStepModel) { + this.stepsService.setCurrentStep(step); + } + + ngOnDestroy(): void { + this.currentStepSub.unsubscribe(); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/wizard-steps.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/wizard-steps.ts new file mode 100644 index 0000000000000..177feb486d1a1 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/wizard-steps.ts @@ -0,0 +1,4 @@ +export interface WizardStepModel { + stepIndex: number; + isComplete: boolean; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts index c39bb0c26b695..44eb9bd30978a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts @@ -114,9 +114,7 @@ export class TaskMessageService { messages = { // Host tasks - 'host/create': this.newTaskMessage(this.commonOperations.create, (metadata) => - this.host(metadata) - ), + 'host/add': this.newTaskMessage(this.commonOperations.add, (metadata) => this.host(metadata)), 'host/delete': this.newTaskMessage(this.commonOperations.delete, (metadata) => this.host(metadata) ), diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.spec.ts new file mode 100644 index 0000000000000..47c2149756703 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.spec.ts @@ -0,0 +1,16 @@ +import { TestBed } from '@angular/core/testing'; + +import { WizardStepsService } from './wizard-steps.service'; + +describe('WizardStepsService', () => { + let service: WizardStepsService; + + beforeEach(() => { + TestBed.configureTestingModule({}); + service = TestBed.inject(WizardStepsService); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.ts new file mode 100644 index 0000000000000..e0fb2be944de0 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/wizard-steps.service.ts @@ -0,0 +1,58 @@ +import { Injectable } from '@angular/core'; + +import { BehaviorSubject, Observable } from 'rxjs'; + +import { WizardStepModel } from '~/app/shared/models/wizard-steps'; + +const initialStep = [{ stepIndex: 1, isComplete: false }]; + +@Injectable({ + providedIn: 'root' +}) +export class WizardStepsService { + steps$: BehaviorSubject; + currentStep$: BehaviorSubject = new BehaviorSubject(null); + + constructor() { + this.steps$ = new BehaviorSubject(initialStep); + this.currentStep$.next(this.steps$.value[0]); + } + + setTotalSteps(step: number) { + const steps: WizardStepModel[] = []; + for (let i = 1; i <= step; i++) { + steps.push({ stepIndex: i, isComplete: false }); + } + this.steps$ = new BehaviorSubject(steps); + } + + setCurrentStep(step: WizardStepModel): void { + this.currentStep$.next(step); + } + + getCurrentStep(): Observable { + return this.currentStep$.asObservable(); + } + + getSteps(): 
Observable { + return this.steps$.asObservable(); + } + + moveToNextStep(): void { + const index = this.currentStep$.value.stepIndex; + this.currentStep$.next(this.steps$.value[index]); + } + + moveToPreviousStep(): void { + const index = this.currentStep$.value.stepIndex - 1; + this.currentStep$.next(this.steps$.value[index - 1]); + } + + isLastStep(): boolean { + return this.currentStep$.value.stepIndex === this.steps$.value.length; + } + + isFirstStep(): boolean { + return this.currentStep$.value?.stepIndex - 1 === 0; + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_forms.scss b/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_forms.scss index cca9bd5d5d9d8..3c6ddbf80c998 100644 --- a/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_forms.scss +++ b/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_forms.scss @@ -53,6 +53,17 @@ } cd-modal { + .modal { + /* stylelint-disable */ + background-color: rgba(0, 0, 0, 0.4); + /* stylelint-enable */ + display: block; + } + + .modal-dialog { + max-width: 70vh; + } + .cd-col-form-label { @extend .col-lg-4; } From 2e31ba54273732db5d93a843d22345b676fcecf7 Mon Sep 17 00:00:00 2001 From: Avan Thakkar Date: Fri, 9 Jul 2021 18:57:35 +0530 Subject: [PATCH 0125/2866] mgr/dashboard: Review Section for the Create Cluster Workflow Fixes: https://tracker.ceph.com/issues/50566 Signed-off-by: Avan Thakkar --- .../src/app/ceph/cluster/cluster.module.ts | 4 +- .../create-cluster-review.component.html | 31 +++++++ .../create-cluster-review.component.scss | 0 .../create-cluster-review.component.spec.ts | 61 +++++++++++++ .../create-cluster-review.component.ts | 86 +++++++++++++++++++ .../create-cluster.component.html | 5 +- .../create-cluster.component.spec.ts | 4 +- 7 files changed, 183 insertions(+), 8 deletions(-) create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.scss create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.spec.ts create mode 100644 src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts index 185c34b27504d..610bb79baebc7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts @@ -22,6 +22,7 @@ import { CephSharedModule } from '../shared/ceph-shared.module'; import { ConfigurationDetailsComponent } from './configuration/configuration-details/configuration-details.component'; import { ConfigurationFormComponent } from './configuration/configuration-form/configuration-form.component'; import { ConfigurationComponent } from './configuration/configuration.component'; +import { CreateClusterReviewComponent } from './create-cluster/create-cluster-review.component'; import { CreateClusterComponent } from './create-cluster/create-cluster.component'; import { CrushmapComponent } from './crushmap/crushmap.component'; import { HostDetailsComponent } from './hosts/host-details/host-details.component'; @@ -114,7 +115,8 @@ import { TelemetryComponent } from './telemetry/telemetry.component'; ServiceFormComponent, 
OsdFlagsIndivModalComponent, PlacementPipe, - CreateClusterComponent + CreateClusterComponent, + CreateClusterReviewComponent ], providers: [NgbActiveModal] }) diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html new file mode 100644 index 0000000000000..fa010fdcd8ede --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html @@ -0,0 +1,31 @@ +
    +
    +
    + Cluster Resources + + + + + +
    Hosts{{ hostsCount }}
    +
    +
    + +
    + Hosts by Label + + + + Host Details + + +
    +
    diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.scss new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.spec.ts new file mode 100644 index 0000000000000..e823932c09732 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.spec.ts @@ -0,0 +1,61 @@ +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import _ from 'lodash'; +import { of } from 'rxjs'; + +import { CephModule } from '~/app/ceph/ceph.module'; +import { CoreModule } from '~/app/core/core.module'; +import { HostService } from '~/app/shared/api/host.service'; +import { SharedModule } from '~/app/shared/shared.module'; +import { configureTestBed } from '~/testing/unit-test-helper'; +import { CreateClusterReviewComponent } from './create-cluster-review.component'; + +describe('CreateClusterReviewComponent', () => { + let component: CreateClusterReviewComponent; + let fixture: ComponentFixture; + let hostService: HostService; + let hostListSpy: jasmine.Spy; + + configureTestBed({ + imports: [HttpClientTestingModule, SharedModule, CoreModule, CephModule] + }); + + beforeEach(() => { + fixture = TestBed.createComponent(CreateClusterReviewComponent); + component = fixture.componentInstance; + hostService = TestBed.inject(HostService); + hostListSpy = spyOn(hostService, 'list'); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should verify host metadata calculations', () => { + const hostnames = ['ceph.test1', 'ceph.test2']; + const payload = [ + { + hostname: hostnames[0], + ceph_version: 'ceph version Development', + labels: ['foo', 'bar'] + }, + { + hostname: hostnames[1], + ceph_version: 'ceph version Development', + labels: ['foo1', 'bar1'] + } + ]; + hostListSpy.and.callFake(() => of(payload)); + fixture.detectChanges(); + expect(hostListSpy).toHaveBeenCalled(); + + expect(component.hostsCount).toBe(2); + expect(component.uniqueLabels.size).toBe(4); + const labels = ['foo', 'bar', 'foo1', 'bar1']; + + labels.forEach((label) => { + expect(component.labelOccurrences[label]).toBe(1); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts new file mode 100644 index 0000000000000..c78e8f910ece6 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts @@ -0,0 +1,86 @@ +import { Component, OnInit } from '@angular/core'; + +import _ from 'lodash'; + +import { HostService } from '~/app/shared/api/host.service'; +import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; + +@Component({ + selector: 'cd-create-cluster-review', + templateUrl: './create-cluster-review.component.html', + styleUrls: ['./create-cluster-review.component.scss'] +}) +export class CreateClusterReviewComponent implements OnInit { + hosts: object[] = []; + hostsDetails: object; + hostsByLabel: object; + 
hostsCount: number; + labelOccurrences = {}; + hostsCountPerLabel: object[] = []; + uniqueLabels: Set = new Set(); + + constructor(private hostService: HostService) {} + + ngOnInit() { + this.hostsDetails = { + columns: [ + { + prop: 'hostname', + name: $localize`Host Name`, + flexGrow: 2 + }, + { + name: $localize`Labels`, + prop: 'labels', + flexGrow: 1, + cellTransformation: CellTemplate.badge, + customTemplateConfig: { + class: 'badge-dark' + } + } + ] + }; + + this.hostsByLabel = { + columns: [ + { + prop: 'label', + name: $localize`Labels`, + flexGrow: 1, + cellTransformation: CellTemplate.badge, + customTemplateConfig: { + class: 'badge-dark' + } + }, + { + name: $localize`Number of Hosts`, + prop: 'hosts_per_label', + flexGrow: 1 + } + ] + }; + + this.hostService.list().subscribe((resp: object[]) => { + this.hosts = resp; + this.hostsCount = this.hosts.length; + + _.forEach(this.hosts, (hostKey) => { + const labels = hostKey['labels']; + _.forEach(labels, (label) => { + this.labelOccurrences[label] = (this.labelOccurrences[label] || 0) + 1; + this.uniqueLabels.add(label); + }); + }); + + this.uniqueLabels.forEach((label) => { + this.hostsCountPerLabel.push({ + label: label, + hosts_per_label: this.labelOccurrences[label] + }); + }); + + this.hostsByLabel['data'] = [...this.hostsCountPerLabel]; + this.hostsDetails['data'] = [...this.hosts]; + }); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html index fb006ec1beb10..38887328ec3ab 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html @@ -44,10 +44,7 @@
  • -

    Review

    -
    -

    To be implemented

    +
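[Editor's note — illustrative sketch, not part of the patch] The CreateClusterReviewComponent introduced above builds its "Hosts by Label" table by counting how many hosts carry each label. Reduced to a pure function over hypothetical host data, the aggregation looks roughly like this; the helper name countHostsPerLabel and the sample hostnames are invented for illustration only.

    interface HostInfo {
      hostname: string;
      labels: string[];
    }

    // Count how many hosts carry each label, producing one row per unique label,
    // mirroring what ngOnInit() does with labelOccurrences / hostsCountPerLabel.
    function countHostsPerLabel(hosts: HostInfo[]): { label: string; hosts_per_label: number }[] {
      const occurrences: Record<string, number> = {};
      for (const host of hosts) {
        for (const label of host.labels) {
          occurrences[label] = (occurrences[label] || 0) + 1;
        }
      }
      return Object.entries(occurrences).map(([label, count]) => ({
        label,
        hosts_per_label: count
      }));
    }

    // Example with two hypothetical hosts:
    //   countHostsPerLabel([
    //     { hostname: 'node-00', labels: ['mon', 'osd'] },
    //     { hostname: 'node-01', labels: ['osd'] }
    //   ])
    // returns [{ label: 'mon', hosts_per_label: 1 }, { label: 'osd', hosts_per_label: 2 }]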
    diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts index 1ebdfb3a59d2f..b0564703840d4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts @@ -90,10 +90,8 @@ describe('CreateClusterComponent', () => { fixture.detectChanges(); component.onNextStep(); fixture.detectChanges(); - const heading = fixture.debugElement.query(By.css('.title')).nativeElement; expect(wizardStepServiceSpy).toHaveBeenCalledTimes(1); - expect(hostServiceSpy).toBeCalledTimes(1); - expect(heading.innerHTML).toBe('Review'); + expect(hostServiceSpy).toBeCalledTimes(2); }); it('should show the button labels correctly', () => { From ad1535eea465f5e9f096600e01df1e95813ebb18 Mon Sep 17 00:00:00 2001 From: Nizamudeen A Date: Tue, 17 Aug 2021 19:04:31 +0530 Subject: [PATCH 0126/2866] mgr/dashboard: Cluster Creation Create OSDs Section Create OSDs section in cluster creation wizard Fixes: https://tracker.ceph.com/issues/51991 Fixes: https://tracker.ceph.com/issues/52298 Signed-off-by: Nizamudeen A Signed-off-by: Aashish Sharma --- .../integration/cluster/create-cluster.po.ts | 17 +++++++ .../02-create-cluster-add-host.e2e-spec.ts | 9 +++- .../03-create-cluster-create-osds.e2e-spec.ts | 38 ++++++++++++++ .../03-create-cluster-review.e2e-spec.ts | 4 +- .../workflow/04-cluster-check.e2e-spec.ts | 18 ++++++- .../create-cluster-review.component.html | 7 +++ .../create-cluster-review.component.ts | 9 +++- .../create-cluster.component.html | 9 ++++ .../create-cluster.component.scss | 4 ++ .../create-cluster.component.spec.ts | 33 ++++++++---- .../create-cluster.component.ts | 50 ++++++++++++++++--- .../hosts/host-form/host-form.component.ts | 8 +-- .../app/ceph/cluster/hosts/hosts.component.ts | 19 ++++--- .../inventory-devices.component.html | 2 +- .../inventory-devices.component.spec.ts | 12 +++++ .../inventory-devices.component.ts | 17 +++++++ .../inventory/inventory.component.spec.ts | 7 ++- ...osd-devices-selection-modal.component.html | 1 + .../osd-devices-selection-modal.component.ts | 6 ++- .../osd/osd-form/osd-form.component.html | 8 +-- .../osd/osd-form/osd-form.component.ts | 11 +++- .../shared/services/wizard-steps.service.ts | 5 ++ 22 files changed, 248 insertions(+), 46 deletions(-) create mode 100644 src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-create-osds.e2e-spec.ts diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts index 22941b28e7cb7..4ae03f4aaae29 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/create-cluster.po.ts @@ -93,7 +93,10 @@ export class CreateClusterWizardHelper extends PageHelper { } } cy.get('cd-modal cd-submit-button').click(); + this.checkLabelExists(hostname, labels, add); + } + checkLabelExists(hostname: string, labels: string[], add: boolean) { // Verify labels are added or removed from Labels column // First find row with hostname, then find labels in the row this.getTableCell(this.columnIndex.hostname, hostname) @@ -110,4 +113,18 @@ export class 
CreateClusterWizardHelper extends PageHelper { } }); } + + createOSD(deviceType: 'hdd' | 'ssd') { + // Click Primary devices Add button + cy.get('cd-osd-devices-selection-groups[name="Primary"]').as('primaryGroups'); + cy.get('@primaryGroups').find('button').click(); + + // Select all devices with `deviceType` + cy.get('cd-osd-devices-selection-modal').within(() => { + cy.get('.modal-footer .tc_submitButton').as('addButton').should('be.disabled'); + this.filterTable('Type', deviceType); + this.getTableCount('total').should('be.gte', 1); + cy.get('@addButton').click(); + }); + } } diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts index 7bf5b5be3d496..3b64c2987c9e5 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/02-create-cluster-add-host.e2e-spec.ts @@ -8,7 +8,7 @@ describe('Create cluster add host page', () => { 'ceph-node-02.cephlab.com' ]; const addHost = (hostname: string, exist?: boolean) => { - createCluster.add(hostname, exist, true); + createCluster.add(hostname, exist, false); createCluster.checkExist(hostname, true); }; @@ -25,13 +25,18 @@ describe('Create cluster add host page', () => { cy.get('.title').should('contain.text', 'Add Hosts'); }); - it('should check existing host and add new hosts into maintenance mode', () => { + it('should check existing host and add new hosts', () => { createCluster.checkExist(hostnames[0], true); addHost(hostnames[1], false); addHost(hostnames[2], false); }); + it('should verify "_no_schedule" label is added', () => { + createCluster.checkLabelExists(hostnames[1], ['_no_schedule'], true); + createCluster.checkLabelExists(hostnames[2], ['_no_schedule'], true); + }); + it('should not add an existing host', () => { createCluster.add(hostnames[0], true); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-create-osds.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-create-osds.e2e-spec.ts new file mode 100644 index 0000000000000..92c0739c5ede4 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-create-osds.e2e-spec.ts @@ -0,0 +1,38 @@ +import { CreateClusterWizardHelper } from 'cypress/integration/cluster/create-cluster.po'; +import { OSDsPageHelper } from 'cypress/integration/cluster/osds.po'; + +const osds = new OSDsPageHelper(); + +describe('Create cluster create osds page', () => { + const createCluster = new CreateClusterWizardHelper(); + + beforeEach(() => { + cy.login(); + Cypress.Cookies.preserveOnce('token'); + createCluster.navigateTo(); + createCluster.createCluster(); + cy.get('button[aria-label="Next"]').click(); + }); + + it('should check if nav-link and title contains Create OSDs', () => { + cy.get('.nav-link').should('contain.text', 'Create OSDs'); + + cy.get('.title').should('contain.text', 'Create OSDs'); + }); + + describe('when Orchestrator is available', () => { + it('should create OSDs', () => { + osds.navigateTo(); + osds.getTableCount('total').as('initOSDCount'); + + createCluster.navigateTo(); + createCluster.createCluster(); + cy.get('button[aria-label="Next"]').click(); + + 
createCluster.createOSD('hdd'); + + cy.get('button[aria-label="Next"]').click(); + cy.get('button[aria-label="Next"]').click(); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts index 17dd84994d60e..624f457458ff2 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/03-create-cluster-review.e2e-spec.ts @@ -10,13 +10,12 @@ describe('Create Cluster Review page', () => { createCluster.createCluster(); cy.get('button[aria-label="Next"]').click(); + cy.get('button[aria-label="Next"]').click(); }); describe('navigation link and title test', () => { it('should check if nav-link and title contains Review', () => { cy.get('.nav-link').should('contain.text', 'Review'); - - cy.get('.title').should('contain.text', 'Review'); }); }); @@ -27,6 +26,7 @@ describe('Create Cluster Review page', () => { // check for fields in table createCluster.getStatusTables().should('contain.text', 'Hosts'); + createCluster.getStatusTables().should('contain.text', 'Storage Capacity'); }); it('should check Hosts by Label and Host Details tables are present', () => { diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts index 9717dd8b7dbf8..116cbd789c8c0 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/workflow/04-cluster-check.e2e-spec.ts @@ -1,5 +1,6 @@ import { CreateClusterWizardHelper } from 'cypress/integration/cluster/create-cluster.po'; import { HostsPageHelper } from 'cypress/integration/cluster/hosts.po'; +import { OSDsPageHelper } from 'cypress/integration/cluster/osds.po'; describe('when cluster creation is completed', () => { const createCluster = new CreateClusterWizardHelper(); @@ -13,6 +14,7 @@ describe('when cluster creation is completed', () => { createCluster.navigateTo(); createCluster.createCluster(); + cy.get('button[aria-label="Next"]').click(); cy.get('button[aria-label="Next"]').click(); cy.get('button[aria-label="Next"]').click(); @@ -26,9 +28,9 @@ describe('when cluster creation is completed', () => { beforeEach(() => { hosts.navigateTo(); }); - it('should have already exited from maintenance', () => { + it('should have removed "_no_schedule" label', () => { for (let host = 0; host < hostnames.length; host++) { - cy.get('datatable-row-wrapper').should('not.have.text', 'maintenance'); + cy.get('datatable-row-wrapper').should('not.have.text', '_no_schedule'); } }); @@ -46,4 +48,16 @@ describe('when cluster creation is completed', () => { }); }); }); + + describe('OSDs page', () => { + const osds = new OSDsPageHelper(); + + beforeEach(() => { + osds.navigateTo(); + }); + + it('should check if osds are created', { retries: 1 }, () => { + osds.expectTableCount('total', 2); + }); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html index 
fa010fdcd8ede..f95dfdb910f81 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html @@ -9,6 +9,13 @@ class="bold">Hosts {{ hostsCount }} + + Storage Capacity + Number of devices: {{ filteredDevices.length }}. Raw capacity: + {{ capacity | dimlessBinary }}. +
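The new Storage Capacity row surfaces how many devices were picked in the Create OSDs step and their summed raw size, rendered through the dimlessBinary pipe. A minimal sketch of that derivation, mirroring the _.sumBy(..., 'sys_api.size') call the selection modal uses further down (the InventoryDevice shape here is trimmed to the one field the sum reads):

    import _ from 'lodash';

    // Trimmed device shape: only the field the capacity sum actually reads.
    interface InventoryDevice {
      sys_api: { size: number }; // raw size in bytes, as reported by the orchestrator
    }

    // Raw capacity is the sum of the selected devices' sizes; the template then
    // formats it with the dimlessBinary pipe (bytes -> KiB/MiB/GiB/...).
    function rawCapacity(devices: InventoryDevice[]): number {
      return _.sumBy(devices, 'sys_api.size');
    }

    // e.g. two 4 TiB disks -> 8796093022208 bytes, displayed as roughly "8 TiB"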
    diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts index c78e8f910ece6..3abbcb122865e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts @@ -4,6 +4,8 @@ import _ from 'lodash'; import { HostService } from '~/app/shared/api/host.service'; import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; +import { WizardStepsService } from '~/app/shared/services/wizard-steps.service'; +import { InventoryDevice } from '../inventory/inventory-devices/inventory-device.model'; @Component({ selector: 'cd-create-cluster-review', @@ -18,8 +20,10 @@ export class CreateClusterReviewComponent implements OnInit { labelOccurrences = {}; hostsCountPerLabel: object[] = []; uniqueLabels: Set = new Set(); + filteredDevices: InventoryDevice[] = []; + capacity = 0; - constructor(private hostService: HostService) {} + constructor(private hostService: HostService, public wizardStepService: WizardStepsService) {} ngOnInit() { this.hostsDetails = { @@ -82,5 +86,8 @@ export class CreateClusterReviewComponent implements OnInit { this.hostsByLabel['data'] = [...this.hostsCountPerLabel]; this.hostsDetails['data'] = [...this.hosts]; }); + + this.filteredDevices = this.wizardStepService.osdDevices; + this.capacity = this.wizardStepService.osdCapacity; } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html index 38887328ec3ab..d9e8ec43d9cc9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html @@ -44,6 +44,15 @@
[create-cluster.component.html hunk: 9 added lines inserting a "Create OSDs" step heading and the embedded OSD creation form into the wizard template; the HTML markup is not preserved]
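When the wizard leaves the Create OSDs step, onNextStep() below names the drive group dashboard-<user>-<timestamp> and pushes its spec onto driveGroups, which onSubmit() later hands to osdService.create() as a tracked task. A rough, hypothetical example of one queued spec, assuming a plain rotational-device filter (key names follow the cephadm drive-group format; the real contents depend on the DriveGroup model and the filters chosen in the modal):

    // Hypothetical example only: the actual object comes from DriveGroup.spec.
    const queuedDriveGroupSpec = {
      service_type: 'osd',
      service_id: 'dashboard-admin-1629200000000', // `dashboard-${user}-${_.now()}`
      host_pattern: '*',                           // assumption: target all hosts
      data_devices: { rotational: true }           // assumption: an "all HDDs" filter
    };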
    diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss index 580c5219e9f2c..a2e88899a66e2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.scss @@ -24,3 +24,7 @@ cd-hosts { display: none; } } + +.alignForm { + margin-left: -1%; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts index b0564703840d4..8f9d7328e34ee 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts @@ -9,6 +9,7 @@ import { CephModule } from '~/app/ceph/ceph.module'; import { CoreModule } from '~/app/core/core.module'; import { HostService } from '~/app/shared/api/host.service'; import { ConfirmationModalComponent } from '~/app/shared/components/confirmation-modal/confirmation-modal.component'; +import { LoadingPanelComponent } from '~/app/shared/components/loading-panel/loading-panel.component'; import { AppConstants } from '~/app/shared/constants/app.constants'; import { ModalService } from '~/app/shared/services/modal.service'; import { WizardStepsService } from '~/app/shared/services/wizard-steps.service'; @@ -24,16 +25,19 @@ describe('CreateClusterComponent', () => { let modalServiceShowSpy: jasmine.Spy; const projectConstants: typeof AppConstants = AppConstants; - configureTestBed({ - imports: [ - HttpClientTestingModule, - RouterTestingModule, - ToastrModule.forRoot(), - SharedModule, - CoreModule, - CephModule - ] - }); + configureTestBed( + { + imports: [ + HttpClientTestingModule, + RouterTestingModule, + ToastrModule.forRoot(), + SharedModule, + CoreModule, + CephModule + ] + }, + [LoadingPanelComponent] + ); beforeEach(() => { fixture = TestBed.createComponent(CreateClusterComponent); @@ -91,7 +95,7 @@ describe('CreateClusterComponent', () => { component.onNextStep(); fixture.detectChanges(); expect(wizardStepServiceSpy).toHaveBeenCalledTimes(1); - expect(hostServiceSpy).toBeCalledTimes(2); + expect(hostServiceSpy).toBeCalledTimes(1); }); it('should show the button labels correctly', () => { @@ -102,6 +106,13 @@ describe('CreateClusterComponent', () => { let cancelBtnLabel = component.showCancelButtonLabel(); expect(cancelBtnLabel).toEqual('Cancel'); + component.onNextStep(); + fixture.detectChanges(); + submitBtnLabel = component.showSubmitButtonLabel(); + expect(submitBtnLabel).toEqual('Next'); + cancelBtnLabel = component.showCancelButtonLabel(); + expect(cancelBtnLabel).toEqual('Back'); + // Last page of the wizard component.onNextStep(); fixture.detectChanges(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts index b47a63e8cec00..6d78a2110b461 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts @@ -1,21 +1,26 @@ -import { 
Component, OnDestroy, TemplateRef, ViewChild } from '@angular/core'; +import { Component, EventEmitter, OnDestroy, Output, TemplateRef, ViewChild } from '@angular/core'; import { Router } from '@angular/router'; import { NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; +import _ from 'lodash'; import { forkJoin, Subscription } from 'rxjs'; import { finalize } from 'rxjs/operators'; import { ClusterService } from '~/app/shared/api/cluster.service'; import { HostService } from '~/app/shared/api/host.service'; +import { OsdService } from '~/app/shared/api/osd.service'; import { ConfirmationModalComponent } from '~/app/shared/components/confirmation-modal/confirmation-modal.component'; -import { ActionLabelsI18n, AppConstants } from '~/app/shared/constants/app.constants'; +import { ActionLabelsI18n, AppConstants, URLVerbs } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; +import { FinishedTask } from '~/app/shared/models/finished-task'; import { Permissions } from '~/app/shared/models/permissions'; import { WizardStepModel } from '~/app/shared/models/wizard-steps'; import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; import { ModalService } from '~/app/shared/services/modal.service'; import { NotificationService } from '~/app/shared/services/notification.service'; +import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; import { WizardStepsService } from '~/app/shared/services/wizard-steps.service'; +import { DriveGroup } from '../osd/osd-form/drive-group.model'; @Component({ selector: 'cd-create-cluster', @@ -29,10 +34,15 @@ export class CreateClusterComponent implements OnDestroy { currentStepSub: Subscription; permissions: Permissions; projectConstants: typeof AppConstants = AppConstants; - stepTitles = ['Add Hosts', 'Review']; + stepTitles = ['Add Hosts', 'Create OSDs', 'Review']; startClusterCreation = false; observables: any = []; modalRef: NgbModalRef; + driveGroup = new DriveGroup(); + driveGroups: Object[] = []; + + @Output() + submitAction = new EventEmitter(); constructor( private authStorageService: AuthStorageService, @@ -42,7 +52,10 @@ export class CreateClusterComponent implements OnDestroy { private notificationService: NotificationService, private actionLabels: ActionLabelsI18n, private clusterService: ClusterService, - private modalService: ModalService + private modalService: ModalService, + private taskWrapper: TaskWrapperService, + private osdService: OsdService, + private wizardStepService: WizardStepsService ) { this.permissions = this.authStorageService.getPermissions(); this.currentStepSub = this.stepsService.getCurrentStep().subscribe((step: WizardStepModel) => { @@ -95,17 +108,42 @@ export class CreateClusterComponent implements OnDestroy { .subscribe({ error: (error) => error.preventDefault() }); + + this.taskWrapper + .wrapTaskAroundCall({ + task: new FinishedTask('osd/' + URLVerbs.CREATE, { + tracking_id: _.join(_.map(this.driveGroups, 'service_id'), ', ') + }), + call: this.osdService.create(this.driveGroups) + }) + .subscribe({ + error: (error) => error.preventDefault(), + complete: () => { + this.submitAction.emit(); + } + }); } onNextStep() { if (!this.stepsService.isLastStep()) { this.hostService.list().subscribe((hosts) => { hosts.forEach((host) => { - if (host['status'] === 'maintenance') { - this.observables.push(this.hostService.update(host['hostname'], false, [], true)); + const index = host['labels'].indexOf('_no_schedule', 0); 
+ if (index > -1) { + host['labels'].splice(index, 1); + this.observables.push(this.hostService.update(host['hostname'], true, host['labels'])); } }); }); + this.driveGroup = this.wizardStepService.sharedData; + this.stepsService.getCurrentStep().subscribe((step: WizardStepModel) => { + this.currentStep = step; + }); + if (this.currentStep.stepIndex === 2 && this.driveGroup) { + const user = this.authStorageService.getUsername(); + this.driveGroup.setName(`dashboard-${user}-${_.now()}`); + this.driveGroups.push(this.driveGroup.spec); + } this.stepsService.moveToNextStep(); } else { this.onSubmit(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts index 2fc8b13b540e6..99313a5923aab 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.ts @@ -25,7 +25,7 @@ export class HostFormComponent extends CdForm implements OnInit { hostnames: string[]; addr: string; status: string; - allLabels: any; + allLabels: string[]; pageURL: string; messages = new SelectMessages({ @@ -60,7 +60,6 @@ export class HostFormComponent extends CdForm implements OnInit { } private createForm() { - const disableMaintenance = this.pageURL !== 'hosts'; this.hostForm = new CdFormGroup({ hostname: new FormControl('', { validators: [ @@ -74,7 +73,7 @@ export class HostFormComponent extends CdForm implements OnInit { validators: [CdValidators.ip()] }), labels: new FormControl([]), - maintenance: new FormControl({ value: disableMaintenance, disabled: disableMaintenance }) + maintenance: new FormControl({ value: false, disabled: this.pageURL !== 'hosts' }) }); } @@ -83,6 +82,9 @@ export class HostFormComponent extends CdForm implements OnInit { this.addr = this.hostForm.get('addr').value; this.status = this.hostForm.get('maintenance').value ? 
'maintenance' : ''; this.allLabels = this.hostForm.get('labels').value; + if (this.pageURL !== 'hosts' && !this.allLabels.includes('_no_schedule')) { + this.allLabels.push('_no_schedule'); + } this.taskWrapper .wrapTaskAroundCall({ task: new FinishedTask('host/' + URLVerbs.ADD, { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts index 1a3798b8ff9c0..c70f755f799e8 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts @@ -115,10 +115,8 @@ export class HostsComponent extends ListWithDetails implements OnInit { icon: Icons.enter, click: () => this.hostMaintenance(), disable: (selection: CdTableSelection) => - this.getDisable('maintenance', selection) || - this.isExecuting || - this.enableButton || - this.clusterCreation + this.getDisable('maintenance', selection) || this.isExecuting || this.enableButton, + visible: () => !this.clusterCreation }, { name: this.actionLabels.EXIT_MAINTENANCE, @@ -126,10 +124,8 @@ export class HostsComponent extends ListWithDetails implements OnInit { icon: Icons.exit, click: () => this.hostMaintenance(), disable: (selection: CdTableSelection) => - this.getDisable('maintenance', selection) || - this.isExecuting || - !this.enableButton || - this.clusterCreation + this.getDisable('maintenance', selection) || this.isExecuting || this.enableButton, + visible: () => !this.clusterCreation } ]; } @@ -189,6 +185,13 @@ export class HostsComponent extends ListWithDetails implements OnInit { this.orchService.status().subscribe((status: OrchestratorStatus) => { this.orchStatus = status; }); + + if (this.clusterCreation) { + const hiddenColumns = ['services', 'ceph_version']; + this.columns = this.columns.filter((col: any) => { + return !hiddenColumns.includes(col.prop); + }); + } } updateSelection(selection: CdTableSelection) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.html index 244bd8162cf4d..54cee708d2634 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.html @@ -4,7 +4,7 @@ [forceIdentifier]="true" [selectionType]="selectionType" columnMode="flex" - [autoReload]="false" + (fetchData)="getDevices()" [searchField]="false" (updateSelection)="updateSelection($event)" (columnFiltersChanged)="onColumnFiltersChanged($event)"> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.spec.ts index 5fce0381cd2a5..29a3ece96d8ae 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.spec.ts @@ -7,6 +7,7 @@ import { RouterTestingModule } from '@angular/router/testing'; import { ToastrModule } from 'ngx-toastr'; +import { HostService } from '~/app/shared/api/host.service'; 
import { OrchestratorService } from '~/app/shared/api/orchestrator.service'; import { TableActionsComponent } from '~/app/shared/datatable/table-actions/table-actions.component'; import { CdTableAction } from '~/app/shared/models/cd-table-action'; @@ -23,6 +24,7 @@ describe('InventoryDevicesComponent', () => { let component: InventoryDevicesComponent; let fixture: ComponentFixture; let orchService: OrchestratorService; + let hostService: HostService; const fakeAuthStorageService = { getPermissions: () => { @@ -59,6 +61,7 @@ describe('InventoryDevicesComponent', () => { beforeEach(() => { fixture = TestBed.createComponent(InventoryDevicesComponent); component = fixture.componentInstance; + hostService = TestBed.inject(HostService); orchService = TestBed.inject(OrchestratorService); }); @@ -70,6 +73,15 @@ describe('InventoryDevicesComponent', () => { expect(component.columns.every((column) => Boolean(column.prop))).toBeTruthy(); }); + it('should call inventoryDataList only when showOnlyAvailableData is true', () => { + const hostServiceSpy = spyOn(hostService, 'inventoryDeviceList').and.callThrough(); + component.getDevices(); + expect(hostServiceSpy).toBeCalledTimes(0); + component.showAvailDeviceOnly = true; + component.getDevices(); + expect(hostServiceSpy).toBeCalledTimes(1); + }); + describe('table actions', () => { const fakeDevices = require('./fixtures/inventory_list_response.json'); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts index fa778d5b4f29b..e0d82cb1975db 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts @@ -43,6 +43,7 @@ export class InventoryDevicesComponent implements OnInit, OnDestroy { // Devices @Input() devices: InventoryDevice[] = []; + @Input() showAvailDeviceOnly = false; // Do not display these columns @Input() hiddenColumns: string[] = []; @@ -175,6 +176,22 @@ export class InventoryDevicesComponent implements OnInit, OnDestroy { } } + getDevices() { + if (this.showAvailDeviceOnly) { + this.hostService.inventoryDeviceList().subscribe( + (devices: InventoryDevice[]) => { + this.devices = _.filter(devices, 'available'); + this.devices = [...this.devices]; + }, + () => { + this.devices = []; + } + ); + } else { + this.devices = [...this.devices]; + } + } + ngOnDestroy() { if (this.fetchInventorySub) { this.fetchInventorySub.unsubscribe(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts index da24403dee1e1..dd60f7959fd9d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts @@ -52,17 +52,16 @@ describe('InventoryComponent', () => { describe('after ngOnInit', () => { it('should load devices', () => { fixture.detectChanges(); - expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(1, undefined, false); component.refresh(); // click refresh button - expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(2, undefined, true); + 
expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(1, undefined, false); const newHost = 'host0'; component.hostname = newHost; fixture.detectChanges(); component.ngOnChanges(); - expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(3, newHost, false); + expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(2, newHost, false); component.refresh(); // click refresh button - expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(4, newHost, true); + expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(3, newHost, true); }); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html index 30effc21b53e2..3e53d5c410cd2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html @@ -20,6 +20,7 @@ diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.ts index fe77a4fe4d8a0..1909803dc3380 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.ts @@ -11,6 +11,7 @@ import { Icons } from '~/app/shared/enum/icons.enum'; import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { CdTableColumnFiltersChange } from '~/app/shared/models/cd-table-column-filters-change'; +import { WizardStepsService } from '~/app/shared/services/wizard-steps.service'; @Component({ selector: 'cd-osd-devices-selection-modal', @@ -42,7 +43,8 @@ export class OsdDevicesSelectionModalComponent implements AfterViewInit { constructor( private formBuilder: CdFormBuilder, public activeModal: NgbActiveModal, - public actionLabels: ActionLabelsI18n + public actionLabels: ActionLabelsI18n, + public wizardStepService: WizardStepsService ) { this.action = actionLabels.ADD; this.createForm(); @@ -80,6 +82,8 @@ export class OsdDevicesSelectionModalComponent implements AfterViewInit { this.filteredDevices = event.data; this.capacity = _.sumBy(this.filteredDevices, 'sys_api.size'); this.event = event; + this.wizardStepService.osdDevices = this.filteredDevices; + this.wizardStepService.osdCapacity = this.capacity; } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html index 390f673bdaf5b..675e20fcf59a6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html @@ -7,8 +7,9 @@ [formGroup]="form" novalidate>
[osd-form.component.html hunk: the "{{ action | titlecase }} {{ resource | upperFirst }}" heading line is removed and re-added with changed attributes, presumably so the title can be hidden when the form is embedded in the create-cluster wizard; the HTML markup is not preserved]
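Several hunks above lean on shared state in WizardStepsService: the devices selection modal writes osdDevices and osdCapacity, the review step reads them back, and onNextStep() pulls the drive group out of sharedData. A minimal sketch of those additions, assuming plain public fields on the service (the import path and decorator are illustrative, not taken from the actual wizard-steps.service.ts hunk):

    import { Injectable } from '@angular/core';

    import { InventoryDevice } from '../../ceph/cluster/inventory/inventory-devices/inventory-device.model';

    @Injectable({ providedIn: 'root' })
    export class WizardStepsService {
      // ...existing step-navigation members (getCurrentStep, moveToNextStep, isLastStep, ...)

      // Devices and summed capacity picked in the OSD selection modal,
      // shown again on the Review step.
      osdDevices: InventoryDevice[] = [];
      osdCapacity = 0;

      // Drive group assembled by the embedded OSD form, consumed by onNextStep().
      sharedData: any;
    }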