Skip to content

Commit 41d54ba

Browse files
authored
stable-25-3: cherry pick PR 27297, PR 28613, PR 29360, PR 29466 (#29171)
2 parents fa180ee + 7d491af commit 41d54ba

File tree

11 files changed

+578
-11
lines changed

11 files changed

+578
-11
lines changed

ydb/core/blobstorage/nodewarden/blobstorage_node_warden_ut.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77
#include <ydb/core/blobstorage/base/blobstorage_events.h>
88
#include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.h>
99
#include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_http_request.h>
10+
#include <ydb/core/blobstorage/vdisk/localrecovery/localrecovery_public.h>
11+
#include <ydb/core/blobstorage/vdisk/common/vdisk_events.h>
1012
#include <ydb/core/mind/bscontroller/bsc.h>
1113
#include <ydb/core/util/actorsys_test/testactorsys.h>
1214

1315
#include <ydb/library/pdisk_io/sector_map.h>
16+
#include <ydb/core/testlib/actors/block_events.h>
1417
#include <ydb/core/util/random.h>
1518

1619
#include <google/protobuf/text_format.h>
@@ -1119,6 +1122,79 @@ Y_UNIT_TEST_SUITE(TBlobStorageWardenTest) {
11191122
CheckInferredPDiskSettings(runtime, fakeWhiteboard, fakeNodeWarden,
11201123
pdiskId, 12, 2u);
11211124
}
1125+
1126+
// Sends a ChangeGroupSizeInUnits command for group `groupId` of storage pool `poolName`
// to BSController and asserts that the controller reports success.
void ChangeGroupSizeInUnits(TTestBasicRuntime& runtime, TString poolName, ui32 groupId, ui32 groupSizeInUnits) {
    // Edge actor that receives the controller's response.
    TActorId replyTo = runtime.AllocateEdgeActor();

    // Box id, pool id and config generation are taken from the pool's current description.
    auto pool = DescribeStoragePool(runtime, poolName);

    auto configRequest = std::make_unique<TEvBlobStorage::TEvControllerConfigRequest>();
    auto* resize = configRequest->Record.MutableRequest()->AddCommand()->MutableChangeGroupSizeInUnits();
    resize->SetBoxId(pool.GetBoxId());
    resize->SetItemConfigGeneration(pool.GetItemConfigGeneration());
    resize->SetStoragePoolId(pool.GetStoragePoolId());
    resize->AddGroupId(groupId);
    resize->SetSizeInUnits(groupSizeInUnits);

    NTabletPipe::TClientConfig clientConfig;
    clientConfig.RetryPolicy = NTabletPipe::TClientRetryPolicy::WithRetries();
    // Ownership of the request is handed over to the runtime.
    runtime.SendToPipe(MakeBSControllerID(), replyTo, configRequest.release(), 0, clientConfig);

    auto response = runtime.GrabEdgeEventRethrow<TEvBlobStorage::TEvControllerConfigResponse>(replyTo);
    VERBOSE_COUT("TEvControllerConfigResponse# " << response->ToString());
    UNIT_ASSERT_VALUES_EQUAL(response->Get()->Record.GetResponse().GetSuccess(), true);
}
1146+
1147+
// Waits on the `fakeWhiteboard` edge actor for the first TEvVDiskStateUpdate that belongs to
// group `groupId` and carries a GroupSizeInUnits field, then asserts its group generation and
// GroupSizeInUnits. Updates for other groups, or without the field, are skipped.
// The test fails if no matching update is seen within `simTimeout` of simulated time.
void CheckVDiskStateUpdate(TTestBasicRuntime& runtime, TActorId fakeWhiteboard, ui32 groupId,
        ui32 expectedGroupGeneration, ui32 expectedGroupSizeInUnits,
        TDuration simTimeout = TDuration::Seconds(10)) {
    const TInstant deadline = runtime.GetCurrentTime() + simTimeout;
    while (true) {
        UNIT_ASSERT_LT(runtime.GetCurrentTime(), deadline);

        const auto ev = runtime.GrabEdgeEventRethrow<NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate>(
            fakeWhiteboard, deadline - runtime.GetCurrentTime());
        // NOTE(review): a grab with a finite timeout is expected to return an empty pointer when
        // the timeout expires (confirm against TTestActorRuntime); fail the test cleanly instead
        // of dereferencing it below.
        UNIT_ASSERT_C(ev, "No TEvVDiskStateUpdate received before the deadline");
        VERBOSE_COUT(" Got TEvVDiskStateUpdate# " << ev->ToString());

        // `ev` stays alive for the whole iteration, so the record can be read in place.
        const auto& vdiskInfo = ev->Get()->Record;
        if (vdiskInfo.GetVDiskId().GetGroupID() != groupId || !vdiskInfo.HasGroupSizeInUnits()) {
            continue;
        }

        UNIT_ASSERT_VALUES_EQUAL(vdiskInfo.GetVDiskId().GetGroupGeneration(), expectedGroupGeneration);
        UNIT_ASSERT_VALUES_EQUAL(vdiskInfo.GetGroupSizeInUnits(), expectedGroupSizeInUnits);
        break;
    }
}
1167+
1168+
// Scenario: a group's SizeInUnits is changed while TEvLocalRecoveryDone events are held back.
// While they are held, whiteboard updates for the group are expected to still report
// generation 1 / size 0, both before and after the resize request. Once the held events are
// released, an update with generation 2 / size 2 is expected.
CUSTOM_UNIT_TEST(TestEvVGenerationChangeRace) {
    TTestBasicRuntime runtime(1, false);
    Setup(runtime, "", nullptr);

    // Proxy components log errors only; VDisk, node warden and controller components log at INFO.
    for (const auto service : {NKikimrServices::BS_PROXY,
                               NKikimrServices::BS_PROXY_PUT,
                               NKikimrServices::BS_PROXY_BLOCK}) {
        runtime.SetLogPriority(service, NLog::PRI_ERROR);
    }
    for (const auto service : {NKikimrServices::BS_SKELETON,
                               NKikimrServices::BS_LOCALRECOVERY,
                               NKikimrServices::BS_NODE,
                               NKikimrServices::BS_CONTROLLER}) {
        runtime.SetLogPriority(service, NLog::PRI_INFO);
    }

    // Register an edge actor under the node's whiteboard service id so that
    // CheckVDiskStateUpdate can grab TEvVDiskStateUpdate events from it.
    const ui32 nodeId = runtime.GetNodeId(0);
    TActorId whiteboardEdge = runtime.AllocateEdgeActor();
    runtime.RegisterService(NNodeWhiteboard::MakeNodeWhiteboardServiceId(nodeId), whiteboardEdge);

    VERBOSE_COUT(" Starting test");

    // Installed before the pool is created.
    // NOTE(review): presumably this intercepts every TEvLocalRecoveryDone until Unblock() - confirm.
    TBlockEvents<TEvBlobStorage::TEvLocalRecoveryDone> recoveryDoneBlock(runtime);

    const TString poolName = "testEvVGenerationChangeRace";
    CreateStoragePool(runtime, poolName, "pool-kind-1");
    const ui32 groupId = GetGroupFromPool(runtime, poolName);

    // Freshly created group: generation 1, size 0.
    CheckVDiskStateUpdate(runtime, whiteboardEdge, groupId, 1, 0u);

    // Resize while TEvLocalRecoveryDone is still held: the reported state must not change yet.
    ChangeGroupSizeInUnits(runtime, poolName, groupId, 2u);
    CheckVDiskStateUpdate(runtime, whiteboardEdge, groupId, 1, 0u);

    // Release the held events: the new generation and size must now be reported.
    recoveryDoneBlock.Stop().Unblock();
    CheckVDiskStateUpdate(runtime, whiteboardEdge, groupId, 2, 2u);
}
1197+
11221198
}
11231199

11241200
} // namespace NBlobStorageNodeWardenTest

ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1927,6 +1927,17 @@ namespace NKikimr {
19271927
ApplyHugeBlobSize(Config->MinHugeBlobInBytes);
19281928
Y_VERIFY_S(MinHugeBlobInBytes, VCtx->VDiskLogPrefix);
19291929

1930+
if (Config->GroupSizeInUnits != GInfo->GroupSizeInUnits) {
1931+
Config->GroupSizeInUnits = GInfo->GroupSizeInUnits;
1932+
Y_VERIFY(PDiskCtx);
1933+
Y_VERIFY(PDiskCtx->Dsk);
1934+
ctx.Send(PDiskCtx->PDiskId,
1935+
new NPDisk::TEvYardResize(
1936+
PDiskCtx->Dsk->Owner,
1937+
PDiskCtx->Dsk->OwnerRound,
1938+
Config->GroupSizeInUnits));
1939+
}
1940+
19301941
// handle special case when donor disk starts and finds out that it has been wiped out
19311942
if (ev->Get()->LsnMngr->GetOriginallyRecoveredLsn() == 0 && Config->BaseInfo.DonorMode) {
19321943
// send drop donor cmd to NodeWarden
@@ -2444,10 +2455,11 @@ namespace NKikimr {
24442455
GInfo = msg->NewInfo;
24452456
SelfVDiskId = msg->NewVDiskId;
24462457

2447-
if (Config->GroupSizeInUnits != GInfo->GroupSizeInUnits) {
2458+
if (PDiskCtx && Config->GroupSizeInUnits != GInfo->GroupSizeInUnits) {
24482459
Config->GroupSizeInUnits = GInfo->GroupSizeInUnits;
24492460
UpdateWhiteboard(ctx);
24502461

2462+
Y_VERIFY(PDiskCtx->Dsk);
24512463
ctx.Send(PDiskCtx->PDiskId,
24522464
new NPDisk::TEvYardResize(
24532465
PDiskCtx->Dsk->Owner,
ydb/tests/compatibility/test_infer_pdisk_expected_slot_count.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
# -*- coding: utf-8 -*-
2+
import pytest
3+
import time
4+
import logging
5+
6+
from ydb.tests.library.compatibility.fixtures import RestartToAnotherVersionFixture
7+
from ydb.tests.library.compatibility.fixtures import init_stable_binary_path, init_stable_name
8+
from ydb.tests.library.compatibility.fixtures import inter_stable_binary_path, inter_stable_name
9+
from ydb.tests.library.compatibility.fixtures import current_binary_path, current_name
10+
from ydb.tests.library.common.types import Erasure
11+
from ydb.core.protos import blobstorage_config_pb2
12+
13+
logger = logging.getLogger(__name__)

# Drive path added to every host config by the test and used to select the PDisks under test.
# NOTE(review): presumably a SectorMap-backed device of 480 GB - confirm the path format.
CONST_PDISK_PATH = "SectorMap:TestInferPDiskSettings:480"
# ExpectedSlotCount configured explicitly for the drive before the version change.
CONST_EXPECTED_SLOT_COUNT = 14
# Sizes in bytes.
CONST_480_GB = 480 * 1024 ** 3
CONST_10_GB = 10 * 1024 ** 3

# One [binary before restart, binary after restart] pair per parametrized run.
all_binary_combinations_restart = [
    [init_stable_binary_path, inter_stable_binary_path],
    [inter_stable_binary_path, current_binary_path],
    [init_stable_binary_path, current_binary_path],
]
# Test ids, listed in the same order as all_binary_combinations_restart.
all_binary_combinations_ids_restart = [
    f"restart_{from_name}_to_{to_name}"
    for from_name, to_name in (
        (init_stable_name, inter_stable_name),
        (inter_stable_name, current_name),
        (init_stable_name, current_name),
    )
]
30+
31+
32+
@pytest.mark.parametrize("base_setup",
                         argvalues=all_binary_combinations_restart,
                         ids=all_binary_combinations_ids_restart,
                         indirect=True)
class TestUpgradeThenRollback(RestartToAnotherVersionFixture):
    """Checks PDisk slot settings across a restart to another version and back.

    The cluster starts on ``versions[0]`` with an explicit ExpectedSlotCount on the test
    drive, is restarted on ``versions[1]`` where the drive is switched to inferred slot
    settings, and is then restarted back on ``versions[0]``. After each step the PDisks
    reported by the base config are compared with the expected values.
    """

    @pytest.fixture(autouse=True, scope="function")
    def setup(self):
        """Starts a two-node cluster and adds the test drive to every host config."""
        cluster_generator = self.setup_cluster(
            erasure=Erasure.NONE,
            nodes=2,
            use_in_memory_pdisks=False)
        next(cluster_generator)

        host_configs = self.cluster.client.read_host_configs()
        for host_config in host_configs:
            drive = host_config.Drive.add()
            drive.Path = CONST_PDISK_PATH
            drive.PDiskConfig.ExpectedSlotCount = CONST_EXPECTED_SLOT_COUNT
        self.cluster.client.define_host_configs(host_configs)

        yield

        # Resume the setup generator so that whatever it does after its own `yield` runs.
        # NOTE(review): assumes setup_cluster performs its teardown after the yield - confirm.
        # The default argument makes this a no-op when the generator simply finishes.
        next(cluster_generator, None)

    def pdisk_list(self):
        """Equivalent to `dstool pdisk list`, restricted to PDisks on CONST_PDISK_PATH."""
        base_config = self.cluster.client.query_base_config()
        return [
            pdisk
            for pdisk in base_config.BaseConfig.PDisk
            if pdisk.Path == CONST_PDISK_PATH
        ]

    def wait_and_check_pdisk_list(self, check_pdisks_fn, deadline, delay=1):
        """Polls `pdisk_list()` until `check_pdisks_fn` accepts it.

        :param check_pdisks_fn: callable taking the PDisk list and raising AssertionError
            when the list is not yet in the expected state
        :param deadline: `time.time()` value after which a failed check is re-raised
        :param delay: seconds to sleep between attempts
        :raises AssertionError: the last failure, once the deadline has passed
        """
        while True:
            pdisks = self.pdisk_list()
            try:
                # The checks iterate over the list, so an empty list would pass vacuously.
                assert pdisks, f"no PDisk with path {CONST_PDISK_PATH} in base config"
                check_pdisks_fn(pdisks)
            except AssertionError:
                if time.time() > deadline:
                    logger.warning(f"pdisk_list incorrect: {pdisks}")
                    raise
                time.sleep(delay)
            else:
                logger.info(f"pdisk_list good: {pdisks}")
                return

    def test(self):
        """Runs the three stages: initial version, other version, initial version again."""
        assert self.current_binary_paths_index == 0
        logger.info(f"Test started on {self.versions[0]} {time.time()=}")
        #################################################################

        t1 = time.time()
        timeout = 20

        def check_initial(pdisks):
            # Explicit ExpectedSlotCount is in effect; metrics must have been updated after t1.
            for pdisk in pdisks:
                assert pdisk.Path == CONST_PDISK_PATH
                assert pdisk.PDiskConfig.ExpectedSlotCount == CONST_EXPECTED_SLOT_COUNT
                assert pdisk.DriveStatus == blobstorage_config_pb2.EDriveStatus.ACTIVE
                assert pdisk.PDiskMetrics.TotalSize == CONST_480_GB
                if self.versions[0] < (25, 3):
                    assert not pdisk.PDiskMetrics.HasField('SlotCount')
                    assert not pdisk.PDiskMetrics.HasField('SlotSizeInUnits')
                else:
                    assert pdisk.PDiskMetrics.SlotCount == CONST_EXPECTED_SLOT_COUNT
                    assert pdisk.PDiskMetrics.HasField('SlotSizeInUnits') and \
                        pdisk.PDiskMetrics.SlotSizeInUnits == 0
                assert pdisk.PDiskMetrics.UpdateTimestamp * 1e-6 > t1
                assert pdisk.PDiskMetrics.UpdateTimestamp * 1e-6 < t1 + timeout
        self.wait_and_check_pdisk_list(check_initial, deadline=t1+timeout)

        self.change_cluster_version()
        assert self.current_binary_paths_index == 1
        logger.info(f"Restarted on version {self.versions[1]} {time.time()=}")
        ######################################################################

        t2 = time.time()
        host_configs = self.cluster.client.read_host_configs()
        for host_config in host_configs:
            # The drive added by the fixture is expected at index 1; the path is verified below.
            drive = host_config.Drive[1]
            assert drive.Path == CONST_PDISK_PATH
            # Replace the explicit PDiskConfig with an empty one and enable inferred settings.
            drive.ClearField('PDiskConfig')
            drive.PDiskConfig.SetInParent()
            drive.InferPDiskSlotCountFromUnitSize = CONST_10_GB
            drive.InferPDiskSlotCountMax = 32
        self.cluster.client.define_host_configs(host_configs)
        logger.info(f"Inferred PDisk setting applied {time.time()=}")

        self.cluster.client.pdisk_set_all_active(pdisk_path=CONST_PDISK_PATH)
        logger.info(f"Drives activated {time.time()=}")

        deadline = time.time() + timeout

        def check_inferred(pdisks):
            # Inferred settings are in effect; metrics must have been updated after t2.
            for pdisk in pdisks:
                assert pdisk.Path == CONST_PDISK_PATH
                assert pdisk.DriveStatus == blobstorage_config_pb2.EDriveStatus.ACTIVE
                assert not pdisk.HasField('PDiskConfig')
                assert pdisk.ExpectedSlotCount == 16  # hardcoded default
                assert pdisk.PDiskMetrics.TotalSize == CONST_480_GB
                assert pdisk.PDiskMetrics.SlotCount == 24
                assert pdisk.PDiskMetrics.SlotSizeInUnits == 2
                assert pdisk.InferPDiskSlotCountFromUnitSize == CONST_10_GB
                assert pdisk.InferPDiskSlotCountMax == 32
                assert pdisk.PDiskMetrics.UpdateTimestamp * 1e-6 > t2
                assert pdisk.PDiskMetrics.UpdateTimestamp * 1e-6 < deadline
        self.wait_and_check_pdisk_list(check_inferred, deadline)

        t3 = time.time()
        self.change_cluster_version()
        assert self.current_binary_paths_index == 0
        logger.info(f"Restarted back on version {self.versions[0]} {time.time()=}")
        ###########################################################################

        self.cluster.client.pdisk_set_all_active(pdisk_path=CONST_PDISK_PATH)
        logger.info(f"Drives activated {time.time()=}")

        deadline = time.time() + timeout

        def check_after_rollback(pdisks):
            # Back on the initial version; metrics must have been updated after t3.
            for pdisk in pdisks:
                assert pdisk.Path == CONST_PDISK_PATH
                assert pdisk.DriveStatus == blobstorage_config_pb2.EDriveStatus.ACTIVE
                assert not pdisk.HasField('PDiskConfig')
                assert pdisk.ExpectedSlotCount == 16  # hardcoded default
                assert pdisk.PDiskMetrics.TotalSize == CONST_480_GB
                if self.versions[0] < (25, 3):
                    assert not pdisk.PDiskMetrics.HasField('SlotCount')
                    assert not pdisk.PDiskMetrics.HasField('SlotSizeInUnits')
                    assert pdisk.InferPDiskSlotCountFromUnitSize == 0
                    assert pdisk.InferPDiskSlotCountMax == 0
                else:
                    assert pdisk.PDiskMetrics.HasField('SlotCount') and pdisk.PDiskMetrics.SlotCount == 24
                    assert pdisk.PDiskMetrics.HasField('SlotSizeInUnits') and pdisk.PDiskMetrics.SlotSizeInUnits == 2
                    assert pdisk.InferPDiskSlotCountFromUnitSize == CONST_10_GB
                    assert pdisk.InferPDiskSlotCountMax == 32
                assert pdisk.PDiskMetrics.UpdateTimestamp * 1e-6 > t3
                assert pdisk.PDiskMetrics.UpdateTimestamp * 1e-6 < deadline
        self.wait_and_check_pdisk_list(check_after_rollback, deadline)

ydb/tests/compatibility/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ TEST_SRCS(
2626
test_node_broker_delta_protocol.py
2727
test_table_schema_compatibility.py
2828
test_workload_manager.py
29+
test_infer_pdisk_expected_slot_count.py
2930
udf/test_datetime2.py
3031
udf/test_digest.py
3132
udf/test_digest_regression.py

0 commit comments

Comments
 (0)