Skip to content

Commit 9abce95

Browse files
Copilot, Gazizonoki
authored and committed
Add fulltext index C++ SDK support (#27727)
1 parent 63c53f4 commit 9abce95

File tree

5 files changed

+381
-11
lines changed

5 files changed

+381
-11
lines changed

.github/last_commit.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
017cbe9b2d0364e7a6c3922c3be64c20cc502810
1+
ce896ce573cceed244362a9ad53e8fff47f77cf2

include/ydb-cpp-sdk/client/table/table.h

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class ExplicitPartitions;
3131
class GlobalIndexSettings;
3232
class VectorIndexSettings;
3333
class KMeansTreeSettings;
34+
class FulltextIndexSettings;
3435
class PartitioningSettings;
3536
class ReadReplicasSettings;
3637
class DateTypeColumnModeSettings;
@@ -295,6 +296,49 @@ struct TKMeansTreeSettings {
295296
void Out(IOutputStream &o) const;
296297
};
297298

299+
struct TFulltextIndexSettings {
300+
public:
301+
enum class ELayout {
302+
Unspecified = 0,
303+
Flat,
304+
};
305+
306+
enum class ETokenizer {
307+
Unspecified = 0,
308+
Whitespace,
309+
Standard,
310+
Keyword,
311+
};
312+
313+
struct TAnalyzers {
314+
std::optional<ETokenizer> Tokenizer;
315+
std::optional<std::string> Language;
316+
std::optional<bool> UseFilterLowercase;
317+
std::optional<bool> UseFilterStopwords;
318+
std::optional<bool> UseFilterNgram;
319+
std::optional<bool> UseFilterEdgeNgram;
320+
std::optional<int32_t> FilterNgramMinLength;
321+
std::optional<int32_t> FilterNgramMaxLength;
322+
std::optional<bool> UseFilterLength;
323+
std::optional<int32_t> FilterLengthMin;
324+
std::optional<int32_t> FilterLengthMax;
325+
};
326+
327+
struct TColumnAnalyzers {
328+
std::optional<std::string> Column;
329+
std::optional<TAnalyzers> Analyzers;
330+
};
331+
332+
std::optional<ELayout> Layout;
333+
std::vector<TColumnAnalyzers> Columns;
334+
335+
static TFulltextIndexSettings FromProto(const Ydb::Table::FulltextIndexSettings& proto);
336+
337+
void SerializeTo(Ydb::Table::FulltextIndexSettings& settings) const;
338+
339+
void Out(IOutputStream& o) const;
340+
};
341+
298342
//! Represents index description
299343
class TIndexDescription {
300344
friend class NYdb::TProtoAccessor;
@@ -306,7 +350,7 @@ class TIndexDescription {
306350
const std::vector<std::string>& indexColumns,
307351
const std::vector<std::string>& dataColumns = {},
308352
const std::vector<TGlobalIndexSettings>& globalIndexSettings = {},
309-
const std::variant<std::monostate, TKMeansTreeSettings>& specializedIndexSettings = {}
353+
const std::variant<std::monostate, TKMeansTreeSettings, TFulltextIndexSettings>& specializedIndexSettings = {}
310354
);
311355

312356
TIndexDescription(
@@ -320,7 +364,7 @@ class TIndexDescription {
320364
EIndexType GetIndexType() const;
321365
const std::vector<std::string>& GetIndexColumns() const;
322366
const std::vector<std::string>& GetDataColumns() const;
323-
const std::variant<std::monostate, TKMeansTreeSettings>& GetIndexSettings() const;
367+
// Returns a const reference to the specialized index settings variant; its alternatives are
// std::monostate, TKMeansTreeSettings and TFulltextIndexSettings.
const std::variant<std::monostate, TKMeansTreeSettings, TFulltextIndexSettings>& GetIndexSettings() const;
324368
uint64_t GetSizeBytes() const;
325369

326370
void SerializeTo(Ydb::Table::TableIndex& proto) const;
@@ -340,7 +384,7 @@ class TIndexDescription {
340384
std::vector<std::string> IndexColumns_;
341385
std::vector<std::string> DataColumns_;
342386
std::vector<TGlobalIndexSettings> GlobalIndexSettings_;
343-
std::variant<std::monostate, TKMeansTreeSettings> SpecializedIndexSettings_;
387+
std::variant<std::monostate, TKMeansTreeSettings, TFulltextIndexSettings> SpecializedIndexSettings_;
344388
uint64_t SizeBytes_ = 0;
345389
};
346390

@@ -755,6 +799,9 @@ class TTableDescription {
755799
// vector KMeansTree
756800
void AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TKMeansTreeSettings& indexSettings);
757801
void AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TKMeansTreeSettings& indexSettings);
802+
// fulltext
803+
// Overload without data columns: takes the index name, the indexed columns and the fulltext settings.
void AddFulltextIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TFulltextIndexSettings& indexSettings);
804+
// Overload that additionally takes dataColumns between the indexed columns and the fulltext settings.
void AddFulltextIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TFulltextIndexSettings& indexSettings);
758805

759806
// default
760807
void AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);
@@ -996,6 +1043,10 @@ class TTableBuilder {
9961043
TTableBuilder& AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TKMeansTreeSettings& indexSettings);
9971044
TTableBuilder& AddVectorKMeansTreeIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TKMeansTreeSettings& indexSettings);
9981045

1046+
// fulltext
1047+
// Overload without data columns: takes the index name, the indexed columns and the fulltext settings.
// Returns TTableBuilder& (presumably *this, for call chaining - confirm in the implementation).
TTableBuilder& AddFulltextIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const TFulltextIndexSettings& indexSettings);
1048+
// Overload that additionally takes dataColumns between the indexed columns and the fulltext settings.
// Returns TTableBuilder& (presumably *this, for call chaining - confirm in the implementation).
TTableBuilder& AddFulltextIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns, const TFulltextIndexSettings& indexSettings);
1049+
9991050
// default
10001051
TTableBuilder& AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns, const std::vector<std::string>& dataColumns);
10011052
TTableBuilder& AddSecondaryIndex(const std::string& indexName, const std::vector<std::string>& indexColumns);

include/ydb-cpp-sdk/client/table/table_enum.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ enum class EIndexType {
3535
GlobalAsync,
3636
GlobalUnique,
3737
GlobalVectorKMeansTree,
38+
GlobalFulltext,
3839

3940
Unknown = std::numeric_limits<int>::max()
4041
};

src/client/table/out.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,90 @@ Y_DECLARE_OUT_SPEC(, NYdb::NTable::TKMeansTreeSettings, stream, value) {
8383
", levels: " << value.Levels <<
8484
" }";
8585
}
86+
87+
// Writes the lowercase name of a fulltext index layout to the stream.
// A value that matches no enumerator produces no output.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TFulltextIndexSettings::ELayout, stream, value) {
    using ELayout = NYdb::NTable::TFulltextIndexSettings::ELayout;

    if (value == ELayout::Unspecified) {
        stream << "unspecified";
    } else if (value == ELayout::Flat) {
        stream << "flat";
    }
}
97+
98+
// Writes the lowercase name of a fulltext tokenizer to the stream.
// A value that matches no enumerator produces no output.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TFulltextIndexSettings::ETokenizer, stream, value) {
    using ETokenizer = NYdb::NTable::TFulltextIndexSettings::ETokenizer;

    // Cases follow the declaration order of the enum.
    switch (value) {
        case ETokenizer::Unspecified:
            stream << "unspecified";
            return;
        case ETokenizer::Whitespace:
            stream << "whitespace";
            return;
        case ETokenizer::Standard:
            stream << "standard";
            return;
        case ETokenizer::Keyword:
            stream << "keyword";
            return;
    }
}
114+
115+
// Writes analyzer settings as "{ tokenizer: <name>, <key>: <value>, ... }".
// The tokenizer is always printed (as "unspecified" when unset); every other
// option is printed only when it holds a value, in declaration order.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TFulltextIndexSettings::TAnalyzers, stream, value) {
    using ETokenizer = NYdb::NTable::TFulltextIndexSettings::ETokenizer;

    // Textual form shared by all boolean options.
    const auto boolText = [](bool flag) -> const char* {
        return flag ? "true" : "false";
    };

    const ETokenizer tokenizer = value.Tokenizer.has_value() ? *value.Tokenizer : ETokenizer::Unspecified;
    stream << "{ tokenizer: " << tokenizer;

    if (value.Language) {
        stream << ", language: " << *value.Language;
    }

    if (value.UseFilterLowercase) {
        stream << ", use_filter_lowercase: " << boolText(*value.UseFilterLowercase);
    }
    if (value.UseFilterStopwords) {
        stream << ", use_filter_stopwords: " << boolText(*value.UseFilterStopwords);
    }
    if (value.UseFilterNgram) {
        stream << ", use_filter_ngram: " << boolText(*value.UseFilterNgram);
    }
    if (value.UseFilterEdgeNgram) {
        stream << ", use_filter_edge_ngram: " << boolText(*value.UseFilterEdgeNgram);
    }

    if (value.FilterNgramMinLength) {
        stream << ", filter_ngram_min_length: " << *value.FilterNgramMinLength;
    }
    if (value.FilterNgramMaxLength) {
        stream << ", filter_ngram_max_length: " << *value.FilterNgramMaxLength;
    }

    if (value.UseFilterLength) {
        stream << ", use_filter_length: " << boolText(*value.UseFilterLength);
    }
    if (value.FilterLengthMin) {
        stream << ", filter_length_min: " << *value.FilterLengthMin;
    }
    if (value.FilterLengthMax) {
        stream << ", filter_length_max: " << *value.FilterLengthMax;
    }

    stream << " }";
}
149+
150+
// Writes per-column analyzer settings as "{ column: <name>, analyzers: {...} }".
// Each part is printed only when it holds a value. The ", " separator is written
// only when both parts are present, so a column without analyzers is rendered
// as "{ column: <name> }" rather than with a dangling comma.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TFulltextIndexSettings::TColumnAnalyzers, stream, value) {
    const bool hasColumn = value.Column.has_value();
    const bool hasAnalyzers = value.Analyzers.has_value();

    stream << "{ ";
    if (hasColumn) {
        stream << "column: " << *value.Column;
    }
    if (hasColumn && hasAnalyzers) {
        stream << ", ";
    }
    if (hasAnalyzers) {
        stream << "analyzers: " << *value.Analyzers;
    }
    stream << " }";
}
160+
161+
// Writes fulltext index settings as "{ layout: <name>, columns: [<col>, <col>, ...] }".
// The layout is always printed (as "unspecified" when unset); the columns list
// is omitted entirely when there are no columns.
Y_DECLARE_OUT_SPEC(, NYdb::NTable::TFulltextIndexSettings, stream, value) {
    using ELayout = NYdb::NTable::TFulltextIndexSettings::ELayout;

    const ELayout layout = value.Layout.has_value() ? *value.Layout : ELayout::Unspecified;
    stream << "{ layout: " << layout;

    if (!value.Columns.empty()) {
        stream << ", columns: [";
        bool needSeparator = false;
        for (const auto& column : value.Columns) {
            if (needSeparator) {
                stream << ", ";
            }
            stream << column;
            needSeparator = true;
        }
        stream << "]";
    }

    stream << " }";
}

0 commit comments

Comments
 (0)