Skip to content

Commit 958b527

Browse files
Enmkianton-ru
authored andcommitted
Merge pull request #798 from Altinity/feature/antalya-25.3/alternative_syntax
25.3 Antalya port - Alternative syntax for cluster functions
1 parent 809953b commit 958b527

File tree

62 files changed

+1914
-429
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+1914
-429
lines changed

src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,14 @@ class FunctionTreeNodeImpl : public AbstractFunction
7171
{
7272
public:
7373
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
74-
size_t size() const override { return arguments ? arguments->size() : 0; }
75-
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
74+
size_t size() const override
75+
{ /// size withous skipped indexes
76+
return arguments ? arguments->size() - skippedSize() : 0;
77+
}
78+
std::unique_ptr<Argument> at(size_t n) const override
79+
{ /// n is relative index, some can be skipped
80+
return std::make_unique<ArgumentTreeNode>(arguments->at(getRealIndex(n)).get());
81+
}
7682
private:
7783
const QueryTreeNodes * arguments = nullptr;
7884
};

src/Core/Settings.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6841,6 +6841,15 @@ Enable PRQL - an alternative to SQL.
68416841
)", EXPERIMENTAL) \
68426842
DECLARE(Bool, enable_adaptive_memory_spill_scheduler, false, R"(
68436843
Trigger processor to spill data into external storage adpatively. grace join is supported at present.
6844+
)", EXPERIMENTAL) \
6845+
DECLARE(String, object_storage_cluster, "", R"(
6846+
Cluster to make distributed requests to object storages with alternative syntax.
6847+
)", EXPERIMENTAL) \
6848+
DECLARE(UInt64, object_storage_max_nodes, 0, R"(
6849+
Limit for hosts used for request in object storage cluster table functions - azureBlobStorageCluster, s3Cluster, hdfsCluster, etc.
6850+
Possible values:
6851+
- Positive integer.
6852+
- 0 — All hosts in cluster.
68446853
)", EXPERIMENTAL) \
68456854
DECLARE(Bool, allow_experimental_delta_kernel_rs, true, R"(
68466855
Allow experimental delta-kernel-rs implementation.

src/Core/SettingsChangesHistory.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
6767
/// controls new feature and it's 'true' by default, use 'false' as previous_value).
6868
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
6969
/// Note: please check if the key already exists to prevent duplicate entries.
70+
addSettingsChanges(settings_changes_history, "25.6.2.20000",
71+
{
72+
// Altinity Antalya modifications atop of 25.6
73+
{"object_storage_cluster", "", "", "New setting"},
74+
{"object_storage_max_nodes", 0, 0, "New setting"},
75+
});
7076
addSettingsChanges(settings_changes_history, "25.6",
7177
{
7278
{"output_format_native_use_flattened_dynamic_and_json_serialization", false, false, "Add flattened Dynamic/JSON serializations to Native format"},

src/Databases/DataLake/DatabaseDataLake.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <Storages/ConstraintsDescription.h>
1919
#include <Storages/StorageNull.h>
2020
#include <Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h>
21+
#include <Storages/ObjectStorage/StorageObjectStorageCluster.h>
2122

2223
#include <Interpreters/evaluateConstantExpression.h>
2324
#include <Interpreters/Context.h>
@@ -43,6 +44,7 @@ namespace DatabaseDataLakeSetting
4344
extern const DatabaseDataLakeSettingsString storage_endpoint;
4445
extern const DatabaseDataLakeSettingsString oauth_server_uri;
4546
extern const DatabaseDataLakeSettingsBool vended_credentials;
47+
extern const DatabaseDataLakeSettingsString object_storage_cluster;
4648

4749

4850
extern const DatabaseDataLakeSettingsString aws_access_key_id;
@@ -428,22 +430,22 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con
428430

429431
/// with_table_structure = false: because there will be
430432
/// no table structure in table definition AST.
431-
StorageObjectStorage::Configuration::initialize(*configuration, args, context_copy, /* with_table_structure */false);
433+
configuration->initialize(args, context_copy, /* with_table_structure */false);
432434

433-
return std::make_shared<StorageObjectStorage>(
435+
auto cluster_name = settings[DatabaseDataLakeSetting::object_storage_cluster].value;
436+
437+
return std::make_shared<StorageObjectStorageCluster>(
438+
cluster_name,
434439
configuration,
435440
configuration->createObjectStorage(context_copy, /* is_readonly */ false),
436-
context_copy,
437441
StorageID(getDatabaseName(), name),
438442
/* columns */columns,
439443
/* constraints */ConstraintsDescription{},
444+
/* partition_by */nullptr,
445+
context_copy,
440446
/* comment */"",
441447
getFormatSettings(context_copy),
442-
LoadingStrictnessLevel::CREATE,
443-
/* distributed_processing */false,
444-
/* partition_by */nullptr,
445-
/* is_table_function */false,
446-
/* lazy_init */true);
448+
LoadingStrictnessLevel::CREATE);
447449
}
448450

449451
DatabaseTablesIteratorPtr DatabaseDataLake::getTablesIterator(

src/Databases/DataLake/DatabaseDataLakeSettings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ namespace ErrorCodes
2727
DECLARE(String, aws_secret_access_key, "", "Key for AWS connection for Glue Catalog'", 0) \
2828
DECLARE(String, region, "", "Region for Glue catalog", 0) \
2929
DECLARE(String, storage_endpoint, "", "Object storage endpoint", 0) \
30+
DECLARE(String, object_storage_cluster, "", "Cluster for distributed requests", 0) \
3031

3132
#define LIST_OF_DATABASE_ICEBERG_SETTINGS(M, ALIAS) \
3233
DATABASE_ICEBERG_RELATED_SETTINGS(M, ALIAS) \

src/Disks/DiskType.cpp

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace ErrorCodes
99
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
1010
}
1111

12-
MetadataStorageType metadataTypeFromString(const String & type)
12+
MetadataStorageType metadataTypeFromString(const std::string & type)
1313
{
1414
auto check_type = Poco::toLower(type);
1515
if (check_type == "local")
@@ -53,23 +53,47 @@ std::string DataSourceDescription::toString() const
5353
case DataSourceType::RAM:
5454
return "memory";
5555
case DataSourceType::ObjectStorage:
56-
{
57-
switch (object_storage_type)
58-
{
59-
case ObjectStorageType::S3:
60-
return "s3";
61-
case ObjectStorageType::HDFS:
62-
return "hdfs";
63-
case ObjectStorageType::Azure:
64-
return "azure_blob_storage";
65-
case ObjectStorageType::Local:
66-
return "local_blob_storage";
67-
case ObjectStorageType::Web:
68-
return "web";
69-
case ObjectStorageType::None:
70-
return "none";
71-
}
72-
}
56+
return DB::toString(object_storage_type);
7357
}
7458
}
59+
60+
ObjectStorageType objectStorageTypeFromString(const std::string & type)
61+
{
62+
auto check_type = Poco::toLower(type);
63+
if (check_type == "s3")
64+
return ObjectStorageType::S3;
65+
if (check_type == "hdfs")
66+
return ObjectStorageType::HDFS;
67+
if (check_type == "azure_blob_storage" || check_type == "azure")
68+
return ObjectStorageType::Azure;
69+
if (check_type == "local_blob_storage" || check_type == "local")
70+
return ObjectStorageType::Local;
71+
if (check_type == "web")
72+
return ObjectStorageType::Web;
73+
if (check_type == "none")
74+
return ObjectStorageType::None;
75+
76+
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,
77+
"Unknown object storage type: {}", type);
78+
}
79+
80+
std::string toString(ObjectStorageType type)
81+
{
82+
switch (type)
83+
{
84+
case ObjectStorageType::S3:
85+
return "s3";
86+
case ObjectStorageType::HDFS:
87+
return "hdfs";
88+
case ObjectStorageType::Azure:
89+
return "azure_blob_storage";
90+
case ObjectStorageType::Local:
91+
return "local_blob_storage";
92+
case ObjectStorageType::Web:
93+
return "web";
94+
case ObjectStorageType::None:
95+
return "none";
96+
}
97+
}
98+
7599
}

src/Disks/DiskType.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ enum class MetadataStorageType : uint8_t
3434
Memory,
3535
};
3636

37-
MetadataStorageType metadataTypeFromString(const String & type);
38-
String toString(DataSourceType data_source_type);
37+
MetadataStorageType metadataTypeFromString(const std::string & type);
38+
39+
ObjectStorageType objectStorageTypeFromString(const std::string & type);
40+
std::string toString(ObjectStorageType type);
3941

4042
struct DataSourceDescription
4143
{

src/Interpreters/Cluster.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -732,9 +732,9 @@ void Cluster::initMisc()
732732
}
733733
}
734734

735-
std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard) const
735+
std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard, size_t max_hosts) const
736736
{
737-
return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard)};
737+
return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard, max_hosts)};
738738
}
739739

740740
std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
@@ -783,7 +783,7 @@ void shuffleReplicas(std::vector<Cluster::Address> & replicas, const Settings &
783783

784784
}
785785

786-
Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard)
786+
Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard, size_t max_hosts)
787787
{
788788
if (from.addresses_with_failover.empty())
789789
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty");
@@ -805,6 +805,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
805805

806806
if (address.is_local)
807807
info.local_addresses.push_back(address);
808+
addresses_with_failover.emplace_back(Addresses({address}));
808809

809810
auto pool = ConnectionPoolFactory::instance().get(
810811
static_cast<unsigned>(settings[Setting::distributed_connections_pool_size]),
@@ -828,9 +829,6 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
828829
info.per_replica_pools = {std::move(pool)};
829830
info.default_database = address.default_database;
830831

831-
addresses_with_failover.emplace_back(Addresses{address});
832-
833-
slot_to_shard.insert(std::end(slot_to_shard), info.weight, shards_info.size());
834832
shards_info.emplace_back(std::move(info));
835833
}
836834
};
@@ -852,10 +850,37 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
852850
secret = from.secret;
853851
name = from.name;
854852

853+
constrainShardInfoAndAddressesToMaxHosts(max_hosts);
854+
855+
for (size_t i = 0; i < shards_info.size(); ++i)
856+
slot_to_shard.insert(std::end(slot_to_shard), shards_info[i].weight, i);
857+
855858
initMisc();
856859
}
857860

858861

862+
void Cluster::constrainShardInfoAndAddressesToMaxHosts(size_t max_hosts)
863+
{
864+
if (max_hosts == 0 || shards_info.size() <= max_hosts)
865+
return;
866+
867+
pcg64_fast gen{randomSeed()};
868+
std::shuffle(shards_info.begin(), shards_info.end(), gen);
869+
shards_info.resize(max_hosts);
870+
871+
AddressesWithFailover addresses_with_failover_;
872+
873+
UInt32 shard_num = 0;
874+
for (auto & shard_info : shards_info)
875+
{
876+
addresses_with_failover_.push_back(addresses_with_failover[shard_info.shard_num - 1]);
877+
shard_info.shard_num = ++shard_num;
878+
}
879+
880+
addresses_with_failover.swap(addresses_with_failover_);
881+
}
882+
883+
859884
Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices)
860885
{
861886
for (size_t index : indices)

src/Interpreters/Cluster.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ class Cluster
270270
std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
271271

272272
/// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
273-
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard = 0) const;
273+
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard = 0, size_t max_hosts = 0) const;
274274

275275
/// Returns false if cluster configuration doesn't allow to use it for cross-replication.
276276
/// NOTE: true does not mean, that it's actually a cross-replication cluster.
@@ -296,7 +296,7 @@ class Cluster
296296

297297
/// For getClusterWithReplicasAsShards implementation
298298
struct ReplicasAsShardsTag {};
299-
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard);
299+
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard, size_t max_hosts);
300300

301301
void addShard(
302302
const Settings & settings,
@@ -308,6 +308,9 @@ class Cluster
308308
ShardInfoInsertPathForInternalReplication insert_paths = {},
309309
bool internal_replication = false);
310310

311+
/// Reduce size of cluster to max_hosts
312+
void constrainShardInfoAndAddressesToMaxHosts(size_t max_hosts);
313+
311314
/// Inter-server secret
312315
String secret;
313316

src/Interpreters/InterpreterCreateQuery.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,8 +1949,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
19491949
auto table_function_ast = create.as_table_function->ptr();
19501950
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
19511951

1952-
if (!table_function->canBeUsedToCreateTable())
1953-
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' cannot be used to create a table", table_function->getName());
1952+
table_function->validateUseToCreateTable();
19541953

19551954
/// In case of CREATE AS table_function() query we should use global context
19561955
/// in storage creation because there will be no query context on server startup

0 commit comments

Comments
 (0)