Skip to content

Commit 23b41f1

Browse files
authored
Merge pull request #798 from Altinity/feature/antalya-25.3/alternative_syntax
25.3 Antalya port - Alternative syntax for cluster functions
2 parents 00a43ec + 31e0aff commit 23b41f1

File tree

53 files changed

+1660
-318
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1660
-318
lines changed

src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,14 @@ class FunctionTreeNodeImpl : public AbstractFunction
7171
{
7272
public:
7373
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
74-
size_t size() const override { return arguments ? arguments->size() : 0; }
75-
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
74+
size_t size() const override
75+
{ /// size without skipped indexes
76+
return arguments ? arguments->size() - skippedSize() : 0;
77+
}
78+
std::unique_ptr<Argument> at(size_t n) const override
79+
{ /// n is relative index, some can be skipped
80+
return std::make_unique<ArgumentTreeNode>(arguments->at(getRealIndex(n)).get());
81+
}
7682
private:
7783
const QueryTreeNodes * arguments = nullptr;
7884
};

src/Core/Settings.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6111,6 +6111,15 @@ Enable PRQL - an alternative to SQL.
61116111
)", EXPERIMENTAL) \
61126112
DECLARE(Bool, enable_adaptive_memory_spill_scheduler, false, R"(
61136113
Trigger processor to spill data into external storage adpatively. grace join is supported at present.
6114+
)", EXPERIMENTAL) \
6115+
DECLARE(String, object_storage_cluster, "", R"(
6116+
Cluster to make distributed requests to object storages with alternative syntax.
6117+
)", EXPERIMENTAL) \
6118+
DECLARE(UInt64, object_storage_max_nodes, 0, R"(
6119+
Limit for hosts used for request in object storage cluster table functions - azureBlobStorageCluster, s3Cluster, hdfsCluster, etc.
6120+
Possible values:
6121+
- Positive integer.
6122+
- 0 — All hosts in cluster.
61146123
)", EXPERIMENTAL) \
61156124
\
61166125
/** Experimental tsToGrid aggregate function. */ \

src/Core/SettingsChangesHistory.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
6666
/// controls new feature and it's 'true' by default, use 'false' as previous_value).
6767
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
6868
/// Note: please check if the key already exists to prevent duplicate entries.
69+
addSettingsChanges(settings_changes_history, "25.2.1.20000",
70+
{
71+
// Altinity Antalya modifications on top of 25.2
72+
{"object_storage_cluster", "", "", "New setting"},
73+
{"object_storage_max_nodes", 0, 0, "New setting"},
74+
});
6975
addSettingsChanges(settings_changes_history, "25.4",
7076
{
7177
});

src/Databases/DataLake/DatabaseDataLake.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <Storages/ConstraintsDescription.h>
1616
#include <Storages/StorageNull.h>
1717
#include <Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h>
18+
#include <Storages/ObjectStorage/StorageObjectStorageCluster.h>
1819

1920
#include <Interpreters/evaluateConstantExpression.h>
2021
#include <Interpreters/Context.h>
@@ -40,6 +41,7 @@ namespace DatabaseDataLakeSetting
4041
extern const DatabaseDataLakeSettingsString storage_endpoint;
4142
extern const DatabaseDataLakeSettingsString oauth_server_uri;
4243
extern const DatabaseDataLakeSettingsBool vended_credentials;
44+
extern const DatabaseDataLakeSettingsString object_storage_cluster;
4345

4446

4547
extern const DatabaseDataLakeSettingsString aws_access_key_id;
@@ -403,9 +405,12 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con
403405

404406
/// with_table_structure = false: because there will be
405407
/// no table structure in table definition AST.
406-
StorageObjectStorage::Configuration::initialize(*configuration, args, context_copy, /* with_table_structure */false, storage_settings);
408+
configuration->initialize(args, context_copy, /* with_table_structure */false, storage_settings);
407409

408-
return std::make_shared<StorageObjectStorage>(
410+
auto cluster_name = settings[DatabaseDataLakeSetting::object_storage_cluster].value;
411+
412+
return std::make_shared<StorageObjectStorageCluster>(
413+
cluster_name,
409414
configuration,
410415
configuration->createObjectStorage(context_copy, /* is_readonly */ false),
411416
context_copy,
@@ -415,9 +420,7 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con
415420
/* comment */"",
416421
getFormatSettings(context_copy),
417422
LoadingStrictnessLevel::CREATE,
418-
/* distributed_processing */false,
419-
/* partition_by */nullptr,
420-
/* lazy_init */true);
423+
/* partition_by */nullptr);
421424
}
422425

423426
DatabaseTablesIteratorPtr DatabaseDataLake::getTablesIterator(

src/Databases/DataLake/DatabaseDataLakeSettings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ namespace ErrorCodes
2626
DECLARE(String, aws_secret_access_key, "", "Key for AWS connection for Glue Catalog'", 0) \
2727
DECLARE(String, region, "", "Region for Glue catalog", 0) \
2828
DECLARE(String, storage_endpoint, "", "Object storage endpoint", 0) \
29+
DECLARE(String, object_storage_cluster, "", "Cluster for distributed requests", 0) \
2930

3031
#define LIST_OF_DATABASE_ICEBERG_SETTINGS(M, ALIAS) \
3132
DATABASE_ICEBERG_RELATED_SETTINGS(M, ALIAS)

src/Disks/DiskType.cpp

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace ErrorCodes
99
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
1010
}
1111

12-
MetadataStorageType metadataTypeFromString(const String & type)
12+
MetadataStorageType metadataTypeFromString(const std::string & type)
1313
{
1414
auto check_type = Poco::toLower(type);
1515
if (check_type == "local")
@@ -53,23 +53,47 @@ std::string DataSourceDescription::toString() const
5353
case DataSourceType::RAM:
5454
return "memory";
5555
case DataSourceType::ObjectStorage:
56-
{
57-
switch (object_storage_type)
58-
{
59-
case ObjectStorageType::S3:
60-
return "s3";
61-
case ObjectStorageType::HDFS:
62-
return "hdfs";
63-
case ObjectStorageType::Azure:
64-
return "azure_blob_storage";
65-
case ObjectStorageType::Local:
66-
return "local_blob_storage";
67-
case ObjectStorageType::Web:
68-
return "web";
69-
case ObjectStorageType::None:
70-
return "none";
71-
}
72-
}
56+
return DB::toString(object_storage_type);
7357
}
7458
}
59+
60+
ObjectStorageType objectStorageTypeFromString(const std::string & type)
61+
{
62+
auto check_type = Poco::toLower(type);
63+
if (check_type == "s3")
64+
return ObjectStorageType::S3;
65+
if (check_type == "hdfs")
66+
return ObjectStorageType::HDFS;
67+
if (check_type == "azure_blob_storage" || check_type == "azure")
68+
return ObjectStorageType::Azure;
69+
if (check_type == "local_blob_storage" || check_type == "local")
70+
return ObjectStorageType::Local;
71+
if (check_type == "web")
72+
return ObjectStorageType::Web;
73+
if (check_type == "none")
74+
return ObjectStorageType::None;
75+
76+
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,
77+
"Unknown object storage type: {}", type);
78+
}
79+
80+
std::string toString(ObjectStorageType type)
81+
{
82+
switch (type)
83+
{
84+
case ObjectStorageType::S3:
85+
return "s3";
86+
case ObjectStorageType::HDFS:
87+
return "hdfs";
88+
case ObjectStorageType::Azure:
89+
return "azure_blob_storage";
90+
case ObjectStorageType::Local:
91+
return "local_blob_storage";
92+
case ObjectStorageType::Web:
93+
return "web";
94+
case ObjectStorageType::None:
95+
return "none";
96+
}
97+
}
98+
7599
}

src/Disks/DiskType.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ enum class MetadataStorageType : uint8_t
3434
Memory,
3535
};
3636

37-
MetadataStorageType metadataTypeFromString(const String & type);
38-
String toString(DataSourceType data_source_type);
37+
MetadataStorageType metadataTypeFromString(const std::string & type);
38+
39+
ObjectStorageType objectStorageTypeFromString(const std::string & type);
40+
std::string toString(ObjectStorageType type);
3941

4042
struct DataSourceDescription
4143
{

src/Interpreters/Cluster.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -717,9 +717,9 @@ void Cluster::initMisc()
717717
}
718718
}
719719

720-
std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard) const
720+
std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard, size_t max_hosts) const
721721
{
722-
return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard)};
722+
return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard, max_hosts)};
723723
}
724724

725725
std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
@@ -768,7 +768,7 @@ void shuffleReplicas(std::vector<Cluster::Address> & replicas, const Settings &
768768

769769
}
770770

771-
Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard)
771+
Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard, size_t max_hosts)
772772
{
773773
if (from.addresses_with_failover.empty())
774774
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty");
@@ -790,6 +790,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
790790

791791
if (address.is_local)
792792
info.local_addresses.push_back(address);
793+
addresses_with_failover.emplace_back(Addresses({address}));
793794

794795
auto pool = ConnectionPoolFactory::instance().get(
795796
static_cast<unsigned>(settings[Setting::distributed_connections_pool_size]),
@@ -811,9 +812,6 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
811812
info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings[Setting::load_balancing]);
812813
info.per_replica_pools = {std::move(pool)};
813814

814-
addresses_with_failover.emplace_back(Addresses{address});
815-
816-
slot_to_shard.insert(std::end(slot_to_shard), info.weight, shards_info.size());
817815
shards_info.emplace_back(std::move(info));
818816
}
819817
};
@@ -835,10 +833,37 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
835833
secret = from.secret;
836834
name = from.name;
837835

836+
constrainShardInfoAndAddressesToMaxHosts(max_hosts);
837+
838+
for (size_t i = 0; i < shards_info.size(); ++i)
839+
slot_to_shard.insert(std::end(slot_to_shard), shards_info[i].weight, i);
840+
838841
initMisc();
839842
}
840843

841844

845+
void Cluster::constrainShardInfoAndAddressesToMaxHosts(size_t max_hosts)
846+
{
847+
if (max_hosts == 0 || shards_info.size() <= max_hosts)
848+
return;
849+
850+
pcg64_fast gen{randomSeed()};
851+
std::shuffle(shards_info.begin(), shards_info.end(), gen);
852+
shards_info.resize(max_hosts);
853+
854+
AddressesWithFailover addresses_with_failover_;
855+
856+
UInt32 shard_num = 0;
857+
for (auto & shard_info : shards_info)
858+
{
859+
addresses_with_failover_.push_back(addresses_with_failover[shard_info.shard_num - 1]);
860+
shard_info.shard_num = ++shard_num;
861+
}
862+
863+
addresses_with_failover.swap(addresses_with_failover_);
864+
}
865+
866+
842867
Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices)
843868
{
844869
for (size_t index : indices)

src/Interpreters/Cluster.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ class Cluster
266266
std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
267267

268268
/// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
269-
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard = 0) const;
269+
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard = 0, size_t max_hosts = 0) const;
270270

271271
/// Returns false if cluster configuration doesn't allow to use it for cross-replication.
272272
/// NOTE: true does not mean, that it's actually a cross-replication cluster.
@@ -292,7 +292,7 @@ class Cluster
292292

293293
/// For getClusterWithReplicasAsShards implementation
294294
struct ReplicasAsShardsTag {};
295-
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard);
295+
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard, size_t max_hosts);
296296

297297
void addShard(
298298
const Settings & settings,
@@ -304,6 +304,9 @@ class Cluster
304304
ShardInfoInsertPathForInternalReplication insert_paths = {},
305305
bool internal_replication = false);
306306

307+
/// Reduce size of cluster to max_hosts
308+
void constrainShardInfoAndAddressesToMaxHosts(size_t max_hosts);
309+
307310
/// Inter-server secret
308311
String secret;
309312

src/Interpreters/InterpreterCreateQuery.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1925,8 +1925,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
19251925
auto table_function_ast = create.as_table_function->ptr();
19261926
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
19271927

1928-
if (!table_function->canBeUsedToCreateTable())
1929-
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' cannot be used to create a table", table_function->getName());
1928+
table_function->validateUseToCreateTable();
19301929

19311930
/// In case of CREATE AS table_function() query we should use global context
19321931
/// in storage creation because there will be no query context on server startup

0 commit comments

Comments
 (0)