From 05976ed931f79defadf9d257e5a44fc5d71c0567 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Tue, 24 Dec 2024 16:16:03 +0100
Subject: [PATCH 1/6] s3Cluster hive optimization

---
 src/Planner/Planner.cpp                       |  22 ++++
 src/Processors/QueryPlan/ObjectFilterStep.cpp |  58 +++++++++
 src/Processors/QueryPlan/ObjectFilterStep.h   |  35 ++++++
 .../optimizePrimaryKeyConditionAndLimit.cpp   |   5 +
 .../QueryPlan/QueryPlanStepRegistry.cpp       |   2 +
 .../StorageObjectStorageCluster.cpp           |   2 +-
 .../StorageObjectStorageCluster.h             |   1 -
 tests/integration/test_s3_cluster/test.py     | 113 +++++++++++++++++-
 8 files changed, 235 insertions(+), 3 deletions(-)
 create mode 100644 src/Processors/QueryPlan/ObjectFilterStep.cpp
 create mode 100644 src/Processors/QueryPlan/ObjectFilterStep.h

diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp
index 19ba725523f3..5f9cd4da5bc9 100644
--- a/src/Planner/Planner.cpp
+++ b/src/Planner/Planner.cpp
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include <Processors/QueryPlan/ObjectFilterStep.h>
 #include
 #include
 
@@ -133,6 +134,7 @@ namespace Setting
     extern const SettingsUInt64 min_count_to_compile_aggregate_expression;
     extern const SettingsBool enable_software_prefetch_in_aggregation;
     extern const SettingsBool optimize_group_by_constant_keys;
+    extern const SettingsBool use_hive_partitioning;
 }
 
 namespace ServerSetting
@@ -413,6 +415,19 @@ void addFilterStep(QueryPlan & query_plan,
     query_plan.addStep(std::move(where_step));
 }
 
+void addObjectFilterStep(QueryPlan & query_plan,
+    FilterAnalysisResult & filter_analysis_result,
+    const std::string & step_description)
+{
+    auto actions = std::move(filter_analysis_result.filter_actions->dag);
+
+    auto where_step = std::make_unique<ObjectFilterStep>(query_plan.getCurrentHeader(),
+        std::move(actions),
+        filter_analysis_result.filter_column_name);
+    where_step->setStepDescription(step_description);
+    query_plan.addStep(std::move(where_step));
+}
+
 Aggregator::Params getAggregatorParams(const PlannerContextPtr & planner_context,
     const AggregationAnalysisResult & aggregation_analysis_result,
     const QueryAnalysisResult & query_analysis_result,
@@ -1670,6 +1685,13 @@ void Planner::buildPlanForQueryNode()
 
     if (query_processing_info.isSecondStage() || query_processing_info.isFromAggregationState())
     {
+        if (settings[Setting::use_hive_partitioning]
+            && !query_processing_info.isFirstStage()
+            && expression_analysis_result.hasWhere())
+        {
+            addObjectFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE");
+        }
+
         if (query_processing_info.isFromAggregationState())
         {
             /// Aggregation was performed on remote shards
diff --git a/src/Processors/QueryPlan/ObjectFilterStep.cpp b/src/Processors/QueryPlan/ObjectFilterStep.cpp
new file mode 100644
index 000000000000..7c03a3699e31
--- /dev/null
+++ b/src/Processors/QueryPlan/ObjectFilterStep.cpp
@@ -0,0 +1,58 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+ObjectFilterStep::ObjectFilterStep(
+    const Header & input_header_,
+    ActionsDAG actions_dag_,
+    String filter_column_name_)
+    : actions_dag(std::move(actions_dag_))
+    , filter_column_name(std::move(filter_column_name_))
+{
+    input_headers.emplace_back(std::move(input_header_));
+    output_header = input_headers.front();
+}
+
+QueryPipelineBuilderPtr ObjectFilterStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & /* settings */)
+{
+    return std::move(pipelines.front());
+}
+
+void ObjectFilterStep::updateOutputHeader()
+{
+    output_header = input_headers.front();
+}
+
+void ObjectFilterStep::serialize(Serialization & ctx) const
+{
+    writeStringBinary(filter_column_name, ctx.out);
+
+    actions_dag.serialize(ctx.out, ctx.registry);
+}
+
+std::unique_ptr<IQueryPlanStep> ObjectFilterStep::deserialize(Deserialization & ctx)
+{
+    if (ctx.input_headers.size() != 1)
+        throw Exception(ErrorCodes::INCORRECT_DATA, "ObjectFilterStep must have one input stream");
+
+    String filter_column_name;
+    readStringBinary(filter_column_name, ctx.in);
+
+    ActionsDAG actions_dag = ActionsDAG::deserialize(ctx.in, ctx.registry, ctx.context);
+
+    return std::make_unique<ObjectFilterStep>(ctx.input_headers.front(), std::move(actions_dag), std::move(filter_column_name));
+}
+
+void registerObjectFilterStep(QueryPlanStepRegistry & registry)
+{
+    registry.registerStep("ObjectFilter", ObjectFilterStep::deserialize);
+}
+
+}
diff --git a/src/Processors/QueryPlan/ObjectFilterStep.h b/src/Processors/QueryPlan/ObjectFilterStep.h
new file mode 100644
index 000000000000..f72cb00c86ab
--- /dev/null
+++ b/src/Processors/QueryPlan/ObjectFilterStep.h
@@ -0,0 +1,35 @@
+#pragma once
+#include
+#include
+
+namespace DB
+{
+
+/// Implements WHERE operation.
+class ObjectFilterStep : public IQueryPlanStep
+{
+public:
+    ObjectFilterStep(
+        const Header & input_header_,
+        ActionsDAG actions_dag_,
+        String filter_column_name_);
+
+    String getName() const override { return "ObjectFilter"; }
+    QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) override;
+
+    const ActionsDAG & getExpression() const { return actions_dag; }
+    ActionsDAG & getExpression() { return actions_dag; }
+    const String & getFilterColumnName() const { return filter_column_name; }
+
+    void serialize(Serialization & ctx) const override;
+
+    static std::unique_ptr<IQueryPlanStep> deserialize(Deserialization & ctx);
+
+private:
+    void updateOutputHeader() override;
+
+    ActionsDAG actions_dag;
+    String filter_column_name;
+};
+
+}
diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp
index 490b79fbf8d3..aa72f963f083 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <Processors/QueryPlan/ObjectFilterStep.h>
 
 namespace DB::QueryPlanOptimizations
 {
@@ -41,6 +42,10 @@ void optimizePrimaryKeyConditionAndLimit(const Stack & stack)
             /// So this is likely not needed.
             continue;
         }
+        else if (auto * object_filter_step = typeid_cast<ObjectFilterStep *>(iter->node->step.get()))
+        {
+            source_step_with_filter->addFilter(object_filter_step->getExpression().clone(), object_filter_step->getFilterColumnName());
+        }
         else
         {
             break;
diff --git a/src/Processors/QueryPlan/QueryPlanStepRegistry.cpp b/src/Processors/QueryPlan/QueryPlanStepRegistry.cpp
index 0df21ff9d057..c378594ef9ce 100644
--- a/src/Processors/QueryPlan/QueryPlanStepRegistry.cpp
+++ b/src/Processors/QueryPlan/QueryPlanStepRegistry.cpp
@@ -48,6 +48,7 @@ void registerOffsetStep(QueryPlanStepRegistry & registry);
 void registerFilterStep(QueryPlanStepRegistry & registry);
 void registerTotalsHavingStep(QueryPlanStepRegistry & registry);
 void registerExtremesStep(QueryPlanStepRegistry & registry);
+void registerObjectFilterStep(QueryPlanStepRegistry & registry);
 
 void QueryPlanStepRegistry::registerPlanSteps()
 {
@@ -65,6 +66,7 @@ void QueryPlanStepRegistry::registerPlanSteps()
     registerFilterStep(registry);
     registerTotalsHavingStep(registry);
     registerExtremesStep(registry);
+    registerObjectFilterStep(registry);
 }
 
 }
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp
index 07eecc655998..514b5448fe49 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp
@@ -116,7 +116,7 @@ RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExten
 {
     auto iterator = StorageObjectStorageSource::createFileIterator(
         configuration, configuration->getQuerySettings(local_context), object_storage, /* distributed_processing */false,
-        local_context, predicate, virtual_columns, nullptr, local_context->getFileProgressCallback());
+        local_context, predicate, getVirtualsList(), nullptr, local_context->getFileProgressCallback());
 
     auto callback = std::make_shared<TaskIterator>([iterator]() mutable -> String
     {
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
index 0088ff28fc22..ccecf2b2ae4e 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
@@ -38,7 +38,6 @@ class StorageObjectStorageCluster : public IStorageCluster
     const String engine_name;
     const StorageObjectStorage::ConfigurationPtr configuration;
     const ObjectStoragePtr object_storage;
-    NamesAndTypesList virtual_columns;
 };
 
 }
diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py
index e8bf031021e2..795d702ac778 100644
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@@ -2,7 +2,7 @@
 import logging
 import os
 import shutil
-import time
+import uuid
 from email.errors import HeaderParseError
 
 import pytest
@@ -508,3 +508,114 @@ def test_cluster_default_expression(started_cluster):
     )
 
     assert result == expected_result
+
+
+def test_hive_partitioning(started_cluster):
+    node = started_cluster.instances["s0_0_0"]
+    for i in range(1,5):
+        node.query(
+            f"""
+            INSERT
+            INTO FUNCTION s3('http://minio1:9001/root/data/hive/key={i}/data.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
+            VALUES ({i}, {i})
+            """
+        )
+
+    query_id_full = str(uuid.uuid4())
+    result = node.query(
+        """
+        SELECT count()
+        FROM s3('http://minio1:9001/root/data/hive/key=**.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
+        WHERE key <= 2
+        FORMAT TSV
+        SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 0
+        """,
+        query_id=query_id_full,
+    )
+    result = int(result)
+    assert result == 2
+
+    query_id_optimized = str(uuid.uuid4())
+    result = node.query(
+        """
+        SELECT count()
+        FROM s3('http://minio1:9001/root/data/hive/key=**.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
+        WHERE key <= 2
+        FORMAT TSV
+        SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 1
+        """,
+        query_id=query_id_optimized,
+    )
+    result = int(result)
+    assert result == 2
+
+    query_id_cluster_full = str(uuid.uuid4())
+    result = node.query(
+        """
+        SELECT count()
+        FROM s3Cluster(cluster_simple, 'http://minio1:9001/root/data/hive/key=**.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
+        WHERE key <= 2
+        FORMAT TSV
+        SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 0
+        """,
+        query_id=query_id_cluster_full,
+    )
+    result = int(result)
+    assert result == 2
+
+    query_id_cluster_optimized = str(uuid.uuid4())
+    result = node.query(
+        """
+        SELECT count()
+        FROM s3Cluster(cluster_simple, 'http://minio1:9001/root/data/hive/key=**.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
+        WHERE key <= 2
+        FORMAT TSV
+        SETTINGS enable_filesystem_cache = 0, use_query_cache = 0, use_cache_for_count_from_files = 0, use_hive_partitioning = 1
+        """,
+        query_id=query_id_cluster_optimized,
+    )
+    result = int(result)
+    assert result == 2
+
+    node.query("SYSTEM FLUSH LOGS ON CLUSTER 'cluster_simple'")
+
+    full_traffic = node.query(
+        f"""
+        SELECT sum(ProfileEvents['ReadBufferFromS3Bytes'])
+        FROM clusterAllReplicas(cluster_simple, system.query_log)
+        WHERE type='QueryFinish' AND initial_query_id='{query_id_full}'
+        FORMAT TSV
+        """)
+    full_traffic = int(full_traffic)
+    assert full_traffic > 0  # all 4 files read, ~612 bytes each
+
+    optimized_traffic = node.query(
+        f"""
+        SELECT sum(ProfileEvents['ReadBufferFromS3Bytes'])
+        FROM clusterAllReplicas(cluster_simple, system.query_log)
+        WHERE type='QueryFinish' AND initial_query_id='{query_id_optimized}'
+        FORMAT TSV
+        """)
+    optimized_traffic = int(optimized_traffic)
+    assert optimized_traffic > 0  # only 2 files read, ~612 bytes each
+    assert full_traffic > optimized_traffic
+
+    cluster_full_traffic = node.query(
+        f"""
+        SELECT sum(ProfileEvents['ReadBufferFromS3Bytes'])
+        FROM clusterAllReplicas(cluster_simple, system.query_log)
+        WHERE type='QueryFinish' AND initial_query_id='{query_id_cluster_full}'
+        FORMAT TSV
+        """)
+    cluster_full_traffic = int(cluster_full_traffic)
+    assert cluster_full_traffic == full_traffic
+
+    cluster_optimized_traffic = node.query(
+        f"""
+        SELECT sum(ProfileEvents['ReadBufferFromS3Bytes'])
+        FROM clusterAllReplicas(cluster_simple, system.query_log)
+        WHERE type='QueryFinish' AND initial_query_id='{query_id_cluster_optimized}'
+        FORMAT TSV
+        """)
+    cluster_optimized_traffic = int(cluster_optimized_traffic)
+    assert cluster_optimized_traffic == optimized_traffic

From 42d3249d0c8033d2f96980ecd82bf820a2de78d8 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Mon, 30 Dec 2024 13:06:51 +0100
Subject: [PATCH 2/6] Style fix

---
 src/Processors/QueryPlan/ObjectFilterStep.cpp |  5 +++++
 tests/integration/test_s3_cluster/test.py     | 14 +++++++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/Processors/QueryPlan/ObjectFilterStep.cpp b/src/Processors/QueryPlan/ObjectFilterStep.cpp
index 7c03a3699e31..c38fac78e502 100644
--- a/src/Processors/QueryPlan/ObjectFilterStep.cpp
+++ b/src/Processors/QueryPlan/ObjectFilterStep.cpp
@@ -9,6 +9,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int INCORRECT_DATA;
+}
+
 ObjectFilterStep::ObjectFilterStep(
     const Header & input_header_,
     ActionsDAG actions_dag_,
diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py
index 795d702ac778..b57f54eaae01 100644
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@@ -512,7 +512,7 @@ def test_cluster_default_expression(started_cluster):
 
 def test_hive_partitioning(started_cluster):
     node = started_cluster.instances["s0_0_0"]
-    for i in range(1,5):
+    for i in range(1, 5):
         node.query(
             f"""
             INSERT
@@ -585,7 +585,8 @@ def test_hive_partitioning(started_cluster):
         FROM clusterAllReplicas(cluster_simple, system.query_log)
         WHERE type='QueryFinish' AND initial_query_id='{query_id_full}'
         FORMAT TSV
-        """)
+        """
+    )
     full_traffic = int(full_traffic)
     assert full_traffic > 0  # all 4 files read, ~612 bytes each
 
@@ -595,7 +596,8 @@ def test_hive_partitioning(started_cluster):
         FROM clusterAllReplicas(cluster_simple, system.query_log)
         WHERE type='QueryFinish' AND initial_query_id='{query_id_optimized}'
         FORMAT TSV
-        """)
+        """
+    )
     optimized_traffic = int(optimized_traffic)
     assert optimized_traffic > 0  # only 2 files read, ~612 bytes each
     assert full_traffic > optimized_traffic
@@ -606,7 +608,8 @@ def test_hive_partitioning(started_cluster):
         FROM clusterAllReplicas(cluster_simple, system.query_log)
         WHERE type='QueryFinish' AND initial_query_id='{query_id_cluster_full}'
         FORMAT TSV
-        """)
+        """
+    )
     cluster_full_traffic = int(cluster_full_traffic)
     assert cluster_full_traffic == full_traffic
 
@@ -616,6 +619,7 @@ def test_hive_partitioning(started_cluster):
         FROM clusterAllReplicas(cluster_simple, system.query_log)
         WHERE type='QueryFinish' AND initial_query_id='{query_id_cluster_optimized}'
         FORMAT TSV
-        """)
+        """
+    )
     cluster_optimized_traffic = int(cluster_optimized_traffic)
     assert cluster_optimized_traffic == optimized_traffic

From ae12cfabb7bdd68ff0aca62b9fe67f6d9db0f0eb Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Mon, 30 Dec 2024 15:55:00 +0100
Subject: [PATCH 3/6] Fix tidy build

---
 src/Processors/QueryPlan/ObjectFilterStep.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/QueryPlan/ObjectFilterStep.cpp b/src/Processors/QueryPlan/ObjectFilterStep.cpp
index c38fac78e502..2ae2294a571b 100644
--- a/src/Processors/QueryPlan/ObjectFilterStep.cpp
+++ b/src/Processors/QueryPlan/ObjectFilterStep.cpp
@@ -21,7 +21,7 @@ ObjectFilterStep::ObjectFilterStep(
     : actions_dag(std::move(actions_dag_))
     , filter_column_name(std::move(filter_column_name_))
 {
-    input_headers.emplace_back(std::move(input_header_));
+    input_headers.emplace_back(input_header_);
     output_header = input_headers.front();
 }

From 9584643b5f55e4875ae3c5bf37a495739c0565cd Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Mon, 30 Dec 2024 20:12:10 +0100
Subject: [PATCH 4/6] Fix test

---
 tests/integration/test_s3_cluster/test.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py
index b57f54eaae01..32540713da1b 100644
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@@ -513,13 +513,23 @@ def test_cluster_default_expression(started_cluster):
 def test_hive_partitioning(started_cluster):
     node = started_cluster.instances["s0_0_0"]
     for i in range(1, 5):
-        node.query(
+        exists = node.query(
             f"""
-            INSERT
-            INTO FUNCTION s3('http://minio1:9001/root/data/hive/key={i}/data.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
-            VALUES ({i}, {i})
+            SELECT
+                count()
+            FROM s3('http://minio1:9001/root/data/hive/key={i}/*', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
+            GROUP BY ALL
+            FORMAT TSV
             """
         )
+        if int(exists) == 0:
+            node.query(
+                f"""
+                INSERT
+                INTO FUNCTION s3('http://minio1:9001/root/data/hive/key={i}/data.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
+                VALUES ({i}, {i})
+                """
+            )

From 722f4a94b705393aae22f35fe8409b3e0cb77db1 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Thu, 2 Jan 2025 12:06:56 +0100
Subject: [PATCH 5/6] Do not use ObjectFilter when not required

---
 src/Planner/Planner.cpp          |  5 +++-
 src/Storages/IStorageCluster.cpp | 46 ------------------------------
 src/Storages/IStorageCluster.h   | 47 ++++++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 47 deletions(-)

diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp
index 5f9cd4da5bc9..d41f08de29ab 100644
--- a/src/Planner/Planner.cpp
+++ b/src/Planner/Planner.cpp
@@ -1689,7 +1689,10 @@ void Planner::buildPlanForQueryNode()
             && !query_processing_info.isFirstStage()
             && expression_analysis_result.hasWhere())
         {
-            addObjectFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE");
+            if (typeid_cast<ReadFromCluster *>(query_plan.getRootNode()->step.get()))
+            {
+                addObjectFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE");
+            }
         }
 
         if (query_processing_info.isFromAggregationState())
diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp
index 219092e7ab5a..28b5a84166a2 100644
--- a/src/Storages/IStorageCluster.cpp
+++ b/src/Storages/IStorageCluster.cpp
@@ -15,7 +15,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -47,51 +46,6 @@ IStorageCluster::IStorageCluster(
 {
 }
 
-class ReadFromCluster : public SourceStepWithFilter
-{
-public:
-    std::string getName() const override { return "ReadFromCluster"; }
-    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
-    void applyFilters(ActionDAGNodes added_filter_nodes) override;
-
-    ReadFromCluster(
-        const Names & column_names_,
-        const SelectQueryInfo & query_info_,
-        const StorageSnapshotPtr & storage_snapshot_,
-        const ContextPtr & context_,
-        Block sample_block,
-        std::shared_ptr<IStorageCluster> storage_,
-        ASTPtr query_to_send_,
-        QueryProcessingStage::Enum processed_stage_,
-        ClusterPtr cluster_,
-        LoggerPtr log_)
-        : SourceStepWithFilter(
-            std::move(sample_block),
-            column_names_,
-            query_info_,
-            storage_snapshot_,
-            context_)
-        , storage(std::move(storage_))
-        , query_to_send(std::move(query_to_send_))
-        , processed_stage(processed_stage_)
-        , cluster(std::move(cluster_))
-        , log(log_)
-    {
-    }
-
-private:
-    std::shared_ptr<IStorageCluster> storage;
-    ASTPtr query_to_send;
-    QueryProcessingStage::Enum processed_stage;
-    ClusterPtr cluster;
-    LoggerPtr log;
-
-    std::optional<RemoteQueryExecutor::Extension> extension;
-
-    void createExtension(const ActionsDAG::Node * predicate);
-    ContextPtr updateSettings(const Settings & settings);
-};
-
 void ReadFromCluster::applyFilters(ActionDAGNodes added_filter_nodes)
 {
     SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h
index d000e24562ff..4d7a047e0c3e 100644
--- a/src/Storages/IStorageCluster.h
+++ b/src/Storages/IStorageCluster.h
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 namespace DB
 {
@@ -52,4 +53,50 @@ class IStorageCluster : public IStorage
 
 };
 
+class ReadFromCluster : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromCluster"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters(ActionDAGNodes added_filter_nodes) override;
+
+    ReadFromCluster(
+        const Names & column_names_,
+        const SelectQueryInfo & query_info_,
+        const StorageSnapshotPtr & storage_snapshot_,
+        const ContextPtr & context_,
+        Block sample_block,
+        std::shared_ptr<IStorageCluster> storage_,
+        ASTPtr query_to_send_,
+        QueryProcessingStage::Enum processed_stage_,
+        ClusterPtr cluster_,
+        LoggerPtr log_)
+        : SourceStepWithFilter(
+            std::move(sample_block),
+            column_names_,
+            query_info_,
+            storage_snapshot_,
+            context_)
+        , storage(std::move(storage_))
+        , query_to_send(std::move(query_to_send_))
+        , processed_stage(processed_stage_)
+        , cluster(std::move(cluster_))
+        , log(log_)
+    {
+    }
+
+private:
+    std::shared_ptr<IStorageCluster> storage;
+    ASTPtr query_to_send;
+    QueryProcessingStage::Enum processed_stage;
+    ClusterPtr cluster;
+    LoggerPtr log;
+
+    std::optional<RemoteQueryExecutor::Extension> extension;
+
+    void createExtension(const ActionsDAG::Node * predicate);
+    ContextPtr updateSettings(const Settings & settings);
+};
+
+
 }

From b4f8020c4f613931c9ae3d3a99a78c48a425b677 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Thu, 2 Jan 2025 14:00:06 +0100
Subject: [PATCH 6/6] Fix test

---
 tests/integration/test_s3_cluster/test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py
index 32540713da1b..ae212040d6c8 100644
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@@ -527,7 +527,8 @@ def test_hive_partitioning(started_cluster):
             f"""
             INSERT
             INTO FUNCTION s3('http://minio1:9001/root/data/hive/key={i}/data.parquet', 'minio', 'minio123', 'Parquet', 'key Int32, value Int32')
-            VALUES ({i}, {i})
+            SELECT {i}, {i}
+            SETTINGS use_hive_partitioning = 0
             """
         )
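Reviewer's note on what the series achieves: with `use_hive_partitioning = 1`, the `key=<value>` segments of each object path are exposed as a virtual column, and the new `ObjectFilterStep` lets the `WHERE` predicate reach the file iterator on the initiator, so non-matching objects are discarded at listing time instead of after being downloaded. The sketch below illustrates that pruning idea in plain Python; it is not the actual `StorageObjectStorageSource` iterator code, and all names in it are hypothetical.

```python
import re


def hive_partition_values(path: str) -> dict:
    """Parse `key=value` partition segments out of an object path."""
    return dict(re.findall(r"([^/=]+)=([^/]+)/", path))


def prune(paths, predicate):
    """Keep only the paths whose partition values satisfy the predicate."""
    return [p for p in paths if predicate(hive_partition_values(p))]


# Four objects, as written by the test above.
paths = [f"data/hive/key={i}/data.parquet" for i in range(1, 5)]

# Mirrors the test's `WHERE key <= 2`: two of the four objects are dropped
# during listing and never fetched, which is why optimized_traffic comes out
# at roughly half of full_traffic.
survivors = prune(paths, lambda parts: int(parts["key"]) <= 2)
assert survivors == [
    "data/hive/key=1/data.parquet",
    "data/hive/key=2/data.parquet",
]
```

The same predicate must also travel to the s3Cluster worker nodes that pull tasks from the initiator's iterator, which is why `ObjectFilterStep` gets `serialize`/`deserialize` methods and a `QueryPlanStepRegistry` entry rather than being a purely local rewrite.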