From 9567a6509d9019bd5c4fd684d79495e6d06445ba Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Thu, 24 Jul 2025 14:39:45 -0400 Subject: [PATCH 001/109] FragmentMetadata checks that number of tiles matches dense domain --- tiledb/sm/fragment/fragment_metadata.cc | 35 +++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index 75f5273b51e..fc5681a65c1 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -841,6 +841,36 @@ void FragmentMetadata::load( } void FragmentMetadata::store(const EncryptionKey& encryption_key) { + // integrity checks + if (dense_) { + const uint64_t dense_tile_num = tile_num(); + + for (const auto& tile_offsets : loaded_metadata_ptr_->tile_offsets()) { + iassert(tile_offsets.size() == dense_tile_num); + } + for (const auto& tile_var_offsets : + loaded_metadata_ptr_->tile_var_offsets()) { + iassert(tile_var_offsets.size() == dense_tile_num); + } + for (const auto& tile_var_sizes : loaded_metadata_ptr_->tile_var_sizes()) { + iassert(tile_var_sizes.size() == dense_tile_num); + } + for (const auto& tile_validity_offsets : + loaded_metadata_ptr_->tile_validity_offsets()) { + iassert(tile_validity_offsets.size() == dense_tile_num); + } + for (const auto& tile_null_counts : + loaded_metadata_ptr_->tile_null_counts()) { + if (!tile_null_counts.empty()) { + iassert(tile_null_counts.size() == dense_tile_num); + } + } + + // what about min, max, sum? + // requires iteration in stride with schema fields to get cell size + // probably a good idea, ask about it in code review + } + auto timer_se = resources_->stats().start_timer("write_store_frag_meta"); // Make sure the data fits in the current domain before we commit to disk. 
@@ -1194,6 +1224,11 @@ void FragmentMetadata::store_v15_or_higher( } void FragmentMetadata::set_num_tiles(uint64_t num_tiles) { + if (dense_) { + const uint64_t dense_tile_num = tile_num(); + iassert(num_tiles <= dense_tile_num); + } + for (auto& it : idx_map_) { auto i = it.second; iassert(num_tiles >= loaded_metadata_ptr_->tile_offsets()[i].size()); From 254fdcbcebf37b7868448c424ce1a6fce8298665 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 24 Sep 2025 11:15:12 -0400 Subject: [PATCH 002/109] GlobalOrderWriter::dense --- .../sm/query/writers/global_order_writer.cc | 22 ++++++++----------- tiledb/sm/query/writers/global_order_writer.h | 7 ++++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 38b3a3523ba..32a5c1fb099 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -202,7 +202,7 @@ Status GlobalOrderWriter::init_global_write_state() { const auto& domain{array_schema_.domain()}; const auto capacity = array_schema_.capacity(); const auto cell_num_per_tile = - coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); + dense() ? 
domain.cell_num_per_tile() : capacity; auto last_tiles_it = global_write_state_->last_tiles_.emplace( std::piecewise_construct, std::forward_as_tuple(name), @@ -388,7 +388,7 @@ Status GlobalOrderWriter::check_global_order() const { } // Applicable only to sparse writes - exit if coordinates do not exist - if (!coords_info_.has_coords_ || coords_info_.coords_num_ == 0) { + if (dense() || coords_info_.coords_num_ == 0) { return Status::Ok(); } @@ -656,7 +656,7 @@ Status GlobalOrderWriter::finalize_global_write_state() { } // Check if the total number of cells written is equal to the subarray size - if (!coords_info_.has_coords_) { // This implies a dense array + if (dense()) { auto& domain{array_schema_.domain()}; auto expected_cell_num = domain.cell_num(subarray_.ndrange(0)); @@ -727,8 +727,8 @@ Status GlobalOrderWriter::global_write() { // Initialize the global write state if this is the first invocation if (!global_write_state_) { RETURN_CANCEL_OR_ERROR(alloc_global_write_state()); - RETURN_CANCEL_OR_ERROR(create_fragment( - !coords_info_.has_coords_, global_write_state_->frag_meta_)); + RETURN_CANCEL_OR_ERROR( + create_fragment(dense(), global_write_state_->frag_meta_)); RETURN_CANCEL_OR_ERROR(init_global_write_state()); } @@ -821,8 +821,7 @@ Status GlobalOrderWriter::global_write() { Status GlobalOrderWriter::global_write_handle_last_tile() { auto capacity = array_schema_.capacity(); auto& domain = array_schema_.domain(); - auto cell_num_per_tile = - coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); + auto cell_num_per_tile = dense() ? domain.cell_num_per_tile() : capacity; auto cell_num_last_tiles = global_write_state_->cells_written_[buffers_.begin()->first] % cell_num_per_tile; @@ -906,8 +905,7 @@ Status GlobalOrderWriter::prepare_full_tiles_fixed( auto capacity = array_schema_.capacity(); auto cell_num = *buffer_size / cell_size; auto& domain{array_schema_.domain()}; - auto cell_num_per_tile = - coords_info_.has_coords_ ? 
capacity : domain.cell_num_per_tile(); + auto cell_num_per_tile = dense() ? domain.cell_num_per_tile() : capacity; // Do nothing if there are no cells to write if (cell_num == 0) { @@ -1087,8 +1085,7 @@ Status GlobalOrderWriter::prepare_full_tiles_var( auto capacity = array_schema_.capacity(); auto cell_num = buffer_size / constants::cell_var_offset_size; auto& domain{array_schema_.domain()}; - auto cell_num_per_tile = - coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); + auto cell_num_per_tile = dense() ? domain.cell_num_per_tile() : capacity; auto attr_datatype_size = datatype_size(array_schema_.type(name)); // Do nothing if there are no cells to write @@ -1455,8 +1452,7 @@ Status GlobalOrderWriter::start_new_fragment() { // Create a new fragment. current_fragment_size_ = 0; - RETURN_NOT_OK(create_fragment( - !coords_info_.has_coords_, global_write_state_->frag_meta_)); + RETURN_NOT_OK(create_fragment(dense(), global_write_state_->frag_meta_)); return Status::Ok(); } diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index c15b81f67c8..cf4ba44ed61 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -391,6 +391,13 @@ class GlobalOrderWriter : public WriterBase { * be written at once. 
*/ Status start_new_fragment(); + + /** + * @return true if this write is to a dense fragment + */ + bool dense() const { + return !coords_info_.has_coords_; + } }; } // namespace sm From fac092f7bfd678ce6e0cefee12f2d45e1fbbd07e Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 26 Sep 2025 08:35:13 -0400 Subject: [PATCH 003/109] GlobalOrderWriter::identify_fragment_tile_boundaries --- .../sm/query/writers/global_order_writer.cc | 139 +++++++++++------- tiledb/sm/query/writers/global_order_writer.h | 23 +-- 2 files changed, 95 insertions(+), 67 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 32a5c1fb099..a4eac118b2a 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -91,7 +91,7 @@ GlobalOrderWriter::GlobalOrderWriter( remote_query, fragment_name) , processed_conditions_(processed_conditions) - , fragment_size_(fragment_size) + , max_fragment_size_(fragment_size) , current_fragment_size_(0) { // Check the layout is global order. if (layout_ != Layout::GLOBAL_ORDER) { @@ -748,21 +748,7 @@ Status GlobalOrderWriter::global_write() { query_memory_tracker_->get_resource(MemoryType::WRITER_TILE_DATA)); RETURN_CANCEL_OR_ERROR(prepare_full_tiles(coord_dups, &tiles)); - // Find number of tiles and gather stats - uint64_t tile_num = 0; - if (!tiles.empty()) { - auto it = tiles.begin(); - tile_num = it->second.size(); - - uint64_t cell_num = 0; - for (size_t t = 0; t < tile_num; ++t) { - cell_num += it->second[t].cell_num(); - } - stats_->add_counter("cell_num", cell_num); - stats_->add_counter("tile_num", tile_num); - } - - // No cells to be written + const uint64_t tile_num = (tiles.empty() ? 
0 : tiles.begin()->second.size()); if (tile_num == 0) { return Status::Ok(); } @@ -776,43 +762,44 @@ Status GlobalOrderWriter::global_write() { // Filter all tiles RETURN_CANCEL_OR_ERROR(filter_tiles(&tiles)); - uint64_t idx = 0; - while (idx < tile_num) { - auto frag_meta = global_write_state_->frag_meta_; - - // Compute the number of tiles that will fit in this fragment. - auto num = num_tiles_to_write(idx, tile_num, tiles); - - // If we're resuming a fragment write and the first tile doesn't fit into - // the previous fragment, we need to start a new fragment and recalculate - // the number of tiles to write. - if (current_fragment_size_ > 0 && num == 0) { - RETURN_CANCEL_OR_ERROR(start_new_fragment()); - num = num_tiles_to_write(idx, tile_num, tiles); - } - - // Set new number of tiles in the fragment metadata - auto new_num_tiles = frag_meta->tile_index_base() + num; - frag_meta->set_num_tiles(new_num_tiles); + const auto fragments = identify_fragment_tile_boundaries(tiles); - if (new_num_tiles == 0) { - throw GlobalOrderWriterException( - "Fragment size is too small to write a single tile"); - } + for (uint64_t f = 0; f < fragments.size(); f++) { + const uint64_t start_tile = fragments[f].second; + const uint64_t num_tiles = + (f + 1 < fragments.size() ? 
fragments[f + 1].second : tile_num) - + start_tile; - set_coords_metadata(idx, idx + num, tiles, mbrs, frag_meta); + auto frag_meta = global_write_state_->frag_meta_; + if (num_tiles == 0) { + // this should only happen if there is only one tile of input and we have + // to wait for finalize, or if continuing a fragment from a previous write + // and there is no more room + iassert(f == 0); + if (current_fragment_size_ > 0) { + RETURN_CANCEL_OR_ERROR(start_new_fragment()); + } else { + iassert(fragments.size() == 1); + } + } else { + if (f > 0) { + RETURN_CANCEL_OR_ERROR(start_new_fragment()); + } + // update metadata of current fragment + frag_meta->set_num_tiles(frag_meta->tile_index_base() + num_tiles); - // Write tiles for all attributes - RETURN_CANCEL_OR_ERROR(write_tiles(idx, idx + num, frag_meta, &tiles)); - idx += num; + set_coords_metadata( + start_tile, start_tile + num_tiles, tiles, mbrs, frag_meta); - // If we didn't write all tiles, close this fragment and start another. - if (idx != tile_num) { - RETURN_CANCEL_OR_ERROR(start_new_fragment()); + // write tiles for all attributes + RETURN_CANCEL_OR_ERROR( + write_tiles(start_tile, start_tile + num_tiles, frag_meta, &tiles)); } + frag_meta->set_tile_index_base(frag_meta->tile_index_base() + num_tiles); + } - // Increment the tile index base for the next global order write. - frag_meta->set_tile_index_base(new_num_tiles); + if (!fragments.empty()) { + current_fragment_size_ = fragments.back().first; } return Status::Ok(); @@ -1368,10 +1355,22 @@ Status GlobalOrderWriter::prepare_full_tiles_var( return Status::Ok(); } -uint64_t GlobalOrderWriter::num_tiles_to_write( - uint64_t start, - uint64_t tile_num, - tdb::pmr::unordered_map& tiles) { +/** + * Identifies the division of input cells into target fragments, + * using `max_fragment_size_` as a hard limit on the target fragment size. 
+ * + * `current_fragment_size_` may be nonzero if continuing a fragment from + * a previous `submit()`, so this field is used to initialize the fragment size + * before the first tile is examined. + * + * @param tiles + * + * @return a list of (fragment size, tile offset) pairs identifying the division + * of input data into target fragments + */ +std::vector> +GlobalOrderWriter::identify_fragment_tile_boundaries( + tdb::pmr::unordered_map& tiles) const { // Cache variables to prevent map lookups. const auto buf_names = buffer_names(); std::vector var_size; @@ -1386,8 +1385,28 @@ uint64_t GlobalOrderWriter::num_tiles_to_write( writer_tile_vectors.emplace_back(&tiles.at(name)); } + // Find number of tiles and gather stats + uint64_t tile_num = 0; + if (!tiles.empty()) { + auto it = tiles.begin(); + tile_num = it->second.size(); + + uint64_t cell_num = 0; + for (size_t t = 0; t < tile_num; ++t) { + cell_num += it->second[t].cell_num(); + } + stats_->add_counter("cell_num", cell_num); + stats_->add_counter("tile_num", tile_num); + } + + uint64_t fragment_size = current_fragment_size_; + uint64_t fragment_start = 0; + std::vector> fragments; + // Make sure we don't write more than the desired fragment size. 
- for (uint64_t t = start; t < tile_num; t++) { + // FIXME: for dense array this has to be aligned to a "hyper-row" + // so that we can have + for (uint64_t t = 0; t < tile_num; t++) { uint64_t tile_size = 0; for (uint64_t a = 0; a < buf_names.size(); a++) { if (var_size[a]) { @@ -1412,14 +1431,22 @@ uint64_t GlobalOrderWriter::num_tiles_to_write( } } - if (current_fragment_size_ + tile_size > fragment_size_) { - return t - start; + if (fragment_size + tile_size > max_fragment_size_) { + if (fragment_size == 0) { + throw GlobalOrderWriterException( + "Fragment size is too small to write a single tile"); + } + fragments.push_back(std::make_pair(fragment_size, fragment_start)); + fragment_size = 0; + fragment_start = t; } - current_fragment_size_ += tile_size; + fragment_size += tile_size; } - return tile_num - start; + fragments.push_back(std::make_pair(fragment_size, fragment_start)); + + return fragments; } Status GlobalOrderWriter::start_new_fragment() { diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index cf4ba44ed61..b08e50a6d71 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -208,7 +208,7 @@ class GlobalOrderWriter : public WriterBase { * The desired fragment size, in bytes. The writer will create a new fragment * once this size has been reached. */ - uint64_t fragment_size_; + uint64_t max_fragment_size_; /** * Size currently written to the fragment. @@ -371,19 +371,20 @@ class GlobalOrderWriter : public WriterBase { WriterTileTupleVector* tiles) const; /** - * Return the number of tiles to write depending on the desired fragment - * size. The tiles passed in as an argument should have already been - * filtered. + * Identify the manner in which the filtered input tiles map onto target + * fragments. If `max_fragment_size_` is much larger than the input, this may + * return just one result. 
+ * + * Each element of the returned vector is a pair `(fragment_size, start_tile)` + * indicating the size of the fragment, and the first tile offset which + * corresponds to that fragment. * - * @param start Current tile index. - * @param tile_num Number of tiles in the tiles vectors. * @param tiles Map of vector of tiles, per attributes. - * @return Number of tiles to write. + * @return a list of `(fragment_size, start_tile)` pairs ordered on + * `start_tile` */ - uint64_t num_tiles_to_write( - uint64_t start, - uint64_t tile_num, - tdb::pmr::unordered_map& tiles); + std::vector> identify_fragment_tile_boundaries( + tdb::pmr::unordered_map& tiles) const; /** * Close the current fragment and start a new one. The closed fragment will From b9cb68f58bf533871e079baf2b0ff7b503106e00 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 11:36:17 -0400 Subject: [PATCH 004/109] Test "C++ API: Max fragment size dense array" with one example --- test/src/unit-cppapi-max-fragment-size.cc | 173 ++++++++++++++++++++++ test/support/src/array_templates.h | 6 + tiledb/common/arithmetic.h | 14 ++ 3 files changed, 193 insertions(+) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index dd79e638fe4..07b272b2bae 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -30,8 +30,12 @@ * Tests the C++ API for maximum fragment size. 
*/ +#include #include +#include "test/support/src/array_helpers.h" +#include "test/support/src/array_templates.h" #include "test/support/src/helpers.h" +#include "tiledb/common/arithmetic.h" #include "tiledb/common/scoped_executor.h" #include "tiledb/common/stdx_string.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" @@ -41,6 +45,7 @@ #include using namespace tiledb; +using namespace tiledb::test; struct CPPMaxFragmentSizeFx { const int max_domain = 1000000; @@ -503,3 +508,171 @@ TEST_CASE( array.close(); } + +std::optional subarray_num_cells( + std::span> subarray) { + uint64_t num_cells = 1; + for (const auto& dim : subarray) { + auto maybe = checked_arithmetic::mul(num_cells, dim.num_cells()); + if (!maybe.has_value()) { + return std::nullopt; + } + num_cells = maybe.value(); + } + return num_cells; +} + +template +std::vector>> +instance_dense_global_order( + const Context& ctx, + uint64_t max_fragment_size, + const std::vector>& dimensions, + const std::vector>& subarray) { + const std::string array_name = "max_fragment_size_dense_global_order"; + + const std::optional num_cells = subarray_num_cells(subarray); + ASSERTER(num_cells.has_value()); + + Domain domain(ctx); + for (uint64_t d = 0; d < dimensions.size(); d++) { + const std::string dname = "d" + std::to_string(d); + auto dim = Dimension::create( + ctx, + dname, + {{dimensions[d].domain.lower_bound, dimensions[d].domain.upper_bound}}, + dimensions[d].extent); + domain.add_dimension(dim); + } + + auto a = Attribute::create(ctx, "a"); + ArraySchema schema(ctx, TILEDB_DENSE); + schema.set_domain(domain); + schema.add_attributes(a); + + Array::create(array_name, schema); + test::DeleteArrayGuard del(ctx.ptr().get(), array_name.c_str()); + + const int a_offset = 77; + std::vector a_write; + a_write.reserve(num_cells.value()); + for (int i = 0; i < static_cast(num_cells.value()); i++) { + a_write.push_back(a_offset + i); + } + + std::vector api_subarray; + api_subarray.reserve(2 * subarray.size()); + for 
(const auto& sub_dim : subarray) { + api_subarray.push_back(sub_dim.lower_bound); + api_subarray.push_back(sub_dim.upper_bound); + } + + // write data, should be split into multiple fragments + { + Array array(ctx, array_name, TILEDB_WRITE); + + Subarray sub(ctx, array); + sub.set_subarray(api_subarray); + + Query query(ctx, array, TILEDB_WRITE); + query.set_layout(TILEDB_GLOBAL_ORDER); + query.set_subarray(sub); + query.set_data_buffer("a", a_write); + + query.ptr().get()->query_->set_fragment_size(max_fragment_size); + + ASSERTER(query.submit() == Query::Status::COMPLETE); + query.finalize(); + } + + // then read back + std::vector a_read; + { + a_read.resize(a_write.size()); + + Array array(ctx, array_name, TILEDB_READ); + + Subarray sub(ctx, array); + sub.set_subarray(api_subarray); + + Query query(ctx, array, TILEDB_READ); + query.set_layout(TILEDB_GLOBAL_ORDER); + query.set_subarray(sub); + query.set_data_buffer("a", a_read); + + auto st = query.submit(); + ASSERTER(st == Query::Status::COMPLETE); + } + + ASSERTER(a_read == a_write); + + FragmentInfo finfo(ctx, array_name); + finfo.load(); + + // validate fragment size + for (uint32_t f = 0; f < finfo.fragment_num(); f++) { + const uint64_t fsize = finfo.fragment_size(f); + ASSERTER(fsize <= max_fragment_size); + } + + // collect fragment domains + std::vector>> fragment_domains; + for (uint32_t f = 0; f < finfo.fragment_num(); f++) { + std::vector> this_fragment_domain; + for (uint64_t d = 0; d < dimensions.size(); d++) { + uint64_t bounds[2]; + finfo.get_non_empty_domain(f, d, &bounds[0]); + this_fragment_domain.push_back( + templates::Domain(bounds[0], bounds[1])); + } + fragment_domains.push_back(this_fragment_domain); + } + + // validate fragment domains + ASSERTER(!fragment_domains.empty()); + ASSERTER(fragment_domains[0][0].lower_bound == subarray[0].lower_bound); + ASSERTER(fragment_domains.back()[0].upper_bound == subarray[0].upper_bound); + for (uint32_t f = 0; f < fragment_domains.size(); f++) { + 
if (f > 0) { + ASSERTER( + fragment_domains[f - 1][0].upper_bound + 1 == + fragment_domains[f][0].lower_bound); + } + // non-first dimensions should match + for (uint64_t d = 1; d < dimensions.size(); d++) { + ASSERTER(fragment_domains[f][d] == subarray[d]); + } + } + + return fragment_domains; +} + +TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { + const std::string array_name = + "cppapi_consolidation_dense_domain_arithmetic_overflow"; + + Context ctx; + + SECTION("Example") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + constexpr size_t span_d2 = 10000; + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, 1), + Dim(0, span_d2 - 1, span_d2)}; + + const uint64_t base_d1 = 12345; + const std::vector subarray = { + Dom(base_d1 + 0, base_d1 + 1), Dom(0, span_d2 - 1)}; + + const std::vector> expect = { + {Dom(base_d1 + 0, base_d1 + 0), Dom(0, span_d2 - 1)}, + {Dom(base_d1 + 1, base_d1 + 1), Dom(0, span_d2 - 1)}}; + + const auto actual = instance_dense_global_order( + ctx, 64 * 1024, dimensions, subarray); + + CHECK(expect == actual); + } +} diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index bc75487e8ca..6fa20b1680a 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -180,6 +180,8 @@ struct Domain { , upper_bound(std::max(d1, d2)) { } + bool operator==(const Domain&) const = default; + uint64_t num_cells() const { // FIXME: this is incorrect for 64-bit domains which need to check overflow if (std::is_signed::value) { @@ -224,6 +226,10 @@ struct Dimension { , extent(extent) { } + Dimension(value_type lower_bound, value_type upper_bound, value_type extent) + : Dimension(Domain(lower_bound, upper_bound), extent) { + } + Domain domain; value_type extent; }; diff --git a/tiledb/common/arithmetic.h b/tiledb/common/arithmetic.h index 08ced92e703..3aa85a6ae58 100644 --- a/tiledb/common/arithmetic.h +++ 
b/tiledb/common/arithmetic.h @@ -196,6 +196,20 @@ struct checked_arithmetic { return -negated.value(); } } + + /** + * @return `a * b` if it can be represented as a `uint64_t` without undefined + * behavior, `std::nullopt` otherwise + */ + static std::optional mul(uint64_t a, uint64_t b) { + if (b == 0) { + return 0; + } else if (a > std::numeric_limits::max() / b) { + return std::nullopt; + } else { + return a * b; + } + } }; template <> From 0271827d8dcb7c0502b9e0d0d930d01134792dff Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 11:36:50 -0400 Subject: [PATCH 005/109] Non-const Range::start_fixed and Range::end_fixed --- tiledb/type/range/range.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tiledb/type/range/range.h b/tiledb/type/range/range.h index 3ea85383a2e..89a65d7ff0a 100644 --- a/tiledb/type/range/range.h +++ b/tiledb/type/range/range.h @@ -291,6 +291,12 @@ class Range { return range_.data(); } + inline void* start_fixed() { + iassert(!var_size_); + iassert(range_.size() != 0); + return range_.data(); + } + /** Copies 'start' into this range's start bytes for fixed-size ranges. */ void set_start_fixed(const void* const start) { if (var_size_) { @@ -354,6 +360,13 @@ class Range { return &range_[end_pos]; } + void* end_fixed() { + iassert(!var_size_); + iassert(range_.size() != 0); + auto end_pos = range_.size() / 2; + return &range_[end_pos]; + } + /** Copies 'end' into this range's end bytes for fixed-size ranges. 
*/ void set_end_fixed(const void* const end) { if (var_size_) { From 2f584e94ef895890e1a8f540f11ba02714f5ad95 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 11:38:26 -0400 Subject: [PATCH 006/109] Domain::update_cell_num_per_tile --- tiledb/sm/array_schema/domain.cc | 56 +++++++++++++++----------------- tiledb/sm/array_schema/domain.h | 11 +++++-- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index bef2c6b8ad9..bde13d0ed98 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -105,7 +105,7 @@ Domain::Domain( } // Compute number of cells per tile - compute_cell_num_per_tile(); + update_cell_num_per_tile(); // Compute number of cells per tile set_tile_cell_order_cmp_funcs(); @@ -134,7 +134,7 @@ void Domain::add_dimension(shared_ptr dim) { ++dim_num_; // Compute number of cells per tile - compute_cell_num_per_tile(); + update_cell_num_per_tile(); } bool Domain::all_dims_fixed() const { @@ -187,7 +187,7 @@ bool Domain::all_dims_same_type() const { } uint64_t Domain::cell_num_per_tile() const { - return cell_num_per_tile_; + return cell_num_per_tile_.value_or(0); } template <> @@ -716,38 +716,30 @@ int Domain::tile_order_cmp( /* PRIVATE METHODS */ /* ****************************** */ -void Domain::compute_cell_num_per_tile() { +std::optional Domain::compute_cell_num_per_tile() const { // Applicable to dimensions that have the same type if (!all_dims_same_type()) - return; + return std::nullopt; // Invoke the proper templated function auto type{dimension_ptrs_[0]->type()}; switch (type) { case Datatype::INT32: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); case Datatype::INT64: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); case Datatype::INT8: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); case Datatype::UINT8: - compute_cell_num_per_tile(); 
- break; + return compute_cell_num_per_tile(); case Datatype::INT16: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); case Datatype::UINT16: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); case Datatype::UINT32: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); case Datatype::UINT64: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); case Datatype::DATETIME_YEAR: case Datatype::DATETIME_MONTH: case Datatype::DATETIME_WEEK: @@ -770,29 +762,33 @@ void Domain::compute_cell_num_per_tile() { case Datatype::TIME_PS: case Datatype::TIME_FS: case Datatype::TIME_AS: - compute_cell_num_per_tile(); - break; + return compute_cell_num_per_tile(); default: - return; + return std::nullopt; } } template -void Domain::compute_cell_num_per_tile() { +std::optional Domain::compute_cell_num_per_tile() const { // Applicable only to integer domains if (!std::is_integral::value) - return; + return std::nullopt; // Applicable only to non-NULL space tiles if (null_tile_extents()) - return; + return std::nullopt; - cell_num_per_tile_ = 1; + uint64_t cell_num_per_tile = 1; for (unsigned d = 0; d < dim_num_; ++d) { auto tile_extent = *(const T*)this->tile_extent(d).data(); - cell_num_per_tile_ = - Dimension::tile_extent_mult(cell_num_per_tile_, tile_extent); + cell_num_per_tile = + Dimension::tile_extent_mult(cell_num_per_tile, tile_extent); } + return cell_num_per_tile; +} + +void Domain::update_cell_num_per_tile() { + cell_num_per_tile_ = compute_cell_num_per_tile(); } void Domain::set_tile_cell_order_cmp_funcs() { diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index fa6a1a7188a..4132f2dbc6e 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -496,7 +496,7 @@ class Domain { shared_ptr memory_tracker_; /** The number of cells per tile. Meaningful only for the **dense** case. 
*/ - uint64_t cell_num_per_tile_; + std::optional cell_num_per_tile_; /** The cell order of the array the domain belongs to. */ Layout cell_order_; @@ -599,7 +599,7 @@ class Domain { const Dimension* dim, const void* coord_a, const void* coord_b); /** Compute the number of cells per tile. */ - void compute_cell_num_per_tile(); + std::optional compute_cell_num_per_tile() const; /** * Compute the number of cells per tile. @@ -608,7 +608,12 @@ class Domain { * @return void */ template - void compute_cell_num_per_tile(); + std::optional compute_cell_num_per_tile() const; + + /** + * Computes and updates the number of cells per tile. + */ + void update_cell_num_per_tile(); /** Prepares the comparator functions for each dimension. */ void set_tile_cell_order_cmp_funcs(); From 5dedb00814330f72d155ad8b0c1b7fd912eb265d Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 11:39:09 -0400 Subject: [PATCH 007/109] WriterBase::create_fragment optional domain arg --- tiledb/sm/query/writers/writer_base.cc | 6 ++++-- tiledb/sm/query/writers/writer_base.h | 7 ++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tiledb/sm/query/writers/writer_base.cc b/tiledb/sm/query/writers/writer_base.cc index 3e13b990dc9..5b500c18c7b 100644 --- a/tiledb/sm/query/writers/writer_base.cc +++ b/tiledb/sm/query/writers/writer_base.cc @@ -757,7 +757,9 @@ std::string WriterBase::coords_to_str(uint64_t i) const { } Status WriterBase::create_fragment( - bool dense, shared_ptr& frag_meta) { + bool dense, + shared_ptr& frag_meta, + const NDRange* domain) { // Get write version, timestamp array was opened, and a reference to the // array directory. auto write_version = array_->array_schema_latest().write_version(); @@ -787,7 +789,7 @@ Status WriterBase::create_fragment( has_timestamps, has_delete_metadata); - frag_meta->init(subarray_.ndrange(0)); + frag_meta->init(domain ? 
*domain : subarray_.ndrange(0)); return Status::Ok(); } diff --git a/tiledb/sm/query/writers/writer_base.h b/tiledb/sm/query/writers/writer_base.h index 4f085c1b7c8..b16022a3ce0 100644 --- a/tiledb/sm/query/writers/writer_base.h +++ b/tiledb/sm/query/writers/writer_base.h @@ -293,9 +293,14 @@ class WriterBase : public StrategyBase, public IQueryStrategy { * * @param dense Whether the fragment is dense or not. * @param frag_meta The fragment metadata to be generated. + * @param domain Optional domain for the fragment, uses subarray 0th range if + * not provided * @return Status */ - Status create_fragment(bool dense, shared_ptr& frag_meta); + Status create_fragment( + bool dense, + shared_ptr& frag_meta, + const NDRange* domain = nullptr); /** * Runs the input coordinate and attribute tiles through their From 42e8068535428cd646415c133034c63a646f5068 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 11:46:09 -0400 Subject: [PATCH 008/109] Fix global order writer dense fragment domain with max fragment size --- .../sm/query/writers/global_order_writer.cc | 114 ++++++++++++++---- tiledb/sm/query/writers/global_order_writer.h | 7 +- 2 files changed, 95 insertions(+), 26 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index a4eac118b2a..b1195af8e0c 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -52,6 +52,7 @@ #include "tiledb/sm/tile/tile_metadata_generator.h" #include "tiledb/sm/tile/writer_tile_tuple.h" #include "tiledb/storage_format/uri/generate_uri.h" +#include "tiledb/type/apply_with_type.h" using namespace tiledb; using namespace tiledb::common; @@ -182,11 +183,6 @@ Status GlobalOrderWriter::alloc_global_write_state() { "properly finalized")); global_write_state_.reset(tdb_new(GlobalWriteState, query_memory_tracker_)); - // Alloc FragmentMetadata object - global_write_state_->frag_meta_ = 
this->create_fragment_metadata(); - // Used in serialization when FragmentMetadata is built from ground up - global_write_state_->frag_meta_->set_context_resources(&resources_); - return Status::Ok(); } @@ -727,8 +723,6 @@ Status GlobalOrderWriter::global_write() { // Initialize the global write state if this is the first invocation if (!global_write_state_) { RETURN_CANCEL_OR_ERROR(alloc_global_write_state()); - RETURN_CANCEL_OR_ERROR( - create_fragment(dense(), global_write_state_->frag_meta_)); RETURN_CANCEL_OR_ERROR(init_global_write_state()); } @@ -770,21 +764,23 @@ Status GlobalOrderWriter::global_write() { (f + 1 < fragments.size() ? fragments[f + 1].second : tile_num) - start_tile; - auto frag_meta = global_write_state_->frag_meta_; if (num_tiles == 0) { // this should only happen if there is only one tile of input and we have // to wait for finalize, or if continuing a fragment from a previous write // and there is no more room iassert(f == 0); if (current_fragment_size_ > 0) { - RETURN_CANCEL_OR_ERROR(start_new_fragment()); + RETURN_CANCEL_OR_ERROR(start_new_fragment(start_tile, num_tiles)); } else { iassert(fragments.size() == 1); } } else { - if (f > 0) { - RETURN_CANCEL_OR_ERROR(start_new_fragment()); + if (f > 0 || !global_write_state_->frag_meta_) { + RETURN_CANCEL_OR_ERROR(start_new_fragment(start_tile, num_tiles)); } + + auto frag_meta = global_write_state_->frag_meta_; + // update metadata of current fragment frag_meta->set_num_tiles(frag_meta->tile_index_base() + num_tiles); @@ -795,7 +791,9 @@ Status GlobalOrderWriter::global_write() { RETURN_CANCEL_OR_ERROR( write_tiles(start_tile, start_tile + num_tiles, frag_meta, &tiles)); } - frag_meta->set_tile_index_base(frag_meta->tile_index_base() + num_tiles); + + global_write_state_->frag_meta_->set_tile_index_base( + global_write_state_->frag_meta_->tile_index_base() + num_tiles); } if (!fragments.empty()) { @@ -1449,23 +1447,78 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( return 
fragments; } -Status GlobalOrderWriter::start_new_fragment() { - auto frag_meta = global_write_state_->frag_meta_; - auto& uri = frag_meta->fragment_uri(); +/** + * Splits a domain at a tile boundary and returns the two halves of the split. + * + * When writing multiple dense fragments the domain of each fragment + * must accurately reflect the coordinates contained in that fragment. + * This is called when starting a new fragment to update the domain of the + * previous fragment and set the correct starting domain of the new one. + * + * @precondition `tile_offset` must be an offset which bisects the input + * hyper-rectangle into two new hyper-rectangle + */ +static NDRange domain_tile_offset( + const Domain& arraydomain, + const NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + // if a hyper-rectangle is a generalization of a rectangle to N dimensions, + // then let's say a "hyper-row" is a generalization of a row to N dimensions, + // i.e. a hyper-rectangle whose length is 1 in the outer-most dimension + + // compute difference so we can determine number of tiles per hyper-row + const uint64_t domain_num_tiles = arraydomain.tile_num(domain); + + NDRange adjusted = domain; + + // normalize `adjusted` to a single hyper-row so that we can compute number of + // tiles per hyper-row, and thus the number of hyper-rows in the domain + memcpy( + adjusted[0].end_fixed(), + adjusted[0].start_fixed(), + adjusted[0].size() / 2); + + const uint64_t hyperrow_num_tiles = arraydomain.tile_num(adjusted); + iassert(domain_num_tiles % hyperrow_num_tiles == 0); + iassert(start_tile % hyperrow_num_tiles == 0); + iassert(num_tiles % hyperrow_num_tiles == 0); + + const uint64_t start_hyperrow = start_tile / hyperrow_num_tiles; + const uint64_t num_hyperrows = num_tiles / hyperrow_num_tiles; + iassert(num_hyperrows > 0); + + auto fix_bounds = [&](T) { + *static_cast(adjusted[0].start_fixed()) += start_hyperrow; + *static_cast(adjusted[0].end_fixed()) += + 
start_hyperrow + num_hyperrows - 1; + }; + apply_with_type(fix_bounds, arraydomain.dimension_ptr(0)->type()); + + return adjusted; +} + +Status GlobalOrderWriter::start_new_fragment( + uint64_t tile_start, uint64_t num_tiles) { + // finish off current fragment if there is one + if (global_write_state_->frag_meta_) { + auto frag_meta = global_write_state_->frag_meta_; + auto& uri = frag_meta->fragment_uri(); - // Close all files - RETURN_NOT_OK(close_files(frag_meta)); + // Close all files + RETURN_NOT_OK(close_files(frag_meta)); - // Set the processed conditions - frag_meta->set_processed_conditions(processed_conditions_); + // Set the processed conditions + frag_meta->set_processed_conditions(processed_conditions_); - // Compute fragment min/max/sum/null count - frag_meta->compute_fragment_min_max_sum_null_count(); + // Compute fragment min/max/sum/null count + frag_meta->compute_fragment_min_max_sum_null_count(); - // Flush fragment metadata to storage - frag_meta->store(array_->get_encryption_key()); + // Flush fragment metadata to storage + frag_meta->store(array_->get_encryption_key()); - frag_uris_to_commit_.emplace_back(uri); + frag_uris_to_commit_.emplace_back(uri); + } // Make a new fragment URI. const auto write_version = array_->array_schema_latest().write_version(); @@ -1477,9 +1530,20 @@ Status GlobalOrderWriter::start_new_fragment() { write_version); fragment_uri_ = frag_dir_uri.join_path(new_fragment_str); + // Set domain of new fragment if needed + std::optional new_fragment_domain; + if (dense()) { + new_fragment_domain = domain_tile_offset( + array_schema_.domain(), subarray_.ndrange(0), tile_start, num_tiles); + } + // Create a new fragment. current_fragment_size_ = 0; - RETURN_NOT_OK(create_fragment(dense(), global_write_state_->frag_meta_)); + RETURN_NOT_OK(create_fragment( + dense(), + global_write_state_->frag_meta_, + new_fragment_domain.has_value() ? 
&new_fragment_domain.value() : + nullptr)); return Status::Ok(); } diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index b08e50a6d71..65cf6304574 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -390,8 +390,13 @@ class GlobalOrderWriter : public WriterBase { * Close the current fragment and start a new one. The closed fragment will * be added to `frag_uris_to_commit_` so that all fragments in progress can * be written at once. + * + * @param tile_start the tile offset into the subarray domain where the + * fragment starts (dense only) + * @param num_tiles the number of tiles which will be written to the new + * fragment (dense only) */ - Status start_new_fragment(); + Status start_new_fragment(uint64_t tile_start, uint64_t num_tiles); /** * @return true if this write is to a dense fragment From eda36034540ed8780eb056f2afec2eda2e55913e Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 15:15:11 -0400 Subject: [PATCH 009/109] Add 'Rectangle Tiles' section --- test/src/unit-cppapi-max-fragment-size.cc | 39 +++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 07b272b2bae..03a66a85b8e 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -581,7 +581,8 @@ instance_dense_global_order( query.ptr().get()->query_->set_fragment_size(max_fragment_size); - ASSERTER(query.submit() == Query::Status::COMPLETE); + const auto status = query.submit(); + ASSERTER(status == Query::Status::COMPLETE); query.finalize(); } @@ -653,7 +654,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { Context ctx; - SECTION("Example") { + SECTION("Row tiles") { using Dim = templates::Dimension; using Dom = templates::Domain; @@ -675,4 +676,38 @@ 
TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { CHECK(expect == actual); } + + SECTION("Rectangle tiles") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + const uint64_t extent_d1 = GENERATE(8, 4); + constexpr size_t span_d2 = 10000; + REQUIRE(span_d2 % extent_d1 == 0); + + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, extent_d1), + Dim(0, span_d2 - 1, span_d2 / extent_d1)}; + + const uint64_t base_d1 = 100; + const std::vector subarray = { + Dom(base_d1 + 0, base_d1 + 7), Dom(0, span_d2 - 1)}; + + if (extent_d1 == 8) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS(instance_dense_global_order( + ctx, 64 * 1024, dimensions, subarray)); + } else { + const std::vector> expect = { + {Dom(base_d1 + 0, base_d1 + 0), Dom(0, span_d2 - 1)}, + {Dom(base_d1 + 1, base_d1 + 1), Dom(0, span_d2 - 1)}}; + + const auto actual = instance_dense_global_order( + ctx, 64 * 1024, dimensions, subarray); + + CHECK(expect == actual); + } + } } From cd8a6c7b2e239bb5bf11ae8b42343fcfc0f3fce3 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 21:47:52 -0400 Subject: [PATCH 010/109] Attribute::set_fill_value --- tiledb/sm/cpp_api/attribute.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tiledb/sm/cpp_api/attribute.h b/tiledb/sm/cpp_api/attribute.h index 6aacce384eb..03ca2d696ef 100644 --- a/tiledb/sm/cpp_api/attribute.h +++ b/tiledb/sm/cpp_api/attribute.h @@ -275,6 +275,11 @@ class Attribute { return *this; } + template + Attribute& set_fill_value(T value) { + return set_fill_value(static_cast(&value), sizeof(T)); + } + /** * Gets the default fill value for the input attribute. 
This value will * be used for the input attribute whenever querying (1) an empty cell in From ec4026f6a09d677f7baeb91dca0ff9b563de608b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 29 Sep 2025 21:48:25 -0400 Subject: [PATCH 011/109] Fix GlobalOrderWriter::clean_up if no fragment is started yet --- tiledb/sm/query/writers/global_order_writer.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index b1195af8e0c..d2dcfea274c 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -494,12 +494,14 @@ Status GlobalOrderWriter::check_global_order_hilbert() const { void GlobalOrderWriter::clean_up() { if (global_write_state_ != nullptr) { - const auto& uri = global_write_state_->frag_meta_->fragment_uri(); + if (global_write_state_->frag_meta_) { + const auto& uri = global_write_state_->frag_meta_->fragment_uri(); - // Cleanup the fragment we are currently writing. There is a chance that the - // URI is empty if creating the first fragment had failed. - if (!uri.empty()) { - resources_.vfs().remove_dir(uri); + // Cleanup the fragment we are currently writing. There is a chance that + // the URI is empty if creating the first fragment had failed. 
+ if (!uri.empty()) { + resources_.vfs().remove_dir(uri); + } } global_write_state_.reset(nullptr); From 4bf45f07b227f4f9c109c6e76f5cb75e1bc3d4da Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 30 Sep 2025 08:07:33 -0400 Subject: [PATCH 012/109] Fix rectangle tiles test --- test/src/unit-cppapi-max-fragment-size.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 03a66a85b8e..dc2078f8fdf 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -693,19 +693,21 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const std::vector subarray = { Dom(base_d1 + 0, base_d1 + 7), Dom(0, span_d2 - 1)}; + const uint64_t max_fragment_size = 4 * 64 * 1024; + if (extent_d1 == 8) { const auto expect = Catch::Matchers::ContainsSubstring( "Fragment size is too small to subdivide dense subarray into " "multiple fragments"); REQUIRE_THROWS(instance_dense_global_order( - ctx, 64 * 1024, dimensions, subarray)); + ctx, max_fragment_size, dimensions, subarray)); } else { const std::vector> expect = { - {Dom(base_d1 + 0, base_d1 + 0), Dom(0, span_d2 - 1)}, - {Dom(base_d1 + 1, base_d1 + 1), Dom(0, span_d2 - 1)}}; + {Dom(base_d1 + 0, base_d1 + 3), Dom(0, span_d2 - 1)}, + {Dom(base_d1 + 4, base_d1 + 7), Dom(0, span_d2 - 1)}}; const auto actual = instance_dense_global_order( - ctx, 64 * 1024, dimensions, subarray); + ctx, max_fragment_size, dimensions, subarray); CHECK(expect == actual); } From 61778128c6ee1cc95d7d87966ec6b1cfc8bf9ed0 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 30 Sep 2025 08:08:30 -0400 Subject: [PATCH 013/109] Test handles fragments being written out of order --- test/src/unit-cppapi-max-fragment-size.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc 
index dc2078f8fdf..74ac9b1b430 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -629,6 +629,25 @@ instance_dense_global_order( fragment_domains.push_back(this_fragment_domain); } + // the fragments are not always emitted in the same order, sort them + std::sort( + fragment_domains.begin(), + fragment_domains.end(), + [&](const auto& left, const auto& right) -> bool { + for (uint64_t d = 0; d < dimensions.size(); d++) { + if (left[d].lower_bound < right[d].lower_bound) { + return true; + } else if (left[d].lower_bound > right[d].lower_bound) { + return false; + } else if (left[d].upper_bound < right[d].upper_bound) { + return true; + } else if (left[d].upper_bound > right[d].upper_bound) { + return false; + } + } + return false; + }); + // validate fragment domains ASSERTER(!fragment_domains.empty()); ASSERTER(fragment_domains[0][0].lower_bound == subarray[0].lower_bound); From b58d9ac241c0d022fd42f8377c5796151fd47030 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 30 Sep 2025 08:09:22 -0400 Subject: [PATCH 014/109] Global writer aligns dense domains to tile row --- .../sm/query/writers/global_order_writer.cc | 95 +++++++++++++------ 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index d2dcfea274c..a10ea31cb5b 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1355,6 +1355,28 @@ Status GlobalOrderWriter::prepare_full_tiles_var( return Status::Ok(); } +/** + * @return the number of tiles in a "hyper-row" of `subarray` within + * `arraydomain` + * + * If a "hyper-rectangle" is a generalization of a rectangle to N dimensions, + * then let's define a "hyper-row" to be a generalization of a row to N + * dimensions. That is, a "hyper-row" is a hyper-rectangle whose length is 1 in + * the outer-most dimension. 
+ */ +static uint64_t compute_hyperrow_num_tiles( + const Domain& arraydomain, const NDRange& subarray) { + NDRange adjusted = subarray; + + // normalize `adjusted` to a single hyper-row + memcpy( + adjusted[0].end_fixed(), + adjusted[0].start_fixed(), + adjusted[0].size() / 2); + + return arraydomain.tile_num(adjusted); +} + /** * Identifies the division of input cells into target fragments, * using `max_fragment_size_` as a hard limit on the target fragment size. @@ -1399,13 +1421,19 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( stats_->add_counter("tile_num", tile_num); } + uint64_t running_tiles_size = current_fragment_size_; uint64_t fragment_size = current_fragment_size_; + uint64_t fragment_start = 0; + std::optional fragment_end; std::vector> fragments; + const uint64_t hyperrow_num_tiles = + (dense() ? compute_hyperrow_num_tiles( + array_schema_.domain(), subarray_.ndrange(0)) : + 1); + // Make sure we don't write more than the desired fragment size. - // FIXME: for dense array this has to be aligned to a "hyper-row" - // so that we can have for (uint64_t t = 0; t < tile_num; t++) { uint64_t tile_size = 0; for (uint64_t a = 0; a < buf_names.size(); a++) { @@ -1431,20 +1459,30 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( } } - if (fragment_size + tile_size > max_fragment_size_) { - if (fragment_size == 0) { + if (running_tiles_size + tile_size > max_fragment_size_) { + if (running_tiles_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to write a single tile"); + } else if (!fragment_end.has_value()) { + throw GlobalOrderWriterException( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); } + fragments.push_back(std::make_pair(fragment_size, fragment_start)); - fragment_size = 0; - fragment_start = t; + + running_tiles_size = 0; + fragment_start = fragment_end.value(); + fragment_end.reset(); + } else if (((t + 1) - fragment_start) % hyperrow_num_tiles == 0) { + 
fragment_size = running_tiles_size + tile_size; + fragment_end = t + 1; } - fragment_size += tile_size; + running_tiles_size += tile_size; } - fragments.push_back(std::make_pair(fragment_size, fragment_start)); + fragments.push_back(std::make_pair(running_tiles_size, fragment_start)); return fragments; } @@ -1465,23 +1503,9 @@ static NDRange domain_tile_offset( const NDRange& domain, uint64_t start_tile, uint64_t num_tiles) { - // if a hyper-rectangle is a generalization of a rectangle to N dimensions, - // then let's say a "hyper-row" is a generalization of a row to N dimensions, - // i.e. a hyper-rectangle whose length is 1 in the outer-most dimension - - // compute difference so we can determine number of tiles per hyper-row const uint64_t domain_num_tiles = arraydomain.tile_num(domain); - - NDRange adjusted = domain; - - // normalize `adjusted` to a single hyper-row so that we can compute number of - // tiles per hyper-row, and thus the number of hyper-rows in the domain - memcpy( - adjusted[0].end_fixed(), - adjusted[0].start_fixed(), - adjusted[0].size() / 2); - - const uint64_t hyperrow_num_tiles = arraydomain.tile_num(adjusted); + const uint64_t hyperrow_num_tiles = + compute_hyperrow_num_tiles(arraydomain, domain); iassert(domain_num_tiles % hyperrow_num_tiles == 0); iassert(start_tile % hyperrow_num_tiles == 0); iassert(num_tiles % hyperrow_num_tiles == 0); @@ -1490,10 +1514,27 @@ static NDRange domain_tile_offset( const uint64_t num_hyperrows = num_tiles / hyperrow_num_tiles; iassert(num_hyperrows > 0); + NDRange adjusted = domain; + auto fix_bounds = [&](T) { - *static_cast(adjusted[0].start_fixed()) += start_hyperrow; - *static_cast(adjusted[0].end_fixed()) += - start_hyperrow + num_hyperrows - 1; + const T extent = arraydomain.tile_extent(0).rvalue_as(); + T* start = static_cast(adjusted[0].start_fixed()); + T* end = static_cast(adjusted[0].end_fixed()); + if (start_tile == 0) { + // first hyperrow - the start is the same, align the end to the bottom 
of + // the tile + *end = ((*start + extent) / extent) * extent - 1; + } else if (start_tile + num_tiles < num_tiles) { + // intermediate hyperrow - advance hyperrow of tiles and align bounds to + // the start/end of the tile + *start += ((extent * start_hyperrow) / extent) * extent; + *end = *start + extent - 1; + } else { + // final hyperrow - advance to the final hyperrow of tiles, the start is + // tile aligned, the end is not + *start += ((extent * start_hyperrow) / extent) * extent; + *end = *static_cast(domain[0].end_fixed()); + } }; apply_with_type(fix_bounds, arraydomain.dimension_ptr(0)->type()); From 091e8ed205ba776f7f6a7e2a2a1cf7ca4f318289 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 30 Sep 2025 11:09:28 -0400 Subject: [PATCH 015/109] Fix tile bounds for larger subarray inputs --- test/src/unit-cppapi-max-fragment-size.cc | 36 +++++++++++++------ .../sm/query/writers/global_order_writer.cc | 26 ++++++-------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 74ac9b1b430..35f08a0f748 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -700,30 +700,46 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { using Dim = templates::Dimension; using Dom = templates::Domain; - const uint64_t extent_d1 = GENERATE(8, 4); - constexpr size_t span_d2 = 10000; - REQUIRE(span_d2 % extent_d1 == 0); + const uint64_t d1_extent = GENERATE(8, 4); + constexpr size_t d2_span = 10000; + REQUIRE(d2_span % d1_extent == 0); // for test setup const std::vector dimensions = { - Dim(0, std::numeric_limits::max() - 1, extent_d1), - Dim(0, span_d2 - 1, span_d2 / extent_d1)}; + Dim(0, std::numeric_limits::max() - 1, d1_extent), + Dim(0, d2_span - 1, d2_span / d1_extent)}; - const uint64_t base_d1 = 100; + const uint64_t d1_start_offset = GENERATE(0, 1); + const uint64_t d1_end_offset = 
GENERATE(0, 1); + const uint64_t d1_start = 100 + d1_start_offset; + const uint64_t d1_end = d1_start + 15 - d1_end_offset; const std::vector subarray = { - Dom(base_d1 + 0, base_d1 + 7), Dom(0, span_d2 - 1)}; + Dom(d1_start, d1_end), Dom(0, d2_span - 1)}; const uint64_t max_fragment_size = 4 * 64 * 1024; - if (extent_d1 == 8) { + if (d1_extent == 8) { const auto expect = Catch::Matchers::ContainsSubstring( "Fragment size is too small to subdivide dense subarray into " "multiple fragments"); REQUIRE_THROWS(instance_dense_global_order( ctx, max_fragment_size, dimensions, subarray)); + } else if (d1_start_offset + d1_end_offset > 0) { + // if this constraint is ever relaxed this test must be extended + // with new inputs which are offset within a tile + const auto expect = Catch::Matchers::ContainsSubstring( + "the subarray must coincide with the tile bounds"); + REQUIRE_THROWS(instance_dense_global_order( + ctx, max_fragment_size, dimensions, subarray)); } else { const std::vector> expect = { - {Dom(base_d1 + 0, base_d1 + 3), Dom(0, span_d2 - 1)}, - {Dom(base_d1 + 4, base_d1 + 7), Dom(0, span_d2 - 1)}}; + {Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), + Dom(0, d2_span - 1)}}; const auto actual = instance_dense_global_order( ctx, max_fragment_size, dimensions, subarray); diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index a10ea31cb5b..cfea4a321df 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1471,7 +1471,9 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( fragments.push_back(std::make_pair(fragment_size, fragment_start)); - running_tiles_size = 0; + 
iassert(running_tiles_size >= fragment_size); + running_tiles_size -= fragment_size; + fragment_start = fragment_end.value(); fragment_end.reset(); } else if (((t + 1) - fragment_start) % hyperrow_num_tiles == 0) { @@ -1518,23 +1520,15 @@ static NDRange domain_tile_offset( auto fix_bounds = [&](T) { const T extent = arraydomain.tile_extent(0).rvalue_as(); + const T lower_bound = *static_cast(domain[0].start_fixed()); + const T upper_bound = *static_cast(domain[0].end_fixed()); T* start = static_cast(adjusted[0].start_fixed()); T* end = static_cast(adjusted[0].end_fixed()); - if (start_tile == 0) { - // first hyperrow - the start is the same, align the end to the bottom of - // the tile - *end = ((*start + extent) / extent) * extent - 1; - } else if (start_tile + num_tiles < num_tiles) { - // intermediate hyperrow - advance hyperrow of tiles and align bounds to - // the start/end of the tile - *start += ((extent * start_hyperrow) / extent) * extent; - *end = *start + extent - 1; - } else { - // final hyperrow - advance to the final hyperrow of tiles, the start is - // tile aligned, the end is not - *start += ((extent * start_hyperrow) / extent) * extent; - *end = *static_cast(domain[0].end_fixed()); - } + + auto align = [extent](T value) -> T { return (value / extent) * extent; }; + + *start = std::max(lower_bound, align(*start + extent * start_hyperrow)); + *end = std::min(upper_bound, align(*start + extent * num_hyperrows) - 1); }; apply_with_type(fix_bounds, arraydomain.dimension_ptr(0)->type()); From 6a8fc4e1c68396074b194255384a03b06ecf3cc8 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 30 Sep 2025 11:27:20 -0400 Subject: [PATCH 016/109] Fix 'Row tiles' section for larger inputs --- test/src/unit-cppapi-max-fragment-size.cc | 12 +++++++----- tiledb/sm/query/writers/global_order_writer.cc | 8 ++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 
35f08a0f748..e7ae77093cb 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -683,16 +683,18 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { Dim(0, span_d2 - 1, span_d2)}; const uint64_t base_d1 = 12345; + const uint64_t num_rows = GENERATE(1, 2, 4, 8); const std::vector subarray = { - Dom(base_d1 + 0, base_d1 + 1), Dom(0, span_d2 - 1)}; - - const std::vector> expect = { - {Dom(base_d1 + 0, base_d1 + 0), Dom(0, span_d2 - 1)}, - {Dom(base_d1 + 1, base_d1 + 1), Dom(0, span_d2 - 1)}}; + Dom(base_d1 + 0, base_d1 + num_rows - 1), Dom(0, span_d2 - 1)}; const auto actual = instance_dense_global_order( ctx, 64 * 1024, dimensions, subarray); + std::vector> expect; + for (uint64_t r = 0; r < num_rows; r++) { + expect.push_back({Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); + } + CHECK(expect == actual); } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index cfea4a321df..e0145caba97 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1459,7 +1459,9 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( } } - if (running_tiles_size + tile_size > max_fragment_size_) { + // NB: normally this should only hit once, but if there is a single + // tile larger than the max fragment size it could hit twice and error + while (running_tiles_size + tile_size > max_fragment_size_) { if (running_tiles_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to write a single tile"); @@ -1476,7 +1478,9 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( fragment_start = fragment_end.value(); fragment_end.reset(); - } else if (((t + 1) - fragment_start) % hyperrow_num_tiles == 0) { + } + + if (((t + 1) - fragment_start) % hyperrow_num_tiles == 0) { fragment_size = running_tiles_size + tile_size; fragment_end = t + 1; } From 
520f344a7bed0b4dbe2381c48b3746fc6db98363 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 30 Sep 2025 22:17:49 -0400 Subject: [PATCH 017/109] Dimension::num_tiles --- test/support/src/array_templates.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 6fa20b1680a..9466b7b5186 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -219,6 +219,7 @@ struct Domain { template struct Dimension { using value_type = tiledb::type::datatype_traits::value_type; + using domain_type = Domain; Dimension() = default; Dimension(Domain domain, value_type extent) @@ -232,6 +233,20 @@ struct Dimension { Domain domain; value_type extent; + + /** + * @return the number of tiles spanned by the whole domain of this dimension + */ + uint64_t num_tiles() const { + return num_tiles(domain); + } + + /** + * @return the number of tiles spanned by a range in this dimension + */ + uint64_t num_tiles(const domain_type& range) const { + return (range.num_cells() + extent - 1) / extent; + } }; template <> From 150ab8d6ab338a65a112475dcc6a7bcd6d2b3df1 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 30 Sep 2025 22:18:39 -0400 Subject: [PATCH 018/109] rapidcheck fix Domain show linking issue and add some more specializations --- test/support/rapidcheck/array_templates.h | 14 ++++++-- test/support/rapidcheck/show.cc | 40 +++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/test/support/rapidcheck/array_templates.h b/test/support/rapidcheck/array_templates.h index f2c1dacc232..c3caded4884 100644 --- a/test/support/rapidcheck/array_templates.h +++ b/test/support/rapidcheck/array_templates.h @@ -307,9 +307,17 @@ Gen> make_fragment_3d( }); } -void showValue(const templates::Domain& domain, std::ostream& os); -void showValue(const templates::Domain& domain, std::ostream& os); -void showValue(const templates::Domain& domain, 
std::ostream& os); +template <> +void show>(const templates::Domain& domain, std::ostream& os); + +template <> +void show>( + const templates::Domain& domain, std::ostream& os); + +template <> +void show>( + const templates::Dimension& dimension, + std::ostream& os); namespace detail { diff --git a/test/support/rapidcheck/show.cc b/test/support/rapidcheck/show.cc index f3aeb2426db..8b5c0146790 100644 --- a/test/support/rapidcheck/show.cc +++ b/test/support/rapidcheck/show.cc @@ -32,8 +32,11 @@ * header files. */ +#include #include +#include +#include "test/support/src/array_templates.h" #include "tiledb/sm/enums/query_condition_op.h" #include "tiledb/sm/query/ast/query_ast.h" @@ -77,3 +80,40 @@ void showValue(const tiledb::sm::ASTNode& node, std::ostream& os) { } } // namespace rc::detail + +namespace rc { + +template +void showImpl( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + os << "[" << domain.lower_bound << ", " << domain.upper_bound << "]"; +} + +template <> +void show>( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + showImpl(domain, os); +} + +template <> +void show>( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + showImpl(domain, os); +} + +template +void showImpl( + const tiledb::test::templates::Dimension
& dimension, std::ostream& os) { + os << "{\"domain\": "; + showImpl(dimension.domain, os); + os << ", \"extent\": " << dimension.extent << "}"; +} + +template <> +void show>( + const templates::Dimension& dimension, + std::ostream& os) { + showImpl(dimension, os); +} + +} // namespace rc From 5b186e4d4e6a311a816bc7cd0c4240799ed1ae9f Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 1 Oct 2025 22:41:23 -0400 Subject: [PATCH 019/109] Query::CoordsInfo constructor --- tiledb/sm/query/query.cc | 12 +++++++----- tiledb/sm/query/query.h | 2 ++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 0fbd84db127..5fa194b2b43 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -83,6 +83,13 @@ static uint64_t get_effective_memory_budget( /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ +Query::CoordsInfo::CoordsInfo() + : has_coords_(false) + , coords_buffer_(nullptr) + , coords_buffer_size_(nullptr) + , coords_num_(0) { +} + Query::Query( ContextResources& resources, CancellationSource cancellation_source, @@ -141,11 +148,6 @@ Query::Query( fragment_metadata_ = array->fragment_metadata(); - coords_info_.coords_buffer_ = nullptr; - coords_info_.coords_buffer_size_ = nullptr; - coords_info_.coords_num_ = 0; - coords_info_.has_coords_ = false; - callback_ = nullptr; callback_data_ = nullptr; status_ = QueryStatus::UNINITIALIZED; diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index 5d39ed133cf..4da5fe8444f 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -127,6 +127,8 @@ class Query { /** Keeps track of the number of coordinates across coordinate buffers. 
*/ uint64_t coords_num_; + + CoordsInfo(); }; /* ********************************* */ From 4becf64398538ea038b0acf195dc8cb8a75acd04 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 1 Oct 2025 22:41:52 -0400 Subject: [PATCH 020/109] FragmentMetadata::fragment_meta_size --- tiledb/sm/fragment/fragment_metadata.cc | 20 ++++++++++++-------- tiledb/sm/fragment/fragment_metadata.h | 5 ++++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index fc5681a65c1..63534d7c934 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -673,14 +673,7 @@ uint64_t FragmentMetadata::fragment_size() const { for (const auto& file_validity_size : file_validity_sizes_) size += file_validity_size; - // The fragment metadata file size can be empty when we've loaded consolidated - // metadata - uint64_t meta_file_size = meta_file_size_; - if (meta_file_size == 0) { - auto meta_uri = fragment_uri_.join_path( - std::string(constants::fragment_metadata_filename)); - meta_file_size = resources_->vfs().file_size(meta_uri); - } + const uint64_t meta_file_size = fragment_meta_size(); // Validate that the meta_file_size is not zero, either preloaded or fetched // above iassert(meta_file_size != 0); @@ -691,6 +684,17 @@ uint64_t FragmentMetadata::fragment_size() const { return size; } +uint64_t FragmentMetadata::fragment_meta_size() const { + // The fragment metadata file size can be empty when we've loaded consolidated + // metadata + if (meta_file_size_ == 0) { + auto meta_uri = fragment_uri_.join_path( + std::string(constants::fragment_metadata_filename)); + meta_file_size_ = resources_->vfs().file_size(meta_uri); + } + return meta_file_size_; +} + void FragmentMetadata::init_domain(const NDRange& non_empty_domain) { auto& domain{array_schema_->domain()}; diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index 
931d6d0099f..182e63a660d 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -253,6 +253,9 @@ class FragmentMetadata { /** Retrieves the fragment size. */ uint64_t fragment_size() const; + /** @return the size of the metadata file */ + uint64_t fragment_meta_size() const; + /** * Returns true if the corresponding fragment is dense, and false if it * is sparse. @@ -898,7 +901,7 @@ class FragmentMetadata { uint64_t sparse_tile_num_; /** The size of the fragment metadata file. */ - uint64_t meta_file_size_; + mutable uint64_t meta_file_size_; /** Local mutex for thread-safety. */ std::mutex mtx_; From c99a104a1bb0bf35cdcdab832e16e6a25e6c3c49 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 1 Oct 2025 22:42:11 -0400 Subject: [PATCH 021/109] rc::make_dimension with extent bound --- test/support/rapidcheck/array_templates.h | 39 +++++++++++++++-------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/test/support/rapidcheck/array_templates.h b/test/support/rapidcheck/array_templates.h index c3caded4884..2235e0b1112 100644 --- a/test/support/rapidcheck/array_templates.h +++ b/test/support/rapidcheck/array_templates.h @@ -87,7 +87,8 @@ std::optional checked_sub(T a, T b) { } template -Gen make_extent(const templates::Domain& domain) { +Gen make_extent( + const templates::Domain& domain, std::optional bound = std::nullopt) { // upper bound on all possible extents to avoid unreasonably // huge tile sizes static constexpr D extent_limit = static_cast( @@ -99,6 +100,10 @@ Gen make_extent(const templates::Domain& domain) { static_cast(std::numeric_limits::max()), static_cast(1024 * 16))); + const D extent_bound = + (bound.has_value() ? std::min(bound.value(), extent_limit) : + extent_limit); + // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is // inclusive. So we have to be careful to avoid overflow. 
@@ -109,27 +114,35 @@ Gen make_extent(const templates::Domain& domain) { checked_sub(domain.upper_bound, domain.lower_bound); if (bound_distance.has_value()) { extent_upper_bound = - (bound_distance.value() < extent_limit ? bound_distance.value() + 1 : - extent_limit); + (bound_distance.value() < extent_bound ? bound_distance.value() + 1 : + extent_bound); } else { - extent_upper_bound = extent_limit; + extent_upper_bound = extent_bound; } return gen::inRange(extent_lower_bound, extent_upper_bound + 1); } +template +Gen> make_dimension( + std::optional::value_type> extent_bound = + std::nullopt) { + using CoordType = templates::Dimension::value_type; + auto tup = gen::mapcat( + gen::arbitrary>(), + [extent_bound](Domain domain) { + return gen::pair(gen::just(domain), make_extent(domain, extent_bound)); + }); + + return gen::map(tup, [](std::pair, CoordType> tup) { + return templates::Dimension(tup.first, tup.second); + }); +} + template struct Arbitrary> { static Gen> arbitrary() { - using CoordType = templates::Dimension::value_type; - auto tup = gen::mapcat( - gen::arbitrary>(), [](Domain domain) { - return gen::pair(gen::just(domain), make_extent(domain)); - }); - - return gen::map(tup, [](std::pair, CoordType> tup) { - return templates::Dimension(tup.first, tup.second); - }); + return make_dimension(); } }; From 5472d205016e536e266da9238308fa76d89fb8ce Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 1 Oct 2025 22:43:06 -0400 Subject: [PATCH 022/109] Add rapidcheck 2d test and some fixes for issues found --- test/src/unit-cppapi-max-fragment-size.cc | 147 +++++++++++++++++- .../sm/query/writers/global_order_writer.cc | 6 +- 2 files changed, 149 insertions(+), 4 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index e7ae77093cb..025b18b4546 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -32,9 +32,12 @@ #include #include +#include 
+#include "test/support/rapidcheck/array_templates.h" #include "test/support/src/array_helpers.h" #include "test/support/src/array_templates.h" #include "test/support/src/helpers.h" +#include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" #include "tiledb/common/arithmetic.h" #include "tiledb/common/scoped_executor.h" #include "tiledb/common/stdx_string.h" @@ -605,15 +608,18 @@ instance_dense_global_order( ASSERTER(st == Query::Status::COMPLETE); } - ASSERTER(a_read == a_write); - FragmentInfo finfo(ctx, array_name); finfo.load(); // validate fragment size for (uint32_t f = 0; f < finfo.fragment_num(); f++) { const uint64_t fsize = finfo.fragment_size(f); - ASSERTER(fsize <= max_fragment_size); + const uint64_t fmetasize = finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[f] + .meta() + ->fragment_meta_size(); + ASSERTER(fsize <= max_fragment_size + fmetasize); } // collect fragment domains @@ -664,6 +670,9 @@ instance_dense_global_order( } } + // this is last because a fragment domain mismatch is more informative + ASSERTER(a_read == a_write); + return fragment_domains; } @@ -749,4 +758,136 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { CHECK(expect == actual); } } + + // examples found from the rapidcheck test + SECTION("Shrinking") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + SECTION("Example 1") { + Dim d1(0, 0, 1); + Dim d2(0, 0, 1); + Dom s1(0, 0); + Dom s2(0, 0); + const uint64_t max_fragment_size = 24; + + instance_dense_global_order( + ctx, max_fragment_size, {d1, d2}, {s1, s2}); + } + + SECTION("Example 2") { + Dim d1(1, 26, 2); + Dim d2(0, 0, 1); + Dom s1(1, 2); + Dom s2(0, 0); + const uint64_t max_fragment_size = 28; + + instance_dense_global_order( + ctx, max_fragment_size, {d1, d2}, {s1, s2}); + } + } +} + +namespace rc { +template +Gen::domain_type>> +make_tile_aligned_subarray( + const std::vector>& arraydomain) { + using Dom = typename 
templates::Dimension::domain_type; + + // dense subarrays have to be aligned to tile boundaries + // so choose the tiles in each dimension that the subarray will overlap + std::vector>> gen_subarray_tiles; + for (const auto& dimension : arraydomain) { + const uint64_t tile_ub = + (dimension.domain.upper_bound - dimension.domain.lower_bound) / + dimension.extent; + gen_subarray_tiles.push_back( + make_range(templates::Domain(0, tile_ub))); + } + + return gen::exec([gen_subarray_tiles, arraydomain]() { + std::vector> subarray_tiles; + for (const auto& gen_dim : gen_subarray_tiles) { + subarray_tiles.push_back(*gen_dim); + } + + std::vector subarray; + auto to_subarray = [&]() -> std::vector& { + subarray.clear(); + for (uint64_t d = 0; d < arraydomain.size(); d++) { + subarray.push_back(Dom( + arraydomain[d].domain.lower_bound + + subarray_tiles[d].lower_bound * arraydomain[d].extent, + arraydomain[d].domain.lower_bound + + (subarray_tiles[d].upper_bound + 1) * arraydomain[d].extent - + 1)); + } + return subarray; + }; + + uint64_t num_cells_per_tile = 1; + for (const auto& dim : arraydomain) { + num_cells_per_tile *= dim.extent; + } + + // clamp to a hopefully reasonable limit + // avoid too many cells, and avoid too many tiles + std::optional num_cells; + while (!(num_cells = subarray_num_cells(to_subarray())).has_value() || + num_cells.value() >= 1024 * 1024 * 4 || + (num_cells.value() / num_cells_per_tile) >= 16 * 1024) { + for (uint64_t d = subarray.size(); d > 0; --d) { + auto& dtiles = subarray_tiles[d - 1]; + if (dtiles.num_cells() > 4) { + dtiles.upper_bound = (dtiles.lower_bound + dtiles.upper_bound) / 2; + break; + } + } + } + + return to_subarray(); + }); +} + +} // namespace rc + +TEST_CASE( + "C++ API: Max fragment size dense array rapidcheck 2d", + "[cppapi][max-frag-size][rapidcheck]") { + Context ctx; + rc::prop("max fragment size dense 2d", [ctx]() { + static constexpr auto DT = sm::Datatype::UINT64; + templates::Dimension
d1 = *rc::make_dimension<DT>
(512); + templates::Dimension<DT>
d2 = *rc::make_dimension<DT>
(512); + const std::optional num_cells_per_tile = + checked_arithmetic::mul(d1.extent, d2.extent); + RC_PRE(num_cells_per_tile.has_value()); + RC_PRE(num_cells_per_tile.value() <= 1024 * 128); + + const uint64_t estimate_single_tile_fragment_size = + num_cells_per_tile.value() * sizeof(int) // data + + sizeof(uint64_t) // prefix containing the number of chunks + + 3 * sizeof(uint32_t); // chunk sizes + + const auto subarray = + *rc::make_tile_aligned_subarray({d1, d2}); + + const uint64_t num_tiles_per_hyperrow = d2.num_tiles(subarray[1]); + const uint64_t max_fragment_size = *rc::gen::inRange(1, 8) * + num_tiles_per_hyperrow * + estimate_single_tile_fragment_size; + + std::cerr << std::endl << "d1: "; + rc::show(d1, std::cerr); + std::cerr << std::endl << "d2: "; + rc::show(d2, std::cerr); + std::cerr << std::endl << "subarray: "; + rc::show(subarray, std::cerr); + std::cerr << std::endl + << "max_fragment_size: " << max_fragment_size << std::endl; + + instance_dense_global_order( + ctx, max_fragment_size, {d1, d2}, subarray); + }); } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index e0145caba97..163315d4b13 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1529,7 +1529,11 @@ static NDRange domain_tile_offset( T* start = static_cast(adjusted[0].start_fixed()); T* end = static_cast(adjusted[0].end_fixed()); - auto align = [extent](T value) -> T { return (value / extent) * extent; }; + // tiles begin at [LB, LB + E, LB + 2E, ...] 
where LB is lower bound, E is + // extent + auto align = [lower_bound, extent](T value) -> T { + return lower_bound + ((value - lower_bound) / extent) * extent; + }; *start = std::max(lower_bound, align(*start + extent * start_hyperrow)); *end = std::min(upper_bound, align(*start + extent * num_hyperrows) - 1); From 46fda112ed82a2fa28f3161b935cf6b45365025f Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Thu, 2 Oct 2025 11:18:04 -0400 Subject: [PATCH 023/109] Add doc for Attribute::set_fill_value overload --- tiledb/sm/cpp_api/attribute.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tiledb/sm/cpp_api/attribute.h b/tiledb/sm/cpp_api/attribute.h index 03ca2d696ef..c6a65b75c13 100644 --- a/tiledb/sm/cpp_api/attribute.h +++ b/tiledb/sm/cpp_api/attribute.h @@ -275,6 +275,34 @@ class Attribute { return *this; } + /** + * Sets the default fill value for the input attribute. This value will + * be used for the input attribute whenever querying (1) an empty cell in + * a dense array, or (2) a non-empty cell (in either dense or sparse array) + * when values on the input attribute are missing (e.g., if the user writes + * a subset of the attributes in a write operation). + * + * Applicable to fixed-sized attributes only. + * + * **Example:** + * + * @code{.c} + * tiledb::Context ctx; + * + * // Fixed-sized attribute + * auto a1 = tiledb::Attribute::create(ctx, "a1"); + * a1.set_fill_value(0); + * @endcode + * + * @param value The fill value to set. + * + * @note A call to `cell_val_num` sets the fill value + * of the attribute to its default. Therefore, make sure you invoke + * `set_fill_value` after deciding on the number + * of values this attribute will hold in each cell. + * + * @note The size of the input type `T` (i.e. `sizeof(T)`) should be equal to the cell size.
+ */ template Attribute& set_fill_value(T value) { return set_fill_value(static_cast(&value), sizeof(T)); From eb52b4fb99a255bd91e1b72641dc33e5940586e9 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Thu, 2 Oct 2025 11:19:25 -0400 Subject: [PATCH 024/109] Fix only fragment happening during finalize (for sparse) --- .../sm/query/writers/global_order_writer.cc | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 163315d4b13..adaaec40b1c 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -623,16 +623,23 @@ Status GlobalOrderWriter::compute_coord_dups( Status GlobalOrderWriter::finalize_global_write_state() { iassert(layout_ == Layout::GLOBAL_ORDER, "layout = {}", layout_str(layout_)); - auto meta = global_write_state_->frag_meta_; - const auto& uri = meta->fragment_uri(); // Handle last tile Status st = global_write_handle_last_tile(); if (!st.ok()) { - throw_if_not_ok(close_files(meta)); + if (global_write_state_->frag_meta_) { + throw_if_not_ok(close_files(global_write_state_->frag_meta_)); + } return st; } + auto meta = global_write_state_->frag_meta_; + if (!meta) { + return Status::Ok(); + } + + const auto& uri = meta->fragment_uri(); + // Close all files RETURN_NOT_OK(close_files(meta)); @@ -815,8 +822,15 @@ Status GlobalOrderWriter::global_write_handle_last_tile() { if (cell_num_last_tiles == 0) return Status::Ok(); + // if we haven't started a fragment yet, now is the time + if (!global_write_state_->frag_meta_) { + iassert(!dense()); // FIXME: probably not true + RETURN_CANCEL_OR_ERROR(start_new_fragment(0, 0)); + } + // Reserve space for the last tile in the fragment metadata auto meta = global_write_state_->frag_meta_; + iassert(meta); meta->set_num_tiles(meta->tile_index_base() + 1); // Filter last tiles @@ -1563,17 +1577,17 @@ Status 
GlobalOrderWriter::start_new_fragment( frag_meta->store(array_->get_encryption_key()); frag_uris_to_commit_.emplace_back(uri); - } - // Make a new fragment URI. - const auto write_version = array_->array_schema_latest().write_version(); - auto frag_dir_uri = - array_->array_directory().get_fragments_dir(write_version); - auto new_fragment_str = storage_format::generate_timestamped_name( - fragment_timestamp_range_.first, - fragment_timestamp_range_.second, - write_version); - fragment_uri_ = frag_dir_uri.join_path(new_fragment_str); + // Make a new fragment URI. + const auto write_version = array_->array_schema_latest().write_version(); + auto frag_dir_uri = + array_->array_directory().get_fragments_dir(write_version); + auto new_fragment_str = storage_format::generate_timestamped_name( + fragment_timestamp_range_.first, + fragment_timestamp_range_.second, + write_version); + fragment_uri_ = frag_dir_uri.join_path(new_fragment_str); + } // Set domain of new fragment if needed std::optional new_fragment_domain; From 929432bdad0f36019520670f6c2b863925280f40 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Thu, 2 Oct 2025 12:17:34 -0400 Subject: [PATCH 025/109] Add test function comments --- test/src/unit-cppapi-max-fragment-size.cc | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 025b18b4546..db0aafb81bd 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -512,6 +512,10 @@ TEST_CASE( array.close(); } +/** + * @return the number of cells contained within a subarray, or `std::nullopt` if + * overflow + */ std::optional subarray_num_cells( std::span> subarray) { uint64_t num_cells = 1; @@ -525,6 +529,16 @@ std::optional subarray_num_cells( return num_cells; } +/** + * Creates an array with the provided `dimensions` and then + * runs a global order write into `subarray` using `max_fragment_size` to 
bound + * the fragment size. + * + * Asserts that all created fragments respect `max_fragment_size` and that the + * data read back out for `subarray` matches what we wrote into it. + * + * @return a list of the domains written to each fragment in ascending order + */ template std::vector>> instance_dense_global_order( @@ -676,12 +690,17 @@ instance_dense_global_order( return fragment_domains; } +/** + * Tests that the max fragment size parameter is properly respected + * for global order writes to dense arrays. + */ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const std::string array_name = "cppapi_consolidation_dense_domain_arithmetic_overflow"; Context ctx; + // each tile is a full row of a 2D array SECTION("Row tiles") { using Dim = templates::Dimension; using Dom = templates::Domain; @@ -707,6 +726,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { CHECK(expect == actual); } + // each tile is some rectangle of a 2D array SECTION("Rectangle tiles") { using Dim = templates::Dimension; using Dom = templates::Domain; @@ -788,6 +808,10 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { } } +/** + * @return a generator which prdocues subarrays whose bounds are aligned to the + * tiles of `arraydomain` + */ namespace rc { template Gen::domain_type>> From b6a3953134d12148f985066c2e04512dffb153f0 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Thu, 2 Oct 2025 12:18:05 -0400 Subject: [PATCH 026/109] Fix weird uninitialized value false positive with fragment_end --- .../sm/query/writers/global_order_writer.cc | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index adaaec40b1c..08c0858650d 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1438,14 +1438,14 @@ 
GlobalOrderWriter::identify_fragment_tile_boundaries( uint64_t running_tiles_size = current_fragment_size_; uint64_t fragment_size = current_fragment_size_; - uint64_t fragment_start = 0; - std::optional fragment_end; + uint64_t fragment_start = 0, fragment_end = 0; std::vector> fragments; - const uint64_t hyperrow_num_tiles = - (dense() ? compute_hyperrow_num_tiles( - array_schema_.domain(), subarray_.ndrange(0)) : - 1); + std::optional hyperrow_num_tiles; + if (dense()) { + hyperrow_num_tiles = compute_hyperrow_num_tiles( + array_schema_.domain(), subarray_.ndrange(0)); + } // Make sure we don't write more than the desired fragment size. for (uint64_t t = 0; t < tile_num; t++) { @@ -1479,7 +1479,7 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( if (running_tiles_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to write a single tile"); - } else if (!fragment_end.has_value()) { + } else if (fragment_end == 0) { throw GlobalOrderWriterException( "Fragment size is too small to subdivide dense subarray into " "multiple fragments"); @@ -1490,11 +1490,12 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( iassert(running_tiles_size >= fragment_size); running_tiles_size -= fragment_size; - fragment_start = fragment_end.value(); - fragment_end.reset(); + fragment_start = fragment_end; + fragment_end = 0; } - if (((t + 1) - fragment_start) % hyperrow_num_tiles == 0) { + if (!hyperrow_num_tiles.has_value() || + ((t + 1) - fragment_start) % hyperrow_num_tiles.value() == 0) { fragment_size = running_tiles_size + tile_size; fragment_end = t + 1; } From 35ca55ffa33497bf8451954e3107bd21b9b41465 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Thu, 2 Oct 2025 22:55:23 -0400 Subject: [PATCH 027/109] Fixes to rapidcheck test --- test/src/unit-cppapi-max-fragment-size.cc | 33 ++++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc 
b/test/src/unit-cppapi-max-fragment-size.cc index db0aafb81bd..12eff013ee5 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -44,6 +44,7 @@ #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/cpp_api/tiledb" #include "tiledb/sm/misc/constants.h" +#include "tiledb/sm/tile/tile.h" #include @@ -826,8 +827,8 @@ make_tile_aligned_subarray( const uint64_t tile_ub = (dimension.domain.upper_bound - dimension.domain.lower_bound) / dimension.extent; - gen_subarray_tiles.push_back( - make_range(templates::Domain(0, tile_ub))); + gen_subarray_tiles.push_back(make_range( + templates::Domain(0, std::min(64, tile_ub)))); } return gen::exec([gen_subarray_tiles, arraydomain]() { @@ -855,7 +856,7 @@ make_tile_aligned_subarray( num_cells_per_tile *= dim.extent; } - // clamp to a hopefully reasonable limit + // clamp to a hopefully reasonable limit (if the other attempts failed) // avoid too many cells, and avoid too many tiles std::optional num_cells; while (!(num_cells = subarray_num_cells(to_subarray())).has_value() || @@ -882,25 +883,37 @@ TEST_CASE( Context ctx; rc::prop("max fragment size dense 2d", [ctx]() { static constexpr auto DT = sm::Datatype::UINT64; - templates::Dimension
d1 = *rc::make_dimension<DT>
(512); - templates::Dimension<DT>
d2 = *rc::make_dimension<DT>
(512); + templates::Dimension<DT>
d1 = *rc::make_dimension<DT>
(128); + templates::Dimension<DT>
d2 = *rc::make_dimension<DT>
(128); const std::optional num_cells_per_tile = checked_arithmetic::mul(d1.extent, d2.extent); RC_PRE(num_cells_per_tile.has_value()); RC_PRE(num_cells_per_tile.value() <= 1024 * 128); + const uint64_t tile_size = num_cells_per_tile.value() * sizeof(int); + const uint64_t filter_chunk_size = + sm::WriterTile::compute_chunk_size(tile_size, sizeof(int)); + const uint64_t num_filter_chunks_per_tile = + (tile_size + filter_chunk_size - 1) / filter_chunk_size; + const uint64_t estimate_single_tile_fragment_size = num_cells_per_tile.value() * sizeof(int) // data - + sizeof(uint64_t) // prefix containing the number of chunks - + 3 * sizeof(uint32_t); // chunk sizes + + sizeof(uint64_t) // prefix containing the number of chunks + + num_filter_chunks_per_tile * 3 * sizeof(uint32_t); // chunk sizes const auto subarray = *rc::make_tile_aligned_subarray({d1, d2}); const uint64_t num_tiles_per_hyperrow = d2.num_tiles(subarray[1]); - const uint64_t max_fragment_size = *rc::gen::inRange(1, 8) * - num_tiles_per_hyperrow * - estimate_single_tile_fragment_size; + + auto gen_fragment_size = rc::gen::map( + rc::gen::inRange(1, 8), + [num_tiles_per_hyperrow, + estimate_single_tile_fragment_size](uint64_t scale) { + return num_tiles_per_hyperrow * estimate_single_tile_fragment_size * + scale; + }); + const uint64_t max_fragment_size = *gen_fragment_size; std::cerr << std::endl << "d1: "; rc::show(d1, std::cerr); From c01b3dd94cba2c341941cebcc66e1339c1809195 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 7 Oct 2025 12:47:24 -0400 Subject: [PATCH 028/109] NDRangeTypedAccess --- tiledb/sm/misc/types.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tiledb/sm/misc/types.h b/tiledb/sm/misc/types.h index 22b3ca2add2..914a35c9a15 100644 --- a/tiledb/sm/misc/types.h +++ b/tiledb/sm/misc/types.h @@ -47,6 +47,31 @@ namespace tiledb::sm { /** An N-dimensional range, consisting of a vector of 1D ranges. 
*/ using NDRange = std::vector; +/** + * Helper functions for more concisely accessing or manipulating + * fields of a range with static typing. + * + * Useful with `apply_with_type`. + */ +template +struct NDRangeTypedAccess { + static T& lower_bound(NDRange& range, uint64_t dim) { + return *static_cast(range[dim].start_fixed()); + } + + static const T& lower_bound(const NDRange& range, uint64_t dim) { + return *static_cast(range[dim].start_fixed()); + } + + static T& upper_bound(NDRange& range, uint64_t dim) { + return *static_cast(range[dim].end_fixed()); + } + + static const T& upper_bound(const NDRange& range, uint64_t dim) { + return *static_cast(range[dim].end_fixed()); + } +}; + /** An untyped value, barely more than raw storage. This class is only * transitional. All uses should be rewritten to use ordinary types. Consider * it deprecated at creation. From 02b05af49fba5015b85031f6a202069bd28139ac Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 7 Oct 2025 12:48:03 -0400 Subject: [PATCH 029/109] Fix fragment metadata domain when resuming fragment from previous submit --- tiledb/sm/fragment/fragment_metadata.cc | 8 +++-- tiledb/sm/fragment/fragment_metadata.h | 6 ++++ .../sm/query/writers/global_order_writer.cc | 35 +++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index 63534d7c934..b3844f0d288 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -696,13 +696,17 @@ uint64_t FragmentMetadata::fragment_meta_size() const { } void FragmentMetadata::init_domain(const NDRange& non_empty_domain) { - auto& domain{array_schema_->domain()}; - // Sanity check iassert(!non_empty_domain.empty()); iassert(non_empty_domain_.empty()); iassert(domain_.empty()); + set_domain(non_empty_domain); +} + +void FragmentMetadata::set_domain(const NDRange& non_empty_domain) { + auto& domain{array_schema_->domain()}; + 
// Set non-empty domain for dense arrays (for sparse it will be calculated // via the MBRs) if (dense_) { diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index 182e63a660d..fe2e6e69a35 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -356,6 +356,12 @@ class FragmentMetadata { */ void init_domain(const NDRange& non_empty_domain); + /** + * Updates the fragment's internal domain and non-empty domain members. + * Validity of the argument is not checked so use with caution. + */ + void set_domain(const NDRange& non_empty_domain); + /** * Loads the basic metadata from storage or `f_buff` for later * versions if it is not `nullptr`. diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 08c0858650d..a8adf4c24fa 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -45,6 +45,7 @@ #include "tiledb/sm/misc/parallel_functions.h" #include "tiledb/sm/misc/tdb_math.h" #include "tiledb/sm/misc/tdb_time.h" +#include "tiledb/sm/misc/types.h" #include "tiledb/sm/query/hilbert_order.h" #include "tiledb/sm/query/query_macros.h" #include "tiledb/sm/stats/global_stats.h" @@ -60,6 +61,12 @@ using namespace tiledb::sm::stats; namespace tiledb::sm { +static NDRange domain_tile_offset( + const Domain& arraydomain, + const NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles); + class GlobalOrderWriterException : public StatusException { public: explicit GlobalOrderWriterException(const std::string& message) @@ -786,6 +793,34 @@ Status GlobalOrderWriter::global_write() { } else { if (f > 0 || !global_write_state_->frag_meta_) { RETURN_CANCEL_OR_ERROR(start_new_fragment(start_tile, num_tiles)); + } else { + // this means a resumed write of a previously started fragment + if (dense()) { + // in which case it is necessary to extend the domain + // (assumption which is 
true as of this writing: all dimensions of a + // dense domain have the same data type) + auto updated_domain = [&](T) -> NDRange { + using A = NDRangeTypedAccess; + const NDRange& written_domain = + global_write_state_->frag_meta_->domain(); + NDRange pending_range = subarray_.ndrange(0); + A::lower_bound(pending_range, 0) = + A::upper_bound(written_domain, 0) + 1; + + auto extended = domain_tile_offset( + array_schema_.domain(), + pending_range, + 0, + global_write_state_->frag_meta_->tile_index_base() + num_tiles); + A::lower_bound(extended, 0) = A::lower_bound(written_domain, 0); + return extended; + }; + + NDRange extended = apply_with_type( + updated_domain, array_schema_.domain().dimension_ptr(0)->type()); + + global_write_state_->frag_meta_->set_domain(extended); + } } auto frag_meta = global_write_state_->frag_meta_; From 7bf6229854c8caf0ae9232a804fb157212058320 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 13 Oct 2025 13:31:17 -0400 Subject: [PATCH 030/109] Fix writing with multiple submits --- test/src/unit-cppapi-max-fragment-size.cc | 116 ++++++++++------ .../sm/query/writers/global_order_writer.cc | 124 +++++++++--------- tiledb/sm/query/writers/global_order_writer.h | 19 ++- 3 files changed, 152 insertions(+), 107 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 12eff013ee5..de09b9e101a 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -546,7 +546,8 @@ instance_dense_global_order( const Context& ctx, uint64_t max_fragment_size, const std::vector>& dimensions, - const std::vector>& subarray) { + const std::vector>& subarray, + std::optional write_unit_num_cells = std::nullopt) { const std::string array_name = "max_fragment_size_dense_global_order"; const std::optional num_cells = subarray_num_cells(subarray); @@ -595,12 +596,21 @@ instance_dense_global_order( Query query(ctx, array, TILEDB_WRITE); 
query.set_layout(TILEDB_GLOBAL_ORDER); query.set_subarray(sub); - query.set_data_buffer("a", a_write); - query.ptr().get()->query_->set_fragment_size(max_fragment_size); - const auto status = query.submit(); - ASSERTER(status == Query::Status::COMPLETE); + uint64_t cells_written = 0; + while (cells_written < a_write.size()) { + const uint64_t cells_this_write = std::min( + a_write.size() - cells_written, + write_unit_num_cells.value_or(a_write.size())); + query.set_data_buffer("a", &a_write[cells_written], cells_this_write); + + const auto status = query.submit(); + ASSERTER(status == Query::Status::COMPLETE); + + cells_written += cells_this_write; + } + query.finalize(); } @@ -716,15 +726,27 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const std::vector subarray = { Dom(base_d1 + 0, base_d1 + num_rows - 1), Dom(0, span_d2 - 1)}; - const auto actual = instance_dense_global_order( - ctx, 64 * 1024, dimensions, subarray); + const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); - std::vector> expect; - for (uint64_t r = 0; r < num_rows; r++) { - expect.push_back({Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); - } + DYNAMIC_SECTION( + "num_rows = " << num_rows + << ", write_unit_num_cells = " << write_unit_num_cells) { + const auto actual = instance_dense_global_order( + ctx, + 64 * 1024, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional{write_unit_num_cells}); + + std::vector> expect; + for (uint64_t r = 0; r < num_rows; r++) { + expect.push_back({Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); + } - CHECK(expect == actual); + CHECK(expect == actual); + } } // each tile is some rectangle of a 2D array @@ -749,34 +771,48 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const uint64_t max_fragment_size = 4 * 64 * 1024; - if (d1_extent == 8) { - const auto expect = Catch::Matchers::ContainsSubstring( - "Fragment size is too small to subdivide dense subarray into " - "multiple fragments"); - REQUIRE_THROWS(instance_dense_global_order( - ctx, max_fragment_size, dimensions, subarray)); - } else if (d1_start_offset + d1_end_offset > 0) { - // if this constraint is ever relaxed this test must be extended - // with new inputs which are offset within a tile - const auto expect = Catch::Matchers::ContainsSubstring( - "the subarray must coincide with the tile bounds"); - REQUIRE_THROWS(instance_dense_global_order( - ctx, max_fragment_size, dimensions, subarray)); - } else { - const std::vector> expect = { - {Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), - Dom(0, d2_span - 1)}, - {Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), - Dom(0, d2_span - 1)}, - {Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), - Dom(0, d2_span - 1)}, - {Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), - Dom(0, d2_span - 1)}}; - - const auto actual = instance_dense_global_order( - ctx, max_fragment_size, dimensions, subarray); - - CHECK(expect == actual); + const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); + + DYNAMIC_SECTION( + "start_offset = " << d1_start_offset << ", end_offset = " + << d1_end_offset << ", extent = " << d1_extent + << ", write_unit_num_cells = " + << write_unit_num_cells) { + if (d1_extent == 8) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment 
size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS(instance_dense_global_order( + ctx, max_fragment_size, dimensions, subarray)); + } else if (d1_start_offset + d1_end_offset > 0) { + // if this constraint is ever relaxed this test must be extended + // with new inputs which are offset within a tile + const auto expect = Catch::Matchers::ContainsSubstring( + "the subarray must coincide with the tile bounds"); + REQUIRE_THROWS(instance_dense_global_order( + ctx, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? + std::nullopt : + std::optional(write_unit_num_cells))); + } else { + const std::vector> expect = { + {Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), + Dom(0, d2_span - 1)}}; + + const auto actual = instance_dense_global_order( + ctx, max_fragment_size, dimensions, subarray); + + CHECK(expect == actual); + } } } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index a8adf4c24fa..61fe13c46fd 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -124,6 +124,7 @@ GlobalOrderWriter::GlobalWriteState::GlobalWriteState( : last_tiles_(memory_tracker->get_resource(MemoryType::WRITER_TILE_DATA)) , last_var_offsets_(memory_tracker->get_resource(MemoryType::WRITER_DATA)) , cells_written_(memory_tracker->get_resource(MemoryType::WRITER_DATA)) { + dense_.domain_tile_offset_ = 0; } /* ****************************** */ @@ -650,6 +651,18 @@ Status GlobalOrderWriter::finalize_global_write_state() { // Close all files RETURN_NOT_OK(close_files(meta)); + // Update dense fragment domain + if (dense()) { + const 
uint64_t num_tiles_in_fragment = + meta->loaded_metadata()->tile_offsets()[0].size(); + NDRange fragment_domain = domain_tile_offset( + array_schema_.domain(), + subarray_.ndrange(0), + global_write_state_->dense_.domain_tile_offset_, + num_tiles_in_fragment); + meta->set_domain(fragment_domain); + } + // Check that the same number of cells was written across attributes // and dimensions auto cell_num = global_write_state_->cells_written_[buffers_.begin()->first]; @@ -775,69 +788,46 @@ Status GlobalOrderWriter::global_write() { const auto fragments = identify_fragment_tile_boundaries(tiles); for (uint64_t f = 0; f < fragments.size(); f++) { - const uint64_t start_tile = fragments[f].second; - const uint64_t num_tiles = + const uint64_t input_start_tile = fragments[f].second; + const uint64_t input_num_tiles = (f + 1 < fragments.size() ? fragments[f + 1].second : tile_num) - - start_tile; + input_start_tile; - if (num_tiles == 0) { + if (input_num_tiles == 0) { // this should only happen if there is only one tile of input and we have // to wait for finalize, or if continuing a fragment from a previous write // and there is no more room iassert(f == 0); - if (current_fragment_size_ > 0) { - RETURN_CANCEL_OR_ERROR(start_new_fragment(start_tile, num_tiles)); - } else { + if (current_fragment_size_ == 0) { iassert(fragments.size() == 1); } } else { if (f > 0 || !global_write_state_->frag_meta_) { - RETURN_CANCEL_OR_ERROR(start_new_fragment(start_tile, num_tiles)); - } else { - // this means a resumed write of a previously started fragment - if (dense()) { - // in which case it is necessary to extend the domain - // (assumption which is true as of this writing: all dimensions of a - // dense domain have the same data type) - auto updated_domain = [&](T) -> NDRange { - using A = NDRangeTypedAccess; - const NDRange& written_domain = - global_write_state_->frag_meta_->domain(); - NDRange pending_range = subarray_.ndrange(0); - A::lower_bound(pending_range, 0) = - 
A::upper_bound(written_domain, 0) + 1; - - auto extended = domain_tile_offset( - array_schema_.domain(), - pending_range, - 0, - global_write_state_->frag_meta_->tile_index_base() + num_tiles); - A::lower_bound(extended, 0) = A::lower_bound(written_domain, 0); - return extended; - }; - - NDRange extended = apply_with_type( - updated_domain, array_schema_.domain().dimension_ptr(0)->type()); - - global_write_state_->frag_meta_->set_domain(extended); - } + RETURN_CANCEL_OR_ERROR(start_new_fragment()); } auto frag_meta = global_write_state_->frag_meta_; // update metadata of current fragment - frag_meta->set_num_tiles(frag_meta->tile_index_base() + num_tiles); + frag_meta->set_num_tiles(frag_meta->tile_index_base() + input_num_tiles); set_coords_metadata( - start_tile, start_tile + num_tiles, tiles, mbrs, frag_meta); + input_start_tile, + input_start_tile + input_num_tiles, + tiles, + mbrs, + frag_meta); // write tiles for all attributes - RETURN_CANCEL_OR_ERROR( - write_tiles(start_tile, start_tile + num_tiles, frag_meta, &tiles)); + RETURN_CANCEL_OR_ERROR(write_tiles( + input_start_tile, + input_start_tile + input_num_tiles, + frag_meta, + &tiles)); } global_write_state_->frag_meta_->set_tile_index_base( - global_write_state_->frag_meta_->tile_index_base() + num_tiles); + global_write_state_->frag_meta_->tile_index_base() + input_num_tiles); } if (!fragments.empty()) { @@ -860,7 +850,7 @@ Status GlobalOrderWriter::global_write_handle_last_tile() { // if we haven't started a fragment yet, now is the time if (!global_write_state_->frag_meta_) { iassert(!dense()); // FIXME: probably not true - RETURN_CANCEL_OR_ERROR(start_new_fragment(0, 0)); + RETURN_CANCEL_OR_ERROR(start_new_fragment()); } // Reserve space for the last tile in the fragment metadata @@ -1473,13 +1463,20 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( uint64_t running_tiles_size = current_fragment_size_; uint64_t fragment_size = current_fragment_size_; - uint64_t fragment_start = 0, 
fragment_end = 0; + uint64_t fragment_start = 0; + std::optional fragment_end; std::vector> fragments; + uint64_t hyperrow_offset = 0; std::optional hyperrow_num_tiles; if (dense()) { hyperrow_num_tiles = compute_hyperrow_num_tiles( array_schema_.domain(), subarray_.ndrange(0)); + + if (global_write_state_->frag_meta_) { + hyperrow_offset = global_write_state_->dense_.domain_tile_offset_ + + global_write_state_->frag_meta_->tile_index_base(); + } } // Make sure we don't write more than the desired fragment size. @@ -1514,7 +1511,7 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( if (running_tiles_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to write a single tile"); - } else if (fragment_end == 0) { + } else if (!fragment_end.has_value() && fragment_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to subdivide dense subarray into " "multiple fragments"); @@ -1525,12 +1522,14 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( iassert(running_tiles_size >= fragment_size); running_tiles_size -= fragment_size; - fragment_start = fragment_end; - fragment_end = 0; + fragment_start = fragment_end.value_or(0); + fragment_end = std::nullopt; } if (!hyperrow_num_tiles.has_value() || - ((t + 1) - fragment_start) % hyperrow_num_tiles.value() == 0) { + ((hyperrow_offset + t + 1) - fragment_start) % + hyperrow_num_tiles.value() == + 0) { fragment_size = running_tiles_size + tile_size; fragment_end = t + 1; } @@ -1593,8 +1592,7 @@ static NDRange domain_tile_offset( return adjusted; } -Status GlobalOrderWriter::start_new_fragment( - uint64_t tile_start, uint64_t num_tiles) { +Status GlobalOrderWriter::start_new_fragment() { // finish off current fragment if there is one if (global_write_state_->frag_meta_) { auto frag_meta = global_write_state_->frag_meta_; @@ -1603,6 +1601,20 @@ Status GlobalOrderWriter::start_new_fragment( // Close all files RETURN_NOT_OK(close_files(frag_meta)); + // Update dense fragment 
domain + if (dense()) { + const uint64_t num_tiles_in_fragment = + frag_meta->loaded_metadata()->tile_offsets()[0].size(); + NDRange fragment_domain = domain_tile_offset( + array_schema_.domain(), + subarray_.ndrange(0), + global_write_state_->dense_.domain_tile_offset_, + num_tiles_in_fragment); + frag_meta->set_domain(fragment_domain); + + global_write_state_->dense_.domain_tile_offset_ += num_tiles_in_fragment; + } + // Set the processed conditions frag_meta->set_processed_conditions(processed_conditions_); @@ -1625,22 +1637,12 @@ Status GlobalOrderWriter::start_new_fragment( fragment_uri_ = frag_dir_uri.join_path(new_fragment_str); } - // Set domain of new fragment if needed - std::optional new_fragment_domain; - if (dense()) { - new_fragment_domain = domain_tile_offset( - array_schema_.domain(), subarray_.ndrange(0), tile_start, num_tiles); - } - // Create a new fragment. current_fragment_size_ = 0; - RETURN_NOT_OK(create_fragment( - dense(), - global_write_state_->frag_meta_, - new_fragment_domain.has_value() ? &new_fragment_domain.value() : - nullptr)); + RETURN_NOT_OK(create_fragment(dense(), global_write_state_->frag_meta_)); return Status::Ok(); } } // namespace tiledb::sm + diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index 65cf6304574..9ab8de991f2 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -108,6 +108,18 @@ class GlobalOrderWriter : public WriterBase { */ std::unordered_map multipart_upload_state_; + + /** + * State for writing dense fragments. + */ + struct DenseWriteState { + /** + * Tile offset in the subarray domain which the current fragment began + * writing to. + */ + uint64_t domain_tile_offset_; + }; + DenseWriteState dense_; }; /* ********************************* */ @@ -390,13 +402,8 @@ class GlobalOrderWriter : public WriterBase { * Close the current fragment and start a new one. 
The closed fragment will * be added to `frag_uris_to_commit_` so that all fragments in progress can * be written at once. - * - * @param tile_start the tile offset into the subarray domain where the - * fragment starts (dense only) - * @param num_tiles the number of tiles which will be written to the new - * fragment (dense only) */ - Status start_new_fragment(uint64_t tile_start, uint64_t num_tiles); + Status start_new_fragment(); /** * @return true if this write is to a dense fragment From ffecac345e60a26facd91c8564cf771d5b1a6333 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 13 Oct 2025 13:31:35 -0400 Subject: [PATCH 031/109] Revert "Add doc for Attribute::set_fill_value overload" This reverts commit 1c7bbd2b2a6fc946bd9bacd46feaf01fca1a06a7. --- tiledb/sm/cpp_api/attribute.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/tiledb/sm/cpp_api/attribute.h b/tiledb/sm/cpp_api/attribute.h index c6a65b75c13..03ca2d696ef 100644 --- a/tiledb/sm/cpp_api/attribute.h +++ b/tiledb/sm/cpp_api/attribute.h @@ -275,34 +275,6 @@ class Attribute { return *this; } - /** - * Sets the default fill value for the input attribute. This value will - * be used for the input attribute whenever querying (1) an empty cell in - * a dense array, or (2) a non-empty cell (in either dense or sparse array) - * when values on the input attribute are missing (e.g., if the user writes - * a subset of the attributes in a write operation). - * - * Applicable to fixed-sized attributes only. - * - * **Example:** - * - * @code{.c} - * tiledb::Context ctx; - * - * // Fixed-sized attribute - * auto a1 = tiledb::Attribute::create(ctx, "a1"); - * a1.set_fill_value(0); - * @endcode - * - * @param value The fill value to set. - * - * @note A call to `cell_val_num` sets the fill value - * of the attribute to its default. Therefore, make sure you invoke - * `set_fill_value` after deciding on the number - * of values this attribute will hold in each cell. 
- * - * @note The input `size` should be equal to the cell size. - */ template Attribute& set_fill_value(T value) { return set_fill_value(static_cast(&value), sizeof(T)); From be2fefb994e12d9704149e2ac75bc4044a22d8b4 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 13 Oct 2025 13:52:19 -0400 Subject: [PATCH 032/109] make format --- tiledb/sm/query/writers/global_order_writer.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 61fe13c46fd..a33fe543a87 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1645,4 +1645,3 @@ Status GlobalOrderWriter::start_new_fragment() { } } // namespace tiledb::sm - From 04851b7a3429663aa4547e7b26a3d40fc1bdefcd Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 13 Oct 2025 14:44:42 -0400 Subject: [PATCH 033/109] Remove incorrect assert --- tiledb/sm/query/writers/global_order_writer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index a33fe543a87..0df4c91d2ec 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -848,8 +848,8 @@ Status GlobalOrderWriter::global_write_handle_last_tile() { return Status::Ok(); // if we haven't started a fragment yet, now is the time + // (this can happen if the writes do not fill a full tile) if (!global_write_state_->frag_meta_) { - iassert(!dense()); // FIXME: probably not true RETURN_CANCEL_OR_ERROR(start_new_fragment()); } From 33ac8162b77e753b6a93ed6713776fc03a500642 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 15 Oct 2025 12:51:47 -0400 Subject: [PATCH 034/109] Tile::filtered_buffer() const --- tiledb/sm/tile/tile.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tiledb/sm/tile/tile.h b/tiledb/sm/tile/tile.h index 
3d730f86a20..d82382d9da2 100644 --- a/tiledb/sm/tile/tile.h +++ b/tiledb/sm/tile/tile.h @@ -458,6 +458,13 @@ class WriterTile : public TileBase { return filtered_buffer_; } + /** + * Returns the buffer that contains the filtered, on-disk format. + */ + inline const FilteredBuffer& filtered_buffer() const { + return filtered_buffer_; + } + /** * Write method used for var data. Resizes the internal buffer if needed. * From 60e887b9d5ac52b3a3a7769c7e87f121b2e88171 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Thu, 16 Oct 2025 15:05:10 -0400 Subject: [PATCH 035/109] Add start/end tile idx args to WriterBase methods --- tiledb/sm/query/writers/unordered_writer.cc | 2 +- tiledb/sm/query/writers/writer_base.cc | 46 +++++++++++-------- tiledb/sm/query/writers/writer_base.h | 49 +++++++++++++++++++-- 3 files changed, 74 insertions(+), 23 deletions(-) diff --git a/tiledb/sm/query/writers/unordered_writer.cc b/tiledb/sm/query/writers/unordered_writer.cc index ed568d2fa87..4b8ea2d6b45 100644 --- a/tiledb/sm/query/writers/unordered_writer.cc +++ b/tiledb/sm/query/writers/unordered_writer.cc @@ -699,7 +699,7 @@ Status UnorderedWriter::unordered_write() { } // Compute tile metadata. - RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tile_num, tiles)); + RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tiles)); // Filter all tiles RETURN_CANCEL_OR_ERROR(filter_tiles(&tiles)); diff --git a/tiledb/sm/query/writers/writer_base.cc b/tiledb/sm/query/writers/writer_base.cc index 5b500c18c7b..e0fcd5683c0 100644 --- a/tiledb/sm/query/writers/writer_base.cc +++ b/tiledb/sm/query/writers/writer_base.cc @@ -614,6 +614,8 @@ Status WriterBase::close_files(shared_ptr meta) const { } std::vector WriterBase::compute_mbrs( + uint64_t start_tile_idx, + uint64_t end_tile_idx, const tdb::pmr::unordered_map& tiles) const { auto timer_se = stats_->start_timer("compute_coord_meta"); @@ -628,16 +630,13 @@ std::vector WriterBase::compute_mbrs( return std::vector(); } - // Compute number of tiles. 
Assumes all attributes and - // and dimensions have the same number of tiles - auto tile_num = tiles.begin()->second.size(); auto dim_num = array_schema_.dim_num(); // Compute MBRs - std::vector mbrs(tile_num); - auto status = - parallel_for(&resources_.compute_tp(), 0, tile_num, [&](uint64_t i) { - mbrs[i].resize(dim_num); + std::vector mbrs(end_tile_idx - start_tile_idx); + auto status = parallel_for( + &resources_.compute_tp(), start_tile_idx, end_tile_idx, [&](uint64_t i) { + mbrs[i - start_tile_idx].resize(dim_num); std::vector data(dim_num); for (unsigned d = 0; d < dim_num; ++d) { auto dim{array_schema_.dimension_ptr(d)}; @@ -689,12 +688,13 @@ void WriterBase::set_coords_metadata( } Status WriterBase::compute_tiles_metadata( - uint64_t tile_num, + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map& tiles) const { auto* compute_tp = &resources_.compute_tp(); // Parallelize over attributes? - if (tiles.size() > tile_num) { + if (tiles.size() > (end_tile_idx - start_tile_idx)) { auto st = parallel_for(compute_tp, 0, tiles.size(), [&](uint64_t i) { auto tiles_it = tiles.begin(); std::advance(tiles_it, i); @@ -724,14 +724,15 @@ Status WriterBase::compute_tiles_metadata( const auto var_size = array_schema_.var_size(attr); const auto cell_size = array_schema_.cell_size(attr); const auto cell_val_num = array_schema_.cell_val_num(attr); - auto st = parallel_for(compute_tp, 0, tile_num, [&](uint64_t t) { - TileMetadataGenerator md_generator( - type, is_dim, var_size, cell_size, cell_val_num); - md_generator.process_full_tile(attr_tiles[t]); - md_generator.set_tile_metadata(attr_tiles[t]); + auto st = parallel_for( + compute_tp, start_tile_idx, end_tile_idx, [&](uint64_t t) { + TileMetadataGenerator md_generator( + type, is_dim, var_size, cell_size, cell_val_num); + md_generator.process_full_tile(attr_tiles[t]); + md_generator.set_tile_metadata(attr_tiles[t]); - return Status::Ok(); - }); + return Status::Ok(); + }); RETURN_NOT_OK(st); } } @@ 
-794,13 +795,16 @@ Status WriterBase::create_fragment( } Status WriterBase::filter_tiles( + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map* tiles) { auto timer_se = stats_->start_timer("filter_tiles"); auto status = parallel_for(&resources_.compute_tp(), 0, tiles->size(), [&](uint64_t i) { auto tiles_it = tiles->begin(); std::advance(tiles_it, i); - throw_if_not_ok(filter_tiles(tiles_it->first, &tiles_it->second)); + throw_if_not_ok(filter_tiles( + start_tile_idx, end_tile_idx, tiles_it->first, &tiles_it->second)); throw_if_cancelled(); return Status::Ok(); }); @@ -810,7 +814,10 @@ Status WriterBase::filter_tiles( } Status WriterBase::filter_tiles( - const std::string& name, WriterTileTupleVector* tiles) { + uint64_t start_tile_idx, + uint64_t end_tile_idx, + const std::string& name, + WriterTileTupleVector* tiles) { const bool var_size = array_schema_.var_size(name); const bool nullable = array_schema_.is_nullable(name); @@ -820,7 +827,8 @@ Status WriterBase::filter_tiles( // Process all tiles, minus offsets, they get processed separately. std::vector> args; args.reserve(tile_num * (1 + nullable)); - for (auto& tile : *tiles) { + for (uint64_t t = start_tile_idx; t < end_tile_idx; t++) { + auto& tile = (*tiles)[t]; if (var_size) { args.emplace_back(&tile.var_tile(), &tile.offset_tile(), false, false); } else { diff --git a/tiledb/sm/query/writers/writer_base.h b/tiledb/sm/query/writers/writer_base.h index b16022a3ce0..b7304903002 100644 --- a/tiledb/sm/query/writers/writer_base.h +++ b/tiledb/sm/query/writers/writer_base.h @@ -242,14 +242,27 @@ class WriterBase : public StrategyBase, public IQueryStrategy { /** * Computes the MBRs. * + * @param start_tile_idx The index of the first tile to compute MBR for + * @param end_tile_idx The index of the last tile to compute MBR for * @param tiles The tiles to calculate the MBRs from. It is a map of vectors, * one vector of tiles per dimension/coordinates. * @return MBRs. 
*/ std::vector compute_mbrs( + uint64_t start_tile_idx, + uint64_t end_tile_idx, const tdb::pmr::unordered_map& tiles) const; + /** + * Computes the MBRs for all of the requested tiles. See above. + */ + std::vector compute_mbrs( + const tdb::pmr::unordered_map& tiles) + const { + return compute_mbrs(0, tiles.begin()->second.size(), tiles); + } + /** * Set the coordinates metadata (e.g., MBRs). * @@ -270,15 +283,26 @@ class WriterBase : public StrategyBase, public IQueryStrategy { /** * Computes the tiles metadata (min/max/sum/null count). * - * @param tile_num The number of tiles. + * @param start_tile_idx The index of the first tile to compute metadata for + * @param end_tile_idx The index of the last tile to compute metadata for * @param tiles The tiles to calculate the tile metadata from. It is * a map of vectors, one vector of tiles per dimension. * @return Status */ Status compute_tiles_metadata( - uint64_t tile_num, + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map& tiles) const; + /** + * Computes the tiles metadata for each tile in the provided list. See above. + */ + Status compute_tiles_metadata( + tdb::pmr::unordered_map& tiles) + const { + return compute_tiles_metadata(0, tiles.begin()->second.size(), tiles); + } + /** * Returns the i-th coordinates in the coordinate buffers in string * format. @@ -306,19 +330,38 @@ class WriterBase : public StrategyBase, public IQueryStrategy { * Runs the input coordinate and attribute tiles through their * filter pipelines. The tile buffers are modified to contain the output * of the pipeline. + * + * @param start_tile_idx The index of the first tile to filter + * @param end_tile_idx The index of the last tile to filter */ Status filter_tiles( + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map* tiles); + /** + * See above, filtering all of the provided tiles. 
+ */ + Status filter_tiles( + tdb::pmr::unordered_map* tiles) { + return filter_tiles(0, tiles->begin()->second.size(), tiles); + } + /** * Runs the input tiles for the input attribute through the filter pipeline. * The tile buffers are modified to contain the output of the pipeline. * + * @param start_tile_idx The index of the first tile to filter + * @param end_tile_idx The index of the last tile to filter * @param name The attribute/dimension the tiles belong to. * @param tile The tiles to be filtered. * @return Status */ - Status filter_tiles(const std::string& name, WriterTileTupleVector* tiles); + Status filter_tiles( + uint64_t start_tile_idx, + uint64_t end_tile_idx, + const std::string& name, + WriterTileTupleVector* tiles); /** * Runs the input tile for the input attribute/dimension through the filter From 8e8d6171ab75e7303391a77c542b64672bfaf721 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 17 Oct 2025 08:00:09 -0400 Subject: [PATCH 036/109] Col tile order, and also keeping a tail of tiles in memory --- test/src/unit-cppapi-max-fragment-size.cc | 314 ++++++++++++------ .../sm/query/writers/global_order_writer.cc | 193 ++++++++--- tiledb/sm/query/writers/global_order_writer.h | 25 +- 3 files changed, 369 insertions(+), 163 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index de09b9e101a..640ba2a2623 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -544,6 +544,8 @@ template std::vector>> instance_dense_global_order( const Context& ctx, + tiledb_layout_t tile_order, + tiledb_layout_t cell_order, uint64_t max_fragment_size, const std::vector>& dimensions, const std::vector>& subarray, @@ -567,6 +569,8 @@ instance_dense_global_order( auto a = Attribute::create(ctx, "a"); ArraySchema schema(ctx, TILEDB_DENSE); schema.set_domain(domain); + schema.set_tile_order(tile_order); + schema.set_cell_order(cell_order); 
schema.add_attributes(a); Array::create(array_name, schema); @@ -684,14 +688,29 @@ instance_dense_global_order( ASSERTER(fragment_domains[0][0].lower_bound == subarray[0].lower_bound); ASSERTER(fragment_domains.back()[0].upper_bound == subarray[0].upper_bound); for (uint32_t f = 0; f < fragment_domains.size(); f++) { - if (f > 0) { - ASSERTER( - fragment_domains[f - 1][0].upper_bound + 1 == - fragment_domains[f][0].lower_bound); - } - // non-first dimensions should match - for (uint64_t d = 1; d < dimensions.size(); d++) { - ASSERTER(fragment_domains[f][d] == subarray[d]); + if (tile_order == TILEDB_ROW_MAJOR) { + // first dimension is ranging and contiguous + if (f > 0) { + ASSERTER( + fragment_domains[f - 1][0].upper_bound + 1 == + fragment_domains[f][0].lower_bound); + } + // non-first dimensions should match + for (uint64_t d = 1; d < dimensions.size(); d++) { + ASSERTER(fragment_domains[f][d] == subarray[d]); + } + } else { + // last dimension is ranging and contiguous + const uint64_t num_dims = dimensions.size(); + if (f > 0) { + ASSERTER( + fragment_domains[f - 1][num_dims - 1].upper_bound + 1 == + fragment_domains[f][num_dims - 1].lower_bound); + } + // non-last dimensions should match + for (uint64_t d = 0; d < num_dims - 1; d++) { + ASSERTER(fragment_domains[f][d] == subarray[d]); + } } } @@ -711,107 +730,183 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { Context ctx; - // each tile is a full row of a 2D array - SECTION("Row tiles") { - using Dim = templates::Dimension; - using Dom = templates::Domain; + const tiledb_layout_t tile_order = + GENERATE(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + const tiledb_layout_t cell_order = + GENERATE(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + + DYNAMIC_SECTION( + "tile_order = " << sm::layout_str(static_cast(tile_order)) + << ", cell_order = " + << sm::layout_str(static_cast(cell_order))) { + // each tile is a full row of a 2D array + SECTION("Row tiles") { + using Dim = 
templates::Dimension; + using Dom = templates::Domain; + + constexpr uint64_t max_fragment_size = 64 * 1024; + + constexpr size_t span_d2 = 10000; + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, 1), + Dim(0, span_d2 - 1, span_d2)}; + + const uint64_t base_d1 = 12345; + const uint64_t num_rows = GENERATE(1, 2, 4, 8); + const std::vector subarray = { + Dom(base_d1 + 0, base_d1 + num_rows - 1), Dom(0, span_d2 - 1)}; + + const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); + + DYNAMIC_SECTION( + "num_rows = " << num_rows << ", write_unit_num_cells = " + << write_unit_num_cells) { + if (tile_order == TILEDB_COL_MAJOR && num_rows > 1) { + // Consider the following example: + // + // [ 1 1 1 1 2 2 2 2 ] + // [ 3 3 3 3 4 4 4 4 ] + // [ 5 5 5 5 6 6 6 6 ] + // [ 7 7 7 7 8 8 8 8 ] + // + // In row major order we can see that there are two tiles per + // "hyper-row". In column major order instead the tiles are placed [1 + // 3 5 7 2 4 6 8]. A "hyperrow" is not formed until tile 7 is + // written... i.e the number of rows. + // + // But wait, this example only has one tile per row! And indeed this + // does mean that each tile can be its own hyper-row again. For + // simplicity we elect not to implement that special case. + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray), + expect); + } else { + const auto actual = instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional{write_unit_num_cells}); + + std::vector> expect; + for (uint64_t r = 0; r < num_rows; r++) { + expect.push_back( + {Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); + } - constexpr size_t span_d2 = 10000; - const std::vector dimensions = { - Dim(0, std::numeric_limits::max() - 1, 1), - Dim(0, span_d2 - 1, span_d2)}; - - const uint64_t base_d1 = 12345; - const uint64_t num_rows = GENERATE(1, 2, 4, 8); - const std::vector subarray = { - Dom(base_d1 + 0, base_d1 + num_rows - 1), Dom(0, span_d2 - 1)}; - - const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); - - DYNAMIC_SECTION( - "num_rows = " << num_rows - << ", write_unit_num_cells = " << write_unit_num_cells) { - const auto actual = instance_dense_global_order( - ctx, - 64 * 1024, - dimensions, - subarray, - write_unit_num_cells == 0 ? - std::nullopt : - std::optional{write_unit_num_cells}); - - std::vector> expect; - for (uint64_t r = 0; r < num_rows; r++) { - expect.push_back({Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); + CHECK(expect == actual); + } } - - CHECK(expect == actual); } - } - // each tile is some rectangle of a 2D array - SECTION("Rectangle tiles") { - using Dim = templates::Dimension; - using Dom = templates::Domain; + // each tile is some rectangle of a 2D array + SECTION("Rectangle tiles") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + const uint64_t d1_extent = GENERATE(8, 4); + constexpr size_t d2_span = 10000; + REQUIRE(d2_span % d1_extent == 0); // for row major + + const uint64_t d1_subarray = 16; + REQUIRE(d2_span % d1_subarray == 0); // for column major + + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, d1_extent), + Dim(0, d2_span - 1, d2_span / d1_extent)}; + + const uint64_t d1_start_offset = GENERATE(0, 1); + const uint64_t d1_end_offset = GENERATE(0, 1); + const uint64_t d1_start = 100 + d1_start_offset; + const uint64_t d1_end = d1_start + d1_subarray - 1 - 
d1_end_offset; + const std::vector subarray = { + Dom(d1_start, d1_end), Dom(0, d2_span - 1)}; + + const uint64_t max_fragment_size = 4 * 64 * 1024; + + const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); + + DYNAMIC_SECTION( + "start_offset = " + << d1_start_offset << ", end_offset = " << d1_end_offset + << ", extent = " << d1_extent + << ", write_unit_num_cells = " << write_unit_num_cells) { + if (d1_extent == 8) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray), + expect); + } else if (d1_start_offset + d1_end_offset > 0) { + // if this constraint is ever relaxed this test must be extended + // with new inputs which are offset within a tile + const auto expect = Catch::Matchers::ContainsSubstring( + "the subarray must coincide with the tile bounds"); + REQUIRE_THROWS(instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional(write_unit_num_cells))); + } else { + std::vector> expect; + if (tile_order == TILEDB_ROW_MAJOR) { + expect = { + {Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), + Dom(0, d2_span - 1)}}; + } else { + expect = { + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(0 * (d2_span / 4), 1 * (d2_span / 4) - 1)}, + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(1 * (d2_span / 4), 2 * (d2_span / 4) - 1)}, + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(2 * (d2_span / 4), 3 * (d2_span / 4) - 1)}, + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(3 * (d2_span / 4), 4 * (d2_span / 4) - 1)}, + }; + } + + const auto actual = instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray); - const uint64_t d1_extent = GENERATE(8, 4); - constexpr size_t d2_span = 10000; - REQUIRE(d2_span % d1_extent == 0); // for test setup - - const std::vector dimensions = { - Dim(0, std::numeric_limits::max() - 1, d1_extent), - Dim(0, d2_span - 1, d2_span / d1_extent)}; - - const uint64_t d1_start_offset = GENERATE(0, 1); - const uint64_t d1_end_offset = GENERATE(0, 1); - const uint64_t d1_start = 100 + d1_start_offset; - const uint64_t d1_end = d1_start + 15 - d1_end_offset; - const std::vector subarray = { - Dom(d1_start, d1_end), Dom(0, d2_span - 1)}; - - const uint64_t max_fragment_size = 4 * 64 * 1024; - - const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); - - DYNAMIC_SECTION( - "start_offset = " << d1_start_offset << ", end_offset = " - << d1_end_offset << ", extent = " << d1_extent - << ", write_unit_num_cells = " - << write_unit_num_cells) { - if (d1_extent == 8) { - const auto expect = 
Catch::Matchers::ContainsSubstring( - "Fragment size is too small to subdivide dense subarray into " - "multiple fragments"); - REQUIRE_THROWS(instance_dense_global_order( - ctx, max_fragment_size, dimensions, subarray)); - } else if (d1_start_offset + d1_end_offset > 0) { - // if this constraint is ever relaxed this test must be extended - // with new inputs which are offset within a tile - const auto expect = Catch::Matchers::ContainsSubstring( - "the subarray must coincide with the tile bounds"); - REQUIRE_THROWS(instance_dense_global_order( - ctx, - max_fragment_size, - dimensions, - subarray, - write_unit_num_cells == 0 ? - std::nullopt : - std::optional(write_unit_num_cells))); - } else { - const std::vector> expect = { - {Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), - Dom(0, d2_span - 1)}, - {Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), - Dom(0, d2_span - 1)}, - {Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), - Dom(0, d2_span - 1)}, - {Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), - Dom(0, d2_span - 1)}}; - - const auto actual = instance_dense_global_order( - ctx, max_fragment_size, dimensions, subarray); - - CHECK(expect == actual); + CHECK(expect == actual); + } } } } @@ -829,7 +924,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const uint64_t max_fragment_size = 24; instance_dense_global_order( - ctx, max_fragment_size, {d1, d2}, {s1, s2}); + ctx, tile_order, cell_order, max_fragment_size, {d1, d2}, {s1, s2}); } SECTION("Example 2") { @@ -840,7 +935,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const uint64_t max_fragment_size = 28; instance_dense_global_order( - ctx, max_fragment_size, {d1, d2}, {s1, s2}); + ctx, tile_order, cell_order, max_fragment_size, {d1, d2}, {s1, s2}); } } } @@ -926,6 +1021,11 @@ TEST_CASE( RC_PRE(num_cells_per_tile.has_value()); RC_PRE(num_cells_per_tile.value() <= 1024 * 128); + const 
tiledb_layout_t tile_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + const tiledb_layout_t cell_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + const uint64_t tile_size = num_cells_per_tile.value() * sizeof(int); const uint64_t filter_chunk_size = sm::WriterTile::compute_chunk_size(tile_size, sizeof(int)); @@ -961,6 +1061,6 @@ TEST_CASE( << "max_fragment_size: " << max_fragment_size << std::endl; instance_dense_global_order( - ctx, max_fragment_size, {d1, d2}, subarray); + ctx, tile_order, cell_order, max_fragment_size, {d1, d2}, subarray); }); } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 0df4c91d2ec..683fb33759e 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -62,7 +62,7 @@ using namespace tiledb::sm::stats; namespace tiledb::sm { static NDRange domain_tile_offset( - const Domain& arraydomain, + const ArraySchema& arrayschema, const NDRange& domain, uint64_t start_tile, uint64_t num_tiles); @@ -522,27 +522,37 @@ void GlobalOrderWriter::clean_up() { } Status GlobalOrderWriter::filter_last_tiles(uint64_t cell_num) { + const uint64_t last_tile_offset = + global_write_state_->last_tiles_.begin()->second.size() - 1; + // Adjust cell num for (auto& last_tiles : global_write_state_->last_tiles_) { - last_tiles.second[0].set_final_size(cell_num); + last_tiles.second.back()->set_final_size(cell_num); } // Compute coordinates metadata auto meta = global_write_state_->frag_meta_; - auto mbrs = compute_mbrs(global_write_state_->last_tiles_); + auto mbrs = compute_mbrs( + last_tile_offset, last_tile_offset + 1, global_write_state_->last_tiles_); set_coords_metadata(0, 1, global_write_state_->last_tiles_, mbrs, meta); // Compute tile metadata. 
- RETURN_NOT_OK(compute_tiles_metadata(1, global_write_state_->last_tiles_)); + RETURN_NOT_OK(compute_tiles_metadata( + last_tile_offset, + last_tile_offset + 1, + global_write_state_->last_tiles_)); // Gather stats stats_->add_counter( "cell_num", - global_write_state_->last_tiles_.begin()->second[0].cell_num()); + global_write_state_->last_tiles_.begin()->second.back()->cell_num()); stats_->add_counter("tile_num", 1); // Filter tiles - RETURN_NOT_OK(filter_tiles(&global_write_state_->last_tiles_)); + RETURN_NOT_OK(filter_tiles( + last_tile_offset, + last_tile_offset + 1, + &global_write_state_->last_tiles_)); return Status::Ok(); } @@ -632,6 +642,22 @@ Status GlobalOrderWriter::compute_coord_dups( Status GlobalOrderWriter::finalize_global_write_state() { iassert(layout_ == Layout::GLOBAL_ORDER, "layout = {}", layout_str(layout_)); + // For dense, there may be prepared tiles which have not been flushed yet + if (dense()) { + const uint64_t num_remaining = + global_write_state_->last_tiles_.begin()->second.size() - 1; + if (num_remaining > 0) { + iassert(global_write_state_->frag_meta_); + throw_if_not_ok(populate_fragment( + global_write_state_->last_tiles_, 0, num_remaining)); + // NB: there is a possibility here that we write a tile bigger than the + // max fragment size if these remaining tiles fill it up and then the last + // tile runs over... we can live with that right? 
+ } + } else { + iassert(global_write_state_->last_tiles_.begin()->second.size() <= 1); + } + // Handle last tile Status st = global_write_handle_last_tile(); if (!st.ok()) { @@ -656,7 +682,7 @@ Status GlobalOrderWriter::finalize_global_write_state() { const uint64_t num_tiles_in_fragment = meta->loaded_metadata()->tile_offsets()[0].size(); NDRange fragment_domain = domain_tile_offset( - array_schema_.domain(), + array_schema_, subarray_.ndrange(0), global_write_state_->dense_.domain_tile_offset_, num_tiles_in_fragment); @@ -745,6 +771,24 @@ Status GlobalOrderWriter::finalize_global_write_state() { return st; } +Status GlobalOrderWriter::populate_fragment( + tdb::pmr::unordered_map& tiles, + uint64_t tile_offset, + uint64_t num_tiles) { + auto frag_meta = global_write_state_->frag_meta_; + + // update metadata of current fragment + frag_meta->set_num_tiles(frag_meta->tile_index_base() + num_tiles); + + // write tiles for all attributes + RETURN_CANCEL_OR_ERROR( + write_tiles(tile_offset, tile_offset + num_tiles, frag_meta, &tiles)); + + frag_meta->set_tile_index_base(frag_meta->tile_index_base() + num_tiles); + + return Status::Ok(); +} + Status GlobalOrderWriter::global_write() { // Applicable only to global write on dense/sparse arrays iassert(layout_ == Layout::GLOBAL_ORDER, "layout = {}", layout_str(layout_)); @@ -771,23 +815,39 @@ Status GlobalOrderWriter::global_write() { query_memory_tracker_->get_resource(MemoryType::WRITER_TILE_DATA)); RETURN_CANCEL_OR_ERROR(prepare_full_tiles(coord_dups, &tiles)); - const uint64_t tile_num = (tiles.empty() ? 0 : tiles.begin()->second.size()); + uint64_t tile_num = (tiles.empty() ? 0 : tiles.begin()->second.size()); if (tile_num == 0) { return Status::Ok(); } + // Compute tile metadata. + RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tiles)); + // Compute coordinate metadata (if coordinates are present) auto mbrs = compute_mbrs(tiles); - // Compute tile metadata. 
- RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tile_num, tiles)); + RETURN_NOT_OK(filter_tiles(&tiles)); - // Filter all tiles - RETURN_CANCEL_OR_ERROR(filter_tiles(&tiles)); + // include any prepared tiles from previous `submit` which were not flushed + for (const auto& it : buffers_) { + auto& last = global_write_state_->last_tiles_.at(it.first); + if (!last.empty()) { + const uint64_t num_leftover = last.size() - 1; + tiles.at(it.first).splice( + tiles.at(it.first).begin(), + last, + last.begin(), + std::next(last.begin(), num_leftover)); + } + } + tile_num = (tiles.empty() ? 0 : tiles.begin()->second.size()); const auto fragments = identify_fragment_tile_boundaries(tiles); - for (uint64_t f = 0; f < fragments.size(); f++) { + const uint64_t num_fragments_to_write = + (dense() ? fragments.size() - 1 : fragments.size()); + + for (uint64_t f = 0; f < num_fragments_to_write; f++) { const uint64_t input_start_tile = fragments[f].second; const uint64_t input_num_tiles = (f + 1 < fragments.size() ? 
fragments[f + 1].second : tile_num) - @@ -806,32 +866,41 @@ Status GlobalOrderWriter::global_write() { RETURN_CANCEL_OR_ERROR(start_new_fragment()); } - auto frag_meta = global_write_state_->frag_meta_; - - // update metadata of current fragment - frag_meta->set_num_tiles(frag_meta->tile_index_base() + input_num_tiles); - set_coords_metadata( input_start_tile, input_start_tile + input_num_tiles, tiles, mbrs, - frag_meta); + global_write_state_->frag_meta_); - // write tiles for all attributes - RETURN_CANCEL_OR_ERROR(write_tiles( - input_start_tile, - input_start_tile + input_num_tiles, - frag_meta, - &tiles)); + RETURN_CANCEL_OR_ERROR( + populate_fragment(tiles, input_start_tile, input_num_tiles)); } - - global_write_state_->frag_meta_->set_tile_index_base( - global_write_state_->frag_meta_->tile_index_base() + input_num_tiles); } - if (!fragments.empty()) { - current_fragment_size_ = fragments.back().first; + if (dense() && !fragments.empty()) { + const uint64_t num_unpopulated = + fragments.back().second; // FIXME: bad name, offset for tiles which + // weren't started yet + RETURN_CANCEL_OR_ERROR(start_new_fragment()); + set_coords_metadata( + num_unpopulated, + tile_num, + tiles, + mbrs, + global_write_state_->frag_meta_); + + current_fragment_size_ = 0; + + // buffer tiles which couldn't fit in memory + for (auto& attr : tiles) { + auto& last = global_write_state_->last_tiles_.at(attr.first); + last.splice( + last.begin(), + attr.second, + std::next(attr.second.begin(), fragments.back().second), + attr.second.end()); + } } return Status::Ok(); @@ -939,7 +1008,7 @@ Status GlobalOrderWriter::prepare_full_tiles_fixed( } // First fill the last tile - auto& last_tile = global_write_state_->last_tiles_.at(name)[0]; + auto& last_tile = *global_write_state_->last_tiles_.at(name).back(); uint64_t cell_idx = 0; uint64_t last_tile_cell_idx = global_write_state_->cells_written_[name] % cell_num_per_tile; @@ -1119,7 +1188,7 @@ Status 
GlobalOrderWriter::prepare_full_tiles_var( return Status::Ok(); // First fill the last tile - auto& last_tile = global_write_state_->last_tiles_.at(name)[0]; + auto& last_tile = *global_write_state_->last_tiles_.at(name).back(); auto& last_var_offset = global_write_state_->last_var_offsets_[name]; uint64_t cell_idx = 0; uint64_t last_tile_cell_idx = @@ -1396,7 +1465,7 @@ Status GlobalOrderWriter::prepare_full_tiles_var( /** * @return the number of tiles in a "hyper-row" of `subarray` within - * `arraydomain` + * `arrayschema` * * If a "hyper-rectangle" is a generalization of a rectangle to N dimensions, * then let's define a "hyper-row" to be a generalization of a row to N @@ -1404,16 +1473,21 @@ Status GlobalOrderWriter::prepare_full_tiles_var( * the outer-most dimension. */ static uint64_t compute_hyperrow_num_tiles( - const Domain& arraydomain, const NDRange& subarray) { + const ArraySchema& arrayschema, const NDRange& subarray) { + const uint64_t rdim = + (arrayschema.tile_order() == Layout::ROW_MAJOR ? + 0 : + arrayschema.dim_num() - 1); + NDRange adjusted = subarray; // normalize `adjusted` to a single hyper-row memcpy( - adjusted[0].end_fixed(), - adjusted[0].start_fixed(), - adjusted[0].size() / 2); + adjusted[rdim].end_fixed(), + adjusted[rdim].start_fixed(), + adjusted[rdim].size() / 2); - return arraydomain.tile_num(adjusted); + return arrayschema.domain().tile_num(adjusted); } /** @@ -1431,12 +1505,13 @@ static uint64_t compute_hyperrow_num_tiles( */ std::vector> GlobalOrderWriter::identify_fragment_tile_boundaries( - tdb::pmr::unordered_map& tiles) const { + const tdb::pmr::unordered_map& tiles) + const { // Cache variables to prevent map lookups. 
const auto buf_names = buffer_names(); std::vector var_size; std::vector nullable; - std::vector writer_tile_vectors; + std::vector writer_tile_vectors; var_size.reserve(buf_names.size()); nullable.reserve(buf_names.size()); writer_tile_vectors.reserve(buf_names.size()); @@ -1470,8 +1545,8 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( uint64_t hyperrow_offset = 0; std::optional hyperrow_num_tiles; if (dense()) { - hyperrow_num_tiles = compute_hyperrow_num_tiles( - array_schema_.domain(), subarray_.ndrange(0)); + hyperrow_num_tiles = + compute_hyperrow_num_tiles(array_schema_, subarray_.ndrange(0)); if (global_write_state_->frag_meta_) { hyperrow_offset = global_write_state_->dense_.domain_tile_offset_ + @@ -1511,10 +1586,12 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( if (running_tiles_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to write a single tile"); - } else if (!fragment_end.has_value() && fragment_size == 0) { - throw GlobalOrderWriterException( - "Fragment size is too small to subdivide dense subarray into " - "multiple fragments"); + } else if (!fragment_end.has_value()) { + if (fragment_size == 0) { + throw GlobalOrderWriterException( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + } } fragments.push_back(std::make_pair(fragment_size, fragment_start)); @@ -1554,13 +1631,14 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( * hyper-rectangle into two new hyper-rectangle */ static NDRange domain_tile_offset( - const Domain& arraydomain, + const ArraySchema& arrayschema, const NDRange& domain, uint64_t start_tile, uint64_t num_tiles) { + const Domain& arraydomain = arrayschema.domain(); const uint64_t domain_num_tiles = arraydomain.tile_num(domain); const uint64_t hyperrow_num_tiles = - compute_hyperrow_num_tiles(arraydomain, domain); + compute_hyperrow_num_tiles(arrayschema, domain); iassert(domain_num_tiles % hyperrow_num_tiles == 0); iassert(start_tile 
% hyperrow_num_tiles == 0); iassert(num_tiles % hyperrow_num_tiles == 0); @@ -1569,14 +1647,19 @@ static NDRange domain_tile_offset( const uint64_t num_hyperrows = num_tiles / hyperrow_num_tiles; iassert(num_hyperrows > 0); + const uint64_t rdim = + (arrayschema.tile_order() == Layout::ROW_MAJOR ? + 0 : + arrayschema.dim_num() - 1); + NDRange adjusted = domain; auto fix_bounds = [&](T) { - const T extent = arraydomain.tile_extent(0).rvalue_as(); - const T lower_bound = *static_cast(domain[0].start_fixed()); - const T upper_bound = *static_cast(domain[0].end_fixed()); - T* start = static_cast(adjusted[0].start_fixed()); - T* end = static_cast(adjusted[0].end_fixed()); + const T extent = arraydomain.tile_extent(rdim).rvalue_as(); + const T lower_bound = *static_cast(domain[rdim].start_fixed()); + const T upper_bound = *static_cast(domain[rdim].end_fixed()); + T* start = static_cast(adjusted[rdim].start_fixed()); + T* end = static_cast(adjusted[rdim].end_fixed()); // tiles begin at [LB, LB + E, LB + 2E, ...] 
where LB is lower bound, E is // extent @@ -1587,7 +1670,7 @@ static NDRange domain_tile_offset( *start = std::max(lower_bound, align(*start + extent * start_hyperrow)); *end = std::min(upper_bound, align(*start + extent * num_hyperrows) - 1); }; - apply_with_type(fix_bounds, arraydomain.dimension_ptr(0)->type()); + apply_with_type(fix_bounds, arraydomain.dimension_ptr(rdim)->type()); return adjusted; } @@ -1606,7 +1689,7 @@ Status GlobalOrderWriter::start_new_fragment() { const uint64_t num_tiles_in_fragment = frag_meta->loaded_metadata()->tile_offsets()[0].size(); NDRange fragment_domain = domain_tile_offset( - array_schema_.domain(), + array_schema_, subarray_.ndrange(0), global_write_state_->dense_.domain_tile_offset_, num_tiles_in_fragment); diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index 9ab8de991f2..a63d89e48d0 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -75,6 +75,16 @@ class GlobalOrderWriter : public WriterBase { * attributes/dimensions, the first tile is the offsets tile, whereas the * second tile is the values tile. In both cases, the third tile stores a * validity tile for nullable attributes. + * + * For sparse arrays, each `WriterTileTupleVector` contains up to one tile, + * which is the data from the previous `submit` which did not fill a tile. + * + * For dense arrays, each `WriterTileTupleVector` contains any tiles which + * were not guaranteed to fit into `max_fragment_size_` while also forming + * a bounding rectangle. Written fragments always have a rectangular domain, + * and it is necessary to buffer tiles this way to avoid flushing data + * which might later require a fragment to exceed `max_fragment_size_` + * in order to represent a rectangular domain. 
*/ tdb::pmr::unordered_map last_tiles_; @@ -396,7 +406,20 @@ class GlobalOrderWriter : public WriterBase { * `start_tile` */ std::vector> identify_fragment_tile_boundaries( - tdb::pmr::unordered_map& tiles) const; + const tdb::pmr::unordered_map& tiles) + const; + + /** + * Writes cells from the indicated slice of `tiles` into the current fragment. + * + * @param tiles the source of cells organized into filtered tiles + * @param tile_offset the tile from which to begin writing + * @param num_tiles the number of tiles to write + */ + Status populate_fragment( + tdb::pmr::unordered_map& tiles, + uint64_t tile_offset, + uint64_t num_tiles); /** * Close the current fragment and start a new one. The closed fragment will From c063fcec6b9f3e41486c04063376aa8a3bd4b2ed Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 17 Oct 2025 12:01:19 -0400 Subject: [PATCH 037/109] Fix num tiles for sparse --- tiledb/sm/query/writers/global_order_writer.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 683fb33759e..3c879713e09 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -777,9 +777,6 @@ Status GlobalOrderWriter::populate_fragment( uint64_t num_tiles) { auto frag_meta = global_write_state_->frag_meta_; - // update metadata of current fragment - frag_meta->set_num_tiles(frag_meta->tile_index_base() + num_tiles); - // write tiles for all attributes RETURN_CANCEL_OR_ERROR( write_tiles(tile_offset, tile_offset + num_tiles, frag_meta, &tiles)); @@ -866,6 +863,9 @@ Status GlobalOrderWriter::global_write() { RETURN_CANCEL_OR_ERROR(start_new_fragment()); } + global_write_state_->frag_meta_->set_num_tiles( + global_write_state_->frag_meta_->tile_index_base() + input_num_tiles); + set_coords_metadata( input_start_tile, input_start_tile + input_num_tiles, @@ -883,6 +883,11 @@ Status 
GlobalOrderWriter::global_write() { fragments.back().second; // FIXME: bad name, offset for tiles which // weren't started yet RETURN_CANCEL_OR_ERROR(start_new_fragment()); + + global_write_state_->frag_meta_->set_num_tiles( + global_write_state_->frag_meta_->tile_index_base() + tile_num - + num_unpopulated); + set_coords_metadata( num_unpopulated, tile_num, From 37be646547f30dae80a5a727d5ddcd2170498b49 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 17 Oct 2025 12:51:08 -0400 Subject: [PATCH 038/109] Do not always start new fragment with unpopulated tiles --- .../sm/query/writers/global_order_writer.cc | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 3c879713e09..66f968fe54c 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -879,23 +879,21 @@ Status GlobalOrderWriter::global_write() { } if (dense() && !fragments.empty()) { - const uint64_t num_unpopulated = - fragments.back().second; // FIXME: bad name, offset for tiles which - // weren't started yet - RETURN_CANCEL_OR_ERROR(start_new_fragment()); + const uint64_t offset_not_written = fragments.back().second; + + if (!global_write_state_->frag_meta_) { + RETURN_CANCEL_OR_ERROR(start_new_fragment()); + } global_write_state_->frag_meta_->set_num_tiles( global_write_state_->frag_meta_->tile_index_base() + tile_num - - num_unpopulated); - - set_coords_metadata( - num_unpopulated, - tile_num, - tiles, - mbrs, - global_write_state_->frag_meta_); + offset_not_written); - current_fragment_size_ = 0; + // Dense array does not have bounding rectangles. + // If there were any other tile metadata which we needed to draw from the + // un-filtered tiles, we would have to store that in the global write state + // here. But there is no other such metadata. 
+ iassert(mbrs.empty()); // buffer tiles which couldn't fit in memory for (auto& attr : tiles) { From 286664547d24016a3da00091963666b384f3c813 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 17 Oct 2025 13:54:41 -0400 Subject: [PATCH 039/109] Validate fragment domains before fragment size --- test/src/unit-cppapi-max-fragment-size.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 640ba2a2623..2c2ac83b7e4 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -640,17 +640,6 @@ instance_dense_global_order( FragmentInfo finfo(ctx, array_name); finfo.load(); - // validate fragment size - for (uint32_t f = 0; f < finfo.fragment_num(); f++) { - const uint64_t fsize = finfo.fragment_size(f); - const uint64_t fmetasize = finfo.ptr() - ->fragment_info() - ->single_fragment_info_vec()[f] - .meta() - ->fragment_meta_size(); - ASSERTER(fsize <= max_fragment_size + fmetasize); - } - // collect fragment domains std::vector>> fragment_domains; for (uint32_t f = 0; f < finfo.fragment_num(); f++) { @@ -714,6 +703,17 @@ instance_dense_global_order( } } + // validate fragment size + for (uint32_t f = 0; f < finfo.fragment_num(); f++) { + const uint64_t fsize = finfo.fragment_size(f); + const uint64_t fmetasize = finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[f] + .meta() + ->fragment_meta_size(); + ASSERTER(fsize <= max_fragment_size + fmetasize); + } + // this is last because a fragment domain mismatch is more informative ASSERTER(a_read == a_write); From 309516da56e493f9e7e2892fedb43ba0ea78e0b9 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 17 Oct 2025 13:55:05 -0400 Subject: [PATCH 040/109] start new fragment if needed for lingering tiles --- tiledb/sm/query/writers/global_order_writer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 66f968fe54c..5070b69d9b2 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -881,7 +881,7 @@ Status GlobalOrderWriter::global_write() { if (dense() && !fragments.empty()) { const uint64_t offset_not_written = fragments.back().second; - if (!global_write_state_->frag_meta_) { + if (!global_write_state_->frag_meta_ || fragments.size() > 1) { RETURN_CANCEL_OR_ERROR(start_new_fragment()); } From 2acd928617f3a93d1f26bc674957c3c170d80249 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 17 Oct 2025 14:21:06 -0400 Subject: [PATCH 041/109] Fix rapidcheck num_tiles_per_hyperrow calc --- test/src/unit-cppapi-max-fragment-size.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 2c2ac83b7e4..f5943673eb7 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1040,7 +1040,9 @@ TEST_CASE( const auto subarray = *rc::make_tile_aligned_subarray({d1, d2}); - const uint64_t num_tiles_per_hyperrow = d2.num_tiles(subarray[1]); + const uint64_t num_tiles_per_hyperrow = + (tile_order == TILEDB_ROW_MAJOR ? 
d2.num_tiles(subarray[1]) : + d1.num_tiles(subarray[0])); auto gen_fragment_size = rc::gen::map( rc::gen::inRange(1, 8), From ecfa94743971c00d7ad4ce4d38849ed33841eddb Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 17 Oct 2025 14:21:21 -0400 Subject: [PATCH 042/109] Add 3D rapidcheck test --- test/src/unit-cppapi-max-fragment-size.cc | 52 +++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index f5943673eb7..dcacc75fb40 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1066,3 +1066,55 @@ TEST_CASE( ctx, tile_order, cell_order, max_fragment_size, {d1, d2}, subarray); }); } + +TEST_CASE( + "C++ API: Max fragment size dense array rapidcheck 3d", + "[cppapi][max-frag-size][rapidcheck]") { + Context ctx; + rc::prop("max fragment size dense 3d", [ctx]() { + static constexpr auto DT = sm::Datatype::UINT64; + templates::Dimension
d1 = *rc::make_dimension
(32); + templates::Dimension
d2 = *rc::make_dimension
(32); + templates::Dimension
d3 = *rc::make_dimension
(32); + + const uint64_t num_cells_per_tile = d1.extent * d2.extent * d3.extent; + + RC_PRE(num_cells_per_tile <= 32 * 32 * 32); + + const tiledb_layout_t tile_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + const tiledb_layout_t cell_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + + const uint64_t tile_size = num_cells_per_tile * sizeof(int); + const uint64_t filter_chunk_size = + sm::WriterTile::compute_chunk_size(tile_size, sizeof(int)); + const uint64_t num_filter_chunks_per_tile = + (tile_size + filter_chunk_size - 1) / filter_chunk_size; + + const uint64_t estimate_single_tile_fragment_size = + num_cells_per_tile * sizeof(int) // data + + sizeof(uint64_t) // prefix containing the number of chunks + + num_filter_chunks_per_tile * 3 * sizeof(uint32_t); // chunk sizes + + const auto subarray = + *rc::make_tile_aligned_subarray({d1, d2, d3}); + + const uint64_t num_tiles_per_hyperrow = + (tile_order == TILEDB_ROW_MAJOR ? + d2.num_tiles(subarray[1]) * d3.num_tiles(subarray[2]) : + d1.num_tiles(subarray[0]) * d2.num_tiles(subarray[1])); + + auto gen_fragment_size = rc::gen::map( + rc::gen::inRange(1, 8), + [num_tiles_per_hyperrow, + estimate_single_tile_fragment_size](uint64_t scale) { + return num_tiles_per_hyperrow * estimate_single_tile_fragment_size * + scale; + }); + const uint64_t max_fragment_size = *gen_fragment_size; + + instance_dense_global_order( + ctx, tile_order, cell_order, max_fragment_size, {d1, d2, d3}, subarray); + }); +} From 1b5a408a286f10a0e06a74c85b36ec2b51002dee Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 20 Oct 2025 10:14:35 -0400 Subject: [PATCH 043/109] Add extra validation that adjacent fragments must exceed the max fragment size --- test/src/unit-cppapi-max-fragment-size.cc | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index dcacc75fb40..2eeda8599cd 100644 --- 
a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -703,17 +703,31 @@ instance_dense_global_order( } } - // validate fragment size + auto meta_size = [&finfo](uint32_t f) -> uint64_t { + return finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[f] + .meta() + ->fragment_meta_size(); + }; + + // validate fragment size - no fragment should be larger than max requested + // size for (uint32_t f = 0; f < finfo.fragment_num(); f++) { const uint64_t fsize = finfo.fragment_size(f); - const uint64_t fmetasize = finfo.ptr() - ->fragment_info() - ->single_fragment_info_vec()[f] - .meta() - ->fragment_meta_size(); + const uint64_t fmetasize = meta_size(f); ASSERTER(fsize <= max_fragment_size + fmetasize); } + // validate fragment size - we wrote the largest possible fragments (no two + // adjacent should be under max fragment size) + for (uint32_t f = 1; f < finfo.fragment_num(); f++) { + const uint64_t combined_size = + finfo.fragment_size(f - 1) + finfo.fragment_size(f); + const uint64_t combined_meta_size = meta_size(f - 1) + meta_size(f); + ASSERTER(combined_size > max_fragment_size + combined_meta_size); + } + // this is last because a fragment domain mismatch is more informative ASSERTER(a_read == a_write); From f9a92891f53229d8a180733131f43b3e57629d1c Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 20 Oct 2025 10:42:58 -0400 Subject: [PATCH 044/109] Refactor positive rapidcheck tests and add 1D --- test/src/unit-cppapi-max-fragment-size.cc | 165 ++++++++++------------ 1 file changed, 76 insertions(+), 89 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 2eeda8599cd..f7cdf5dd7e4 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1022,62 +1022,87 @@ make_tile_aligned_subarray( } // namespace rc +/** + * Generates an arbitrary expected-to-not-error input to + * 
`instance_dense_global_order` of an appropriate size for the given + * `dimensions`. + * + * "Appropriate size" means tiles with at most `1024 * 128` cells, and a write + * domain with at most `1024 * 1024 * 4` cells (see + * `make_tile_aligned_subarray`). We expect that this should allow inputs which + * are large enough to be interesting but not so large that each instance takes + * a long time. + * + * Inputs generated by this test function are expected to successfully write + * fragments within the generated max fragment size. The maximum fragment size + * is a number of bytes which represents between 1 and 8 hyperrows. + */ +template +void rapidcheck_dense_array( + Context& ctx, const std::vector>& dimensions) { + uint64_t num_cells_per_tile = 1; + for (const auto& dim : dimensions) { + num_cells_per_tile *= dim.extent; + } + RC_PRE(num_cells_per_tile <= 1024 * 128); + + const tiledb_layout_t tile_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + const tiledb_layout_t cell_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + + const uint64_t tile_size = num_cells_per_tile * sizeof(int); + const uint64_t filter_chunk_size = + sm::WriterTile::compute_chunk_size(tile_size, sizeof(int)); + const uint64_t num_filter_chunks_per_tile = + (tile_size + filter_chunk_size - 1) / filter_chunk_size; + + const uint64_t estimate_single_tile_fragment_size = + num_cells_per_tile * sizeof(int) // data + + sizeof(uint64_t) // prefix containing the number of chunks + + num_filter_chunks_per_tile * 3 * sizeof(uint32_t); // chunk sizes + + const auto subarray = + *rc::make_tile_aligned_subarray(dimensions); + + uint64_t num_tiles_per_hyperrow = 1; + for (uint64_t i = 0; i < dimensions.size() - 1; i++) { + const uint64_t dim = + (tile_order == TILEDB_ROW_MAJOR ? 
i + 1 : dimensions.size() - i - 2); + num_tiles_per_hyperrow *= dimensions[dim].num_tiles(subarray[dim]); + } + + auto gen_fragment_size = rc::gen::inRange( + num_tiles_per_hyperrow * estimate_single_tile_fragment_size * 1, + num_tiles_per_hyperrow * estimate_single_tile_fragment_size * 8); + const uint64_t max_fragment_size = *gen_fragment_size; + + instance_dense_global_order( + ctx, tile_order, cell_order, max_fragment_size, dimensions, subarray); +} + +TEST_CASE( + "C++ API: Max fragment size dense array rapidcheck 1d", + "[cppapi][max-frag-size][rapidcheck]") { + Context ctx; + rc::prop("max fragment size dense 1d", [&ctx]() { + static constexpr auto DT = sm::Datatype::UINT64; + templates::Dimension
d1 = *rc::make_dimension
(8192); + + rapidcheck_dense_array
(ctx, {d1}); + }); +} + TEST_CASE( "C++ API: Max fragment size dense array rapidcheck 2d", "[cppapi][max-frag-size][rapidcheck]") { Context ctx; - rc::prop("max fragment size dense 2d", [ctx]() { + rc::prop("max fragment size dense 2d", [&ctx]() { static constexpr auto DT = sm::Datatype::UINT64; templates::Dimension
d1 = *rc::make_dimension
(128); templates::Dimension
d2 = *rc::make_dimension
(128); - const std::optional num_cells_per_tile = - checked_arithmetic::mul(d1.extent, d2.extent); - RC_PRE(num_cells_per_tile.has_value()); - RC_PRE(num_cells_per_tile.value() <= 1024 * 128); - - const tiledb_layout_t tile_order = - *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); - const tiledb_layout_t cell_order = - *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); - - const uint64_t tile_size = num_cells_per_tile.value() * sizeof(int); - const uint64_t filter_chunk_size = - sm::WriterTile::compute_chunk_size(tile_size, sizeof(int)); - const uint64_t num_filter_chunks_per_tile = - (tile_size + filter_chunk_size - 1) / filter_chunk_size; - - const uint64_t estimate_single_tile_fragment_size = - num_cells_per_tile.value() * sizeof(int) // data - + sizeof(uint64_t) // prefix containing the number of chunks - + num_filter_chunks_per_tile * 3 * sizeof(uint32_t); // chunk sizes - - const auto subarray = - *rc::make_tile_aligned_subarray({d1, d2}); - - const uint64_t num_tiles_per_hyperrow = - (tile_order == TILEDB_ROW_MAJOR ? d2.num_tiles(subarray[1]) : - d1.num_tiles(subarray[0])); - - auto gen_fragment_size = rc::gen::map( - rc::gen::inRange(1, 8), - [num_tiles_per_hyperrow, - estimate_single_tile_fragment_size](uint64_t scale) { - return num_tiles_per_hyperrow * estimate_single_tile_fragment_size * - scale; - }); - const uint64_t max_fragment_size = *gen_fragment_size; - - std::cerr << std::endl << "d1: "; - rc::show(d1, std::cerr); - std::cerr << std::endl << "d2: "; - rc::show(d2, std::cerr); - std::cerr << std::endl << "subarray: "; - rc::show(subarray, std::cerr); - std::cerr << std::endl - << "max_fragment_size: " << max_fragment_size << std::endl; - - instance_dense_global_order( - ctx, tile_order, cell_order, max_fragment_size, {d1, d2}, subarray); + + rapidcheck_dense_array
(ctx, {d1, d2}); }); } @@ -1085,50 +1110,12 @@ TEST_CASE( "C++ API: Max fragment size dense array rapidcheck 3d", "[cppapi][max-frag-size][rapidcheck]") { Context ctx; - rc::prop("max fragment size dense 3d", [ctx]() { + rc::prop("max fragment size dense 3d", [&ctx]() { static constexpr auto DT = sm::Datatype::UINT64; templates::Dimension
d1 = *rc::make_dimension
(32); templates::Dimension
d2 = *rc::make_dimension
(32); templates::Dimension
d3 = *rc::make_dimension
(32); - const uint64_t num_cells_per_tile = d1.extent * d2.extent * d3.extent; - - RC_PRE(num_cells_per_tile <= 32 * 32 * 32); - - const tiledb_layout_t tile_order = - *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); - const tiledb_layout_t cell_order = - *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); - - const uint64_t tile_size = num_cells_per_tile * sizeof(int); - const uint64_t filter_chunk_size = - sm::WriterTile::compute_chunk_size(tile_size, sizeof(int)); - const uint64_t num_filter_chunks_per_tile = - (tile_size + filter_chunk_size - 1) / filter_chunk_size; - - const uint64_t estimate_single_tile_fragment_size = - num_cells_per_tile * sizeof(int) // data - + sizeof(uint64_t) // prefix containing the number of chunks - + num_filter_chunks_per_tile * 3 * sizeof(uint32_t); // chunk sizes - - const auto subarray = - *rc::make_tile_aligned_subarray({d1, d2, d3}); - - const uint64_t num_tiles_per_hyperrow = - (tile_order == TILEDB_ROW_MAJOR ? - d2.num_tiles(subarray[1]) * d3.num_tiles(subarray[2]) : - d1.num_tiles(subarray[0]) * d2.num_tiles(subarray[1])); - - auto gen_fragment_size = rc::gen::map( - rc::gen::inRange(1, 8), - [num_tiles_per_hyperrow, - estimate_single_tile_fragment_size](uint64_t scale) { - return num_tiles_per_hyperrow * estimate_single_tile_fragment_size * - scale; - }); - const uint64_t max_fragment_size = *gen_fragment_size; - - instance_dense_global_order( - ctx, tile_order, cell_order, max_fragment_size, {d1, d2, d3}, subarray); + rapidcheck_dense_array
(ctx, {d1, d2, d3}); }); } From bbe26094ecebf2b982797617fd371b8560974f35 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 20 Oct 2025 14:05:18 -0400 Subject: [PATCH 045/109] Refactor to return struct FragmentTileBoundaries --- .../sm/query/writers/global_order_writer.cc | 79 ++++++++++++++----- tiledb/sm/query/writers/global_order_writer.h | 6 +- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 5070b69d9b2..7d9e760ee16 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -74,6 +74,36 @@ class GlobalOrderWriterException : public StatusException { } }; +namespace global_order_writer { + +/** + * Contains the return values of + * `GlobalOrderWriter::identify_fragment_tile_boundaries`. + */ +struct FragmentTileBoundaries { + /** + * The offsets where each complete fragment starts. + */ + std::vector tile_offsets_; + + /** + * The number of writeable tiles. + * For sparse arrays this is the number of tiles of input. + * For dense arrays this may be less if there is a trail of tiles which cannot + * be guaranteed to fit within `max_fragment_size` while also forming a + * rectangular domain. + */ + uint64_t num_writeable_tiles_; + + /** + * The size in bytes of the filtered tiles which are written to the last + * fragment. The last fragment may be resumed by a subsequent `submit`. + */ + uint64_t last_fragment_size_; +}; + +} // namespace global_order_writer + /* ****************************** */ /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ @@ -841,14 +871,12 @@ Status GlobalOrderWriter::global_write() { const auto fragments = identify_fragment_tile_boundaries(tiles); - const uint64_t num_fragments_to_write = - (dense() ? 
fragments.size() - 1 : fragments.size()); - - for (uint64_t f = 0; f < num_fragments_to_write; f++) { - const uint64_t input_start_tile = fragments[f].second; - const uint64_t input_num_tiles = - (f + 1 < fragments.size() ? fragments[f + 1].second : tile_num) - - input_start_tile; + for (uint64_t f = 0; f < fragments.tile_offsets_.size(); f++) { + const uint64_t input_start_tile = fragments.tile_offsets_[f]; + const uint64_t input_num_tiles = (f + 1 < fragments.tile_offsets_.size() ? + fragments.tile_offsets_[f + 1] : + fragments.num_writeable_tiles_) - + input_start_tile; if (input_num_tiles == 0) { // this should only happen if there is only one tile of input and we have @@ -856,7 +884,7 @@ Status GlobalOrderWriter::global_write() { // and there is no more room iassert(f == 0); if (current_fragment_size_ == 0) { - iassert(fragments.size() == 1); + iassert(fragments.tile_offsets_.size() == 1); } } else { if (f > 0 || !global_write_state_->frag_meta_) { @@ -878,10 +906,16 @@ Status GlobalOrderWriter::global_write() { } } - if (dense() && !fragments.empty()) { - const uint64_t offset_not_written = fragments.back().second; + current_fragment_size_ = fragments.last_fragment_size_; + + if (fragments.num_writeable_tiles_ < tile_num) { + // sparse array should be able to write everything + iassert(dense()); - if (!global_write_state_->frag_meta_ || fragments.size() > 1) { + const uint64_t offset_not_written = fragments.num_writeable_tiles_; + + if (!global_write_state_->frag_meta_ || + fragments.tile_offsets_.size() > 1) { RETURN_CANCEL_OR_ERROR(start_new_fragment()); } @@ -901,7 +935,7 @@ Status GlobalOrderWriter::global_write() { last.splice( last.begin(), attr.second, - std::next(attr.second.begin(), fragments.back().second), + std::next(attr.second.begin(), offset_not_written), attr.second.end()); } } @@ -1506,7 +1540,7 @@ static uint64_t compute_hyperrow_num_tiles( * @return a list of (fragment size, tile offset) pairs identifying the division * of input data into 
target fragments */ -std::vector> +global_order_writer::FragmentTileBoundaries GlobalOrderWriter::identify_fragment_tile_boundaries( const tdb::pmr::unordered_map& tiles) const { @@ -1541,9 +1575,13 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( uint64_t running_tiles_size = current_fragment_size_; uint64_t fragment_size = current_fragment_size_; + // NB: gcc has a false positive uninitialized use warning for `fragment_end` +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" uint64_t fragment_start = 0; std::optional fragment_end; - std::vector> fragments; + std::vector fragments; +#pragma GCC diagnostic pop uint64_t hyperrow_offset = 0; std::optional hyperrow_num_tiles; @@ -1597,7 +1635,7 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( } } - fragments.push_back(std::make_pair(fragment_size, fragment_start)); + fragments.push_back(fragment_start); iassert(running_tiles_size >= fragment_size); running_tiles_size -= fragment_size; @@ -1617,9 +1655,14 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( running_tiles_size += tile_size; } - fragments.push_back(std::make_pair(running_tiles_size, fragment_start)); + if (fragment_end.has_value()) { + fragments.push_back(fragment_start); + } - return fragments; + return global_order_writer::FragmentTileBoundaries{ + .tile_offsets_ = fragments, + .num_writeable_tiles_ = fragment_end.value_or(fragment_start), + .last_fragment_size_ = fragment_size}; } /** diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index a63d89e48d0..445ea4bebd5 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -46,6 +46,10 @@ using namespace tiledb::common; namespace tiledb { namespace sm { +namespace global_order_writer { +struct FragmentTileBoundaries; +} + /** Processes write queries. 
*/ class GlobalOrderWriter : public WriterBase { public: @@ -405,7 +409,7 @@ class GlobalOrderWriter : public WriterBase { * @return a list of `(fragment_size, start_tile)` pairs ordered on * `start_tile` */ - std::vector> identify_fragment_tile_boundaries( + global_order_writer::FragmentTileBoundaries identify_fragment_tile_boundaries( const tdb::pmr::unordered_map& tiles) const; From 3dc25595409241efc18ca05de141cda7bd48e44a Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 20 Oct 2025 14:51:59 -0400 Subject: [PATCH 046/109] WriterTileTuple::filtered_size --- .../sm/query/writers/global_order_writer.cc | 27 +------------------ tiledb/sm/tile/writer_tile_tuple.cc | 15 +++++++++++ tiledb/sm/tile/writer_tile_tuple.h | 6 +++++ 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 7d9e760ee16..232ca755d53 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1546,15 +1546,9 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( const { // Cache variables to prevent map lookups. 
const auto buf_names = buffer_names(); - std::vector var_size; - std::vector nullable; std::vector writer_tile_vectors; - var_size.reserve(buf_names.size()); - nullable.reserve(buf_names.size()); writer_tile_vectors.reserve(buf_names.size()); for (auto& name : buf_names) { - var_size.emplace_back(array_schema_.var_size(name)); - nullable.emplace_back(array_schema_.is_nullable(name)); writer_tile_vectors.emplace_back(&tiles.at(name)); } @@ -1599,26 +1593,7 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( for (uint64_t t = 0; t < tile_num; t++) { uint64_t tile_size = 0; for (uint64_t a = 0; a < buf_names.size(); a++) { - if (var_size[a]) { - tile_size += writer_tile_vectors[a] - ->at(t) - .offset_tile() - .filtered_buffer() - .size(); - tile_size += - writer_tile_vectors[a]->at(t).var_tile().filtered_buffer().size(); - } else { - tile_size += - writer_tile_vectors[a]->at(t).fixed_tile().filtered_buffer().size(); - } - - if (nullable[a]) { - tile_size += writer_tile_vectors[a] - ->at(t) - .validity_tile() - .filtered_buffer() - .size(); - } + tile_size += writer_tile_vectors[a]->at(t).filtered_size().value(); } // NB: normally this should only hit once, but if there is a single diff --git a/tiledb/sm/tile/writer_tile_tuple.cc b/tiledb/sm/tile/writer_tile_tuple.cc index 9ce07d20f95..e6e823ec153 100644 --- a/tiledb/sm/tile/writer_tile_tuple.cc +++ b/tiledb/sm/tile/writer_tile_tuple.cc @@ -114,5 +114,20 @@ void WriterTileTuple::set_metadata( } } +std::optional WriterTileTuple::filtered_size() const { + uint64_t tile_size = 0; + if (var_size()) { + tile_size += offset_tile().filtered_buffer().size(); + tile_size += var_tile().filtered_buffer().size(); + } else { + tile_size += fixed_tile().filtered_buffer().size(); + } + + if (nullable()) { + tile_size += validity_tile().filtered_buffer().size(); + } + return tile_size; +} + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/tile/writer_tile_tuple.h b/tiledb/sm/tile/writer_tile_tuple.h index 
8a2ca28938b..37339ecc836 100644 --- a/tiledb/sm/tile/writer_tile_tuple.h +++ b/tiledb/sm/tile/writer_tile_tuple.h @@ -212,6 +212,12 @@ class WriterTileTuple { return cell_num_; } + /** + * @return the total size of the filtered tiles, or `std::nullopt` if not + * filtered. + */ + std::optional filtered_size() const; + private: /* ********************************* */ /* PRIVATE ATTRIBUTES */ From 9a5383a3220ffcab4689feeb45177bc540bd70c0 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 20 Oct 2025 14:52:45 -0400 Subject: [PATCH 047/109] Add test for requirement about how tiles kept in memory after submit --- test/src/unit-cppapi-max-fragment-size.cc | 45 +++++++++++++++++++ .../sm/query/writers/global_order_writer.cc | 5 +++ tiledb/sm/query/writers/global_order_writer.h | 3 ++ 3 files changed, 53 insertions(+) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index f7cdf5dd7e4..1c61a2ecddc 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -44,9 +44,11 @@ #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/cpp_api/tiledb" #include "tiledb/sm/misc/constants.h" +#include "tiledb/sm/query/writers/global_order_writer.h" #include "tiledb/sm/tile/tile.h" #include +#include using namespace tiledb; using namespace tiledb::test; @@ -590,6 +592,13 @@ instance_dense_global_order( api_subarray.push_back(sub_dim.upper_bound); } + uint64_t num_tiles_per_hyperrow = 1; + for (uint64_t i = 0; i < dimensions.size() - 1; i++) { + const uint64_t dim = + (tile_order == TILEDB_ROW_MAJOR ? 
i + 1 : dimensions.size() - i - 2); + num_tiles_per_hyperrow *= dimensions[dim].num_tiles(subarray[dim]); + } + // write data, should be split into multiple fragments { Array array(ctx, array_name, TILEDB_WRITE); @@ -613,6 +622,42 @@ instance_dense_global_order( ASSERTER(status == Query::Status::COMPLETE); cells_written += cells_this_write; + + const auto w = dynamic_cast( + query.ptr()->query_->strategy()); + ASSERTER(w); + const auto g = w->get_global_state(); + ASSERTER(g); + + // Check assumptions about memory buffering. + // There may be a tail of tiles for which we cannot infer whether they + // would fit in the current fragment while also forming a rectangle. + // The writer keeps these in memory until it has enough information + // in the next `submit`. Check our assumptions about those tiles. + uint64_t in_memory_size = 0; + std::optional in_memory_num_tiles; + for (const auto& field : g->last_tiles_) { + // NB: there should always be at least one tile which contains the state + // of the current fragment + ASSERTER(!field.second.empty()); + + for (uint64_t t = 0; t < field.second.size() - 1; t++) { + const auto s = field.second[t].filtered_size(); + ASSERTER(s.has_value()); + in_memory_size += s.value(); + } + + if (in_memory_num_tiles.has_value()) { + ASSERTER(field.second.size() - 1 == in_memory_num_tiles.value()); + } else { + in_memory_num_tiles = field.second.size() - 1; + } + } + // it should be an error if they exceed the max fragment size + ASSERTER(in_memory_size <= max_fragment_size); + // and if they form a rectangle then we could have written some out + ASSERTER(in_memory_num_tiles.has_value()); + ASSERTER(in_memory_num_tiles.value() < num_tiles_per_hyperrow); } query.finalize(); diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 232ca755d53..215f4b2f038 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -265,6 
+265,11 @@ GlobalOrderWriter::GlobalWriteState* GlobalOrderWriter::get_global_state() { return global_write_state_.get(); } +const GlobalOrderWriter::GlobalWriteState* GlobalOrderWriter::get_global_state() + const { + return global_write_state_.get(); +} + std::pair> GlobalOrderWriter::multipart_upload_state(bool client) { if (client) { diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index 445ea4bebd5..7991e3ec432 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -184,6 +184,9 @@ class GlobalOrderWriter : public WriterBase { /** Returns a bare pointer to the global state. */ GlobalWriteState* get_global_state(); + /** Returns a bare pointer to the global state. */ + const GlobalWriteState* get_global_state() const; + /** * Used in serialization to share the multipart upload state * among cloud executors From ea72c8534de80b2fb91c999be53ce2f3088650ea Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 22 Oct 2025 14:21:29 -0400 Subject: [PATCH 048/109] unit_domain_tile_offset.cc, is_rectangular_domain passes for 2D --- .../rapidcheck/array_schema_templates.h | 216 +++++++++++++++++ test/support/rapidcheck/array_templates.h | 159 +------------ test/support/src/array_schema_templates.h | 217 ++++++++++++++++++ test/support/src/array_templates.h | 179 +-------------- tiledb/sm/query/test/CMakeLists.txt | 2 +- .../sm/query/test/unit_domain_tile_offset.cc | 214 +++++++++++++++++ 6 files changed, 651 insertions(+), 336 deletions(-) create mode 100644 test/support/rapidcheck/array_schema_templates.h create mode 100644 test/support/src/array_schema_templates.h create mode 100644 tiledb/sm/query/test/unit_domain_tile_offset.cc diff --git a/test/support/rapidcheck/array_schema_templates.h b/test/support/rapidcheck/array_schema_templates.h new file mode 100644 index 00000000000..084be02935d --- /dev/null +++ 
b/test/support/rapidcheck/array_schema_templates.h @@ -0,0 +1,216 @@ +/** + * @file test/support/rapidcheck/array_schema_templates.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines rapidcheck generators for the structures + * defined in test/support/src/array_schema_templates.h. + */ + +#ifndef TILEDB_RAPIDCHECK_ARRAY_SCHEMA_H +#define TILEDB_RAPIDCHECK_ARRAY_SCHEMA_H + +#include +#include +#include + +#include "tiledb/common/arithmetic.h" + +namespace rc { + +using namespace tiledb::test; +using namespace tiledb::test::templates; + +template +Gen> make_domain(std::optional bound = std::nullopt) { + auto bounds = gen::mapcat(gen::arbitrary(), [bound](D lb) { + const D ub_limit = + (bound.has_value() ? 
+ tiledb::common::checked_arithmetic::add(lb, bound.value()) + .value_or(std::numeric_limits::max()) : + std::numeric_limits::max()); + if constexpr (std::is_same_v || std::is_same_v) { + return gen::pair(gen::just(lb), gen::inRange(lb, ub_limit)); + } else { + // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is + // inclusive. So we have to use `int64_t` to avoid overflow. + return gen::pair( + gen::just(lb), + gen::cast(gen::inRange(int64_t(lb), int64_t(ub_limit) + 1))); + } + }); + + return gen::map(bounds, [](std::pair bounds) { + return templates::Domain(bounds.first, bounds.second); + }); +} + +template +struct Arbitrary> { + static Gen> arbitrary() { + return make_domain(); + } +}; + +/** + * @return `a - b` if it does not overflow, `std::nullopt` if it does + */ +template +std::optional checked_sub(T a, T b) { + if (!std::is_signed::value) { + return (b > a ? std::nullopt : std::optional(a - b)); + } else if (b < 0) { + return ( + std::numeric_limits::max() + b < a ? std::nullopt : + std::optional(a - b)); + } else { + return ( + std::numeric_limits::min() - b > a ? std::nullopt : + std::optional(a - b)); + } +} + +template +Gen make_extent( + const templates::Domain& domain, std::optional bound = std::nullopt) { + // upper bound on all possible extents to avoid unreasonably + // huge tile sizes + static constexpr D extent_limit = static_cast( + std::is_signed::value ? + std::min( + static_cast(std::numeric_limits::max()), + static_cast(1024 * 16)) : + std::min( + static_cast(std::numeric_limits::max()), + static_cast(1024 * 16))); + + const D extent_bound = + (bound.has_value() ? std::min(bound.value(), extent_limit) : + extent_limit); + + // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is + // inclusive. So we have to be careful to avoid overflow. 
+ + D extent_lower_bound = 1; + D extent_upper_bound; + + const auto bound_distance = + checked_sub(domain.upper_bound, domain.lower_bound); + if (bound_distance.has_value()) { + extent_upper_bound = + (bound_distance.value() < extent_bound ? bound_distance.value() + 1 : + extent_bound); + } else { + extent_upper_bound = extent_bound; + } + + return gen::inRange(extent_lower_bound, extent_upper_bound + 1); +} + +template +Gen> make_dimension( + std::optional::value_type> extent_bound = + std::nullopt, + std::optional::value_type> domain_bound = + std::nullopt) { + using CoordType = templates::Dimension::value_type; + auto tup = gen::mapcat( + make_domain(domain_bound), + [extent_bound](Domain domain) { + return gen::pair(gen::just(domain), make_extent(domain, extent_bound)); + }); + + return gen::map(tup, [](std::pair, CoordType> tup) { + return templates::Dimension(tup.first, tup.second); + }); +} + +template +struct Arbitrary> { + static Gen> arbitrary() { + return make_dimension(); + } +}; + +template +Gen make_coordinate(const templates::Domain& domain) { + // `gen::inRange` does an exclusive upper bound, + // whereas the domain upper bound is inclusive. + // As a result some contortion is required to deal + // with numeric_limits. 
+ if constexpr (std::is_same_v) { + // NB: poor performance with small domains for sure + return gen::suchThat( + gen::map( + gen::string(), + [](std::string s) { + StringDimensionCoordType v(s.begin(), s.end()); + return v; + }), + [domain](const StringDimensionCoordType& s) { + return domain.lower_bound <= s && s <= domain.upper_bound; + }); + } else if constexpr (std::is_signed::value) { + if (int64_t(domain.upper_bound) < std::numeric_limits::max()) { + return gen::cast(gen::inRange( + int64_t(domain.lower_bound), int64_t(domain.upper_bound + 1))); + } else { + return gen::inRange(domain.lower_bound, domain.upper_bound); + } + } else { + if (uint64_t(domain.upper_bound) < std::numeric_limits::max()) { + return gen::cast(gen::inRange( + uint64_t(domain.lower_bound), uint64_t(domain.upper_bound + 1))); + } else { + return gen::inRange(domain.lower_bound, domain.upper_bound); + } + } +} + +template +Gen> make_range(const templates::Domain& domain) { + return gen::apply( + [](D p1, D p2) { return templates::Domain(p1, p2); }, + make_coordinate(domain), + make_coordinate(domain)); +} + +template <> +void show>(const templates::Domain& domain, std::ostream& os); + +template <> +void show>( + const templates::Domain& domain, std::ostream& os); + +template <> +void show>( + const templates::Dimension& dimension, + std::ostream& os); + +} // namespace rc + +#endif diff --git a/test/support/rapidcheck/array_templates.h b/test/support/rapidcheck/array_templates.h index 2235e0b1112..37762a9ba6b 100644 --- a/test/support/rapidcheck/array_templates.h +++ b/test/support/rapidcheck/array_templates.h @@ -34,6 +34,7 @@ #ifndef TILEDB_RAPIDCHECK_ARRAY_H #define TILEDB_RAPIDCHECK_ARRAY_H +#include #include #include #include @@ -43,152 +44,6 @@ namespace rc { using namespace tiledb::test; using namespace tiledb::test::templates; -template -struct Arbitrary> { - static Gen> arbitrary() { - // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is - // inclusive. 
So we have to use `int64_t` to avoid overflow. - auto bounds = gen::mapcat(gen::arbitrary(), [](D lb) { - if constexpr (std::is_same::value) { - return gen::pair( - gen::just(lb), gen::inRange(lb, std::numeric_limits::max())); - } else if constexpr (std::is_same::value) { - return gen::pair( - gen::just(lb), gen::inRange(lb, std::numeric_limits::max())); - } else { - auto ub_limit = int64_t(std::numeric_limits::max()) + 1; - return gen::pair( - gen::just(lb), gen::cast(gen::inRange(int64_t(lb), ub_limit))); - } - }); - - return gen::map(bounds, [](std::pair bounds) { - return templates::Domain(bounds.first, bounds.second); - }); - } -}; - -/** - * @return `a - b` if it does not overflow, `std::nullopt` if it does - */ -template -std::optional checked_sub(T a, T b) { - if (!std::is_signed::value) { - return (b > a ? std::nullopt : std::optional(a - b)); - } else if (b < 0) { - return ( - std::numeric_limits::max() + b < a ? std::nullopt : - std::optional(a - b)); - } else { - return ( - std::numeric_limits::min() - b > a ? std::nullopt : - std::optional(a - b)); - } -} - -template -Gen make_extent( - const templates::Domain& domain, std::optional bound = std::nullopt) { - // upper bound on all possible extents to avoid unreasonably - // huge tile sizes - static constexpr D extent_limit = static_cast( - std::is_signed::value ? - std::min( - static_cast(std::numeric_limits::max()), - static_cast(1024 * 16)) : - std::min( - static_cast(std::numeric_limits::max()), - static_cast(1024 * 16))); - - const D extent_bound = - (bound.has_value() ? std::min(bound.value(), extent_limit) : - extent_limit); - - // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is - // inclusive. So we have to be careful to avoid overflow. - - D extent_lower_bound = 1; - D extent_upper_bound; - - const auto bound_distance = - checked_sub(domain.upper_bound, domain.lower_bound); - if (bound_distance.has_value()) { - extent_upper_bound = - (bound_distance.value() < extent_bound ? 
bound_distance.value() + 1 : - extent_bound); - } else { - extent_upper_bound = extent_bound; - } - - return gen::inRange(extent_lower_bound, extent_upper_bound + 1); -} - -template -Gen> make_dimension( - std::optional::value_type> extent_bound = - std::nullopt) { - using CoordType = templates::Dimension::value_type; - auto tup = gen::mapcat( - gen::arbitrary>(), - [extent_bound](Domain domain) { - return gen::pair(gen::just(domain), make_extent(domain, extent_bound)); - }); - - return gen::map(tup, [](std::pair, CoordType> tup) { - return templates::Dimension(tup.first, tup.second); - }); -} - -template -struct Arbitrary> { - static Gen> arbitrary() { - return make_dimension(); - } -}; - -template -Gen make_coordinate(const templates::Domain& domain) { - // `gen::inRange` does an exclusive upper bound, - // whereas the domain upper bound is inclusive. - // As a result some contortion is required to deal - // with numeric_limits. - if constexpr (std::is_same_v) { - // NB: poor performance with small domains for sure - return gen::suchThat( - gen::map( - gen::string(), - [](std::string s) { - StringDimensionCoordType v(s.begin(), s.end()); - return v; - }), - [domain](const StringDimensionCoordType& s) { - return domain.lower_bound <= s && s <= domain.upper_bound; - }); - } else if constexpr (std::is_signed::value) { - if (int64_t(domain.upper_bound) < std::numeric_limits::max()) { - return gen::cast(gen::inRange( - int64_t(domain.lower_bound), int64_t(domain.upper_bound + 1))); - } else { - return gen::inRange(domain.lower_bound, domain.upper_bound); - } - } else { - if (uint64_t(domain.upper_bound) < std::numeric_limits::max()) { - return gen::cast(gen::inRange( - uint64_t(domain.lower_bound), uint64_t(domain.upper_bound + 1))); - } else { - return gen::inRange(domain.lower_bound, domain.upper_bound); - } - } -} - -template -Gen> make_range(const templates::Domain& domain) { - return gen::apply( - [](D p1, D p2) { return templates::Domain(p1, p2); }, - 
make_coordinate(domain), - make_coordinate(domain)); -} - template Gen> make_fragment_1d( bool allow_duplicates, const Domain& d) { @@ -320,18 +175,6 @@ Gen> make_fragment_3d( }); } -template <> -void show>(const templates::Domain& domain, std::ostream& os); - -template <> -void show>( - const templates::Domain& domain, std::ostream& os); - -template <> -void show>( - const templates::Dimension& dimension, - std::ostream& os); - namespace detail { /** diff --git a/test/support/src/array_schema_templates.h b/test/support/src/array_schema_templates.h new file mode 100644 index 00000000000..5a89d9fdea1 --- /dev/null +++ b/test/support/src/array_schema_templates.h @@ -0,0 +1,217 @@ +/** + * @file test/support/src/array_schema_templates.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * @section DESCRIPTION + * + * This file provides templates for generic programming with respect + * to array schema, data types, etc. + */ + +#ifndef TILEDB_ARRAY_SCHEMA_TEMPLATES_H +#define TILEDB_ARRAY_SCHEMA_TEMPLATES_H + +#include "tiledb/type/datatype_traits.h" +#include "tiledb/type/range/range.h" + +#include +#include + +namespace tiledb::test::templates { + +using StringDimensionCoordType = std::vector; +using StringDimensionCoordView = std::span; + +/** + * Constrains types which can be used as the physical type of a dimension. + */ +template +concept DimensionType = + std::is_same_v or requires(const D& coord) { + typename std::is_signed; + { coord < coord } -> std::same_as; + { D(int64_t(coord)) } -> std::same_as; + }; + +/** + * Constrains types which can be used as the physical type of an attribute. + * + * Right now this doesn't constrain anything, it is just a marker for + * readability, and someday we might want it do require something. + * + * This used to have + * ``` + * typename query_buffers::cell_type; + * ``` + * but that was removed to simplify include whatnot and forward declaration etc + */ +template +concept AttributeType = true; + +/** + * A generic, statically-typed range which is inclusive on both ends. 
+ */ +template +struct Domain { + D lower_bound; + D upper_bound; + + Domain() { + } + + Domain(D d1, D d2) + : lower_bound(std::min(d1, d2)) + , upper_bound(std::max(d1, d2)) { + } + + bool operator==(const Domain&) const = default; + + uint64_t num_cells() const { + // FIXME: this is incorrect for 64-bit domains which need to check overflow + if (std::is_signed::value) { + return static_cast(upper_bound) - + static_cast(lower_bound) + 1; + } else { + return static_cast(upper_bound) - + static_cast(lower_bound) + 1; + } + } + + bool contains(D point) const { + return lower_bound <= point && point <= upper_bound; + } + + bool intersects(const Domain& other) const { + return (other.lower_bound <= lower_bound && + lower_bound <= other.upper_bound) || + (other.lower_bound <= upper_bound && + upper_bound <= other.upper_bound) || + (lower_bound <= other.lower_bound && + other.lower_bound <= upper_bound) || + (lower_bound <= other.upper_bound && + other.upper_bound <= upper_bound); + } + + tiledb::type::Range range() const { + return tiledb::type::Range(lower_bound, upper_bound); + } +}; + +/** + * A description of a dimension as it pertains to its datatype. 
+ */ +template +struct Dimension { + using value_type = tiledb::type::datatype_traits::value_type; + using domain_type = Domain; + + Dimension() = default; + Dimension(Domain domain, value_type extent) + : domain(domain) + , extent(extent) { + } + + Dimension(value_type lower_bound, value_type upper_bound, value_type extent) + : Dimension(Domain(lower_bound, upper_bound), extent) { + } + + Domain domain; + value_type extent; + + /** + * @return the number of tiles spanned by the whole domain of this dimension + */ + uint64_t num_tiles() const { + return num_tiles(domain); + } + + /** + * @return the number of tiles spanned by a range in this dimension + */ + uint64_t num_tiles(const domain_type& range) const { + return (range.num_cells() + extent - 1) / extent; + } +}; + +template +struct static_attribute {}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool nullable = false; + + using value_type = + typename tiledb::type::datatype_traits::value_type; + using cell_type = value_type; +}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool nullable = true; + + using value_type = std::optional< + typename tiledb::type::datatype_traits::value_type>; + using cell_type = value_type; +}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; + static constexpr bool nullable = false; + + using value_type = + typename tiledb::type::datatype_traits::value_type; + using cell_type = std::vector; +}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; + static constexpr bool nullable = true; + + using 
value_type = + typename tiledb::type::datatype_traits::value_type; + using cell_type = std::optional>; +}; + +template +constexpr std::tuple +attribute_properties() { + return { + static_attribute::datatype, + static_attribute::cell_val_num, + static_attribute::nullable}; +} + +} // namespace tiledb::test::templates + +#endif diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 9466b7b5186..de7db91c62a 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -42,6 +42,7 @@ #include "tiledb/type/range/range.h" #include +#include #include #include #include @@ -59,9 +60,6 @@ class Dimension; namespace tiledb::test::templates { -using StringDimensionCoordType = std::vector; -using StringDimensionCoordView = std::span; - /** * Adapts a `std::tuple` whose fields are all `GlobalCellCmp` * to itself be `GlobalCellCmp`. @@ -123,26 +121,6 @@ struct global_cell_cmp_std_tuple { template struct query_buffers {}; -/** - * Constrains types which can be used as the physical type of a dimension. - */ -template -concept DimensionType = - std::is_same_v or requires(const D& coord) { - typename std::is_signed; - { coord < coord } -> std::same_as; - { D(int64_t(coord)) } -> std::same_as; - }; - -/** - * Constrains types which can be used as the physical type of an attribute. - * - * Right now this doesn't constrain anything, it is just a marker for - * readability, and someday we might want it do require something. - */ -template -concept AttributeType = requires(T) { typename query_buffers::cell_type; }; - /** * Constrains types which can be used as columnar data fragment input. * @@ -165,160 +143,7 @@ concept FragmentType = requires(const T& fragment) { }; /** - * A generic, statically-typed range which is inclusive on both ends. 
- */ -template -struct Domain { - D lower_bound; - D upper_bound; - - Domain() { - } - - Domain(D d1, D d2) - : lower_bound(std::min(d1, d2)) - , upper_bound(std::max(d1, d2)) { - } - - bool operator==(const Domain&) const = default; - - uint64_t num_cells() const { - // FIXME: this is incorrect for 64-bit domains which need to check overflow - if (std::is_signed::value) { - return static_cast(upper_bound) - - static_cast(lower_bound) + 1; - } else { - return static_cast(upper_bound) - - static_cast(lower_bound) + 1; - } - } - - bool contains(D point) const { - return lower_bound <= point && point <= upper_bound; - } - - bool intersects(const Domain& other) const { - return (other.lower_bound <= lower_bound && - lower_bound <= other.upper_bound) || - (other.lower_bound <= upper_bound && - upper_bound <= other.upper_bound) || - (lower_bound <= other.lower_bound && - other.lower_bound <= upper_bound) || - (lower_bound <= other.upper_bound && - other.upper_bound <= upper_bound); - } - - tiledb::type::Range range() const { - return tiledb::type::Range(lower_bound, upper_bound); - } -}; - -/** - * A description of a dimension as it pertains to its datatype. 
- */ -template -struct Dimension { - using value_type = tiledb::type::datatype_traits::value_type; - using domain_type = Domain; - - Dimension() = default; - Dimension(Domain domain, value_type extent) - : domain(domain) - , extent(extent) { - } - - Dimension(value_type lower_bound, value_type upper_bound, value_type extent) - : Dimension(Domain(lower_bound, upper_bound), extent) { - } - - Domain domain; - value_type extent; - - /** - * @return the number of tiles spanned by the whole domain of this dimension - */ - uint64_t num_tiles() const { - return num_tiles(domain); - } - - /** - * @return the number of tiles spanned by a range in this dimension - */ - uint64_t num_tiles(const domain_type& range) const { - return (range.num_cells() + extent - 1) / extent; - } -}; - -template <> -struct Dimension { - using value_type = StringDimensionCoordType; - - Dimension() { - } - - Dimension(const Domain& domain) - : domain(domain) { - } - - std::optional> domain; -}; - -template -struct static_attribute {}; - -template -struct static_attribute { - static constexpr Datatype datatype = DATATYPE; - static constexpr uint32_t cell_val_num = 1; - static constexpr bool nullable = false; - - using value_type = - typename tiledb::type::datatype_traits::value_type; - using cell_type = value_type; -}; - -template -struct static_attribute { - static constexpr Datatype datatype = DATATYPE; - static constexpr uint32_t cell_val_num = 1; - static constexpr bool nullable = true; - - using value_type = std::optional< - typename tiledb::type::datatype_traits::value_type>; - using cell_type = value_type; -}; - -template -struct static_attribute { - static constexpr Datatype datatype = DATATYPE; - static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; - static constexpr bool nullable = false; - - using value_type = - typename tiledb::type::datatype_traits::value_type; - using cell_type = std::vector; -}; - -template -struct static_attribute { - static constexpr Datatype 
datatype = DATATYPE; - static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; - static constexpr bool nullable = true; - - using value_type = - typename tiledb::type::datatype_traits::value_type; - using cell_type = std::optional>; -}; - -template -constexpr std::tuple attribute_properties() { - return { - static_attribute::datatype, - static_attribute::cell_val_num, - static_attribute::nullable}; -} - -/** +2D) * Schema of named fields for simple evaluation of a query condition */ template diff --git a/tiledb/sm/query/test/CMakeLists.txt b/tiledb/sm/query/test/CMakeLists.txt index 85f64c86be7..13742dc2c81 100644 --- a/tiledb/sm/query/test/CMakeLists.txt +++ b/tiledb/sm/query/test/CMakeLists.txt @@ -27,7 +27,7 @@ include(unit_test) commence(unit_test query) - this_target_sources(main.cc unit_validity_vector.cc unit_query_condition.cc) + this_target_sources(main.cc unit_validity_vector.cc unit_query_condition.cc unit_domain_tile_offset.cc) # Not actually testing a unit yet, but some things that ought to be units this_target_link_libraries(tiledb_test_support_lib) this_target_link_libraries(ast_test_support_lib) diff --git a/tiledb/sm/query/test/unit_domain_tile_offset.cc b/tiledb/sm/query/test/unit_domain_tile_offset.cc new file mode 100644 index 00000000000..26bcf119fed --- /dev/null +++ b/tiledb/sm/query/test/unit_domain_tile_offset.cc @@ -0,0 +1,214 @@ +#include +#include +#include "test/support/rapidcheck/array_schema_templates.h" +#include "test/support/src/array_schema_templates.h" +#include "tiledb/sm/array_schema/dimension.h" +#include "tiledb/sm/misc/types.h" +#include "tiledb/type/range/range.h" + +#include + +using namespace tiledb; +using namespace tiledb::test; + +template +static bool is_rectangular_domain( + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + for (uint64_t d_outer = 0; d_outer < tile_extents.size(); d_outer++) { + uint64_t hyperrow_num_tiles = 1; + for (uint64_t d_inner 
= d_outer + 1; d_inner < tile_extents.size(); + d_inner++) { + const uint64_t d_inner_num_tiles = sm::Dimension::tile_idx( + domain[d_inner].end_as(), + domain[d_inner].start_as(), + tile_extents[d_inner]) + + 1; + hyperrow_num_tiles *= d_inner_num_tiles; + } + + const uint64_t hyperrow_offset = start_tile % hyperrow_num_tiles; + if (hyperrow_offset + num_tiles > hyperrow_num_tiles) { + if (hyperrow_offset != 0) { + return false; + } else if (num_tiles % hyperrow_num_tiles != 0) { + return false; + } + } + } + return true; +} + +template +static bool is_rectangular_domain( + std::span tile_extents, + T lower_bound, + T upper_bound, + uint64_t start_tile, + uint64_t num_tiles) { + sm::NDRange r; + r.push_back(Range(lower_bound, upper_bound)); + return is_rectangular_domain(tile_extents, r, start_tile, num_tiles); +} + +template +static bool is_rectangular_domain( + std::span tile_extents, + T d1_lower_bound, + T d1_upper_bound, + T d2_lower_bound, + T d2_upper_bound, + uint64_t start_tile, + uint64_t num_tiles) { + sm::NDRange r; + r.push_back(Range(d1_lower_bound, d1_upper_bound)); + r.push_back(Range(d2_lower_bound, d2_upper_bound)); + return is_rectangular_domain(tile_extents, r, start_tile, num_tiles); +} + +// in one dimension all domains are rectangles +TEST_CASE("is_rectangular_domain 1d", "[arithmetic]") { + rc::prop( + "is_rectangular_domain 1d", + [](templates::Dimension dimension) { + const uint64_t start_tile = + *rc::gen::inRange(0, dimension.num_tiles()); + const uint64_t num_tiles = + *rc::gen::inRange(1, dimension.num_tiles() - start_tile); + + const std::vector extents = {dimension.extent}; + RC_ASSERT(is_rectangular_domain( + extents, + dimension.domain.lower_bound, + dimension.domain.upper_bound, + start_tile, + num_tiles)); + }); +} + +TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { + /* + * Domain is a 16x16 square + */ + SECTION("Square") { + const uint64_t d1_lower = GENERATE(0, 3); + const uint64_t d1_upper = d1_lower + 16 - 1; + 
const uint64_t d2_lower = GENERATE(0, 3); + const uint64_t d2_upper = d2_lower + 16 - 1; + + SECTION("Row tiles") { + const std::vector extents = {1, 16}; + for (uint64_t start_tile = 0; start_tile < 15; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= 16; + num_tiles++) { + CAPTURE(start_tile, num_tiles); + CHECK(is_rectangular_domain( + extents, + d1_lower, + d1_upper, + d2_lower, + d2_upper, + start_tile, + num_tiles)); + } + } + } + + SECTION("Square tiles") { + // 7x7 tiles will subdivide the 16x16 square into 3x3 tiles + const std::vector extents = {7, 7}; + + auto tt = [&](uint64_t start_tile, uint64_t num_tiles) -> bool { + return is_rectangular_domain( + extents, + d1_lower, + d1_upper, + d2_lower, + d2_upper, + start_tile, + num_tiles); + }; + + // tiles aligned with the start: rectangle formed if less than one row, or + // integral number of rows + for (uint64_t start_tile : {0, 3, 6}) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= 9; num_tiles++) { + CAPTURE(start_tile, num_tiles); + if (num_tiles < 3 || num_tiles % 3 == 0) { + CHECK(tt(start_tile, num_tiles)); + } else { + CHECK(!tt(start_tile, num_tiles)); + } + } + } + + // otherwise a rectangle is only formed within the same row + for (uint64_t start_tile : {1, 2, 4, 5, 7, 8}) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= 9; num_tiles++) { + CAPTURE(start_tile, num_tiles); + if ((start_tile % 3) + num_tiles <= 3) { + CHECK(tt(start_tile, num_tiles)); + } else { + CHECK(!tt(start_tile, num_tiles)); + } + } + } + } + } + + using Dim64 = templates::Dimension; + + auto instance_is_rectangular_domain_2d = + [](Dim64 d1, Dim64 d2) { + const std::vector extents = {d1.extent, d2.extent}; + auto tt = [&](uint64_t start_tile, uint64_t num_tiles) -> bool { + return is_rectangular_domain( + extents, + d1.domain.lower_bound, + d1.domain.upper_bound, + d2.domain.lower_bound, + d2.domain.upper_bound, + start_tile, + num_tiles); + }; + + const uint64_t total_tiles = 
d1.num_tiles() * d2.num_tiles(); + + for (uint64_t t = 0; t < d1.num_tiles(); t += d2.num_tiles()) { + // row-aligned tiles + for (uint64_t num_tiles = 1; t + num_tiles <= total_tiles; + num_tiles++) { + if (num_tiles <= d2.num_tiles() || + num_tiles % d2.num_tiles() == 0) { + ASSERTER(tt(t, num_tiles)); + } else { + ASSERTER(!tt(t, num_tiles)); + } + } + // other tiles + for (uint64_t o = 1; t + o < d2.num_tiles(); o++) { + for (uint64_t num_tiles = 1; t + o + num_tiles <= total_tiles; + num_tiles++) { + if (((t + o) % d2.num_tiles()) + num_tiles <= d2.num_tiles()) { + ASSERTER(tt(t + o, num_tiles)); + } else { + ASSERTER(!tt(t + o, num_tiles)); + } + } + } + } + }; + + SECTION("Shrinking") { + instance_is_rectangular_domain_2d(Dim64(0, 2, 1), Dim64(0, 0, 1)); + instance_is_rectangular_domain_2d(Dim64(0, 2, 1), Dim64(0, 1, 1)); + } + + rc::prop("is_rectangular_domain 2d", [&]() { + Dim64 d1 = *rc::make_dimension(std::nullopt, {64}); + Dim64 d2 = *rc::make_dimension(std::nullopt, {64}); + instance_is_rectangular_domain_2d.operator()(d1, d2); + }); +} From 59b450f4352554a865b4c414df0177110740b8aa Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 22 Oct 2025 21:54:58 -0400 Subject: [PATCH 049/109] is_rectangular_domain 3d --- .../sm/query/test/unit_domain_tile_offset.cc | 145 +++++++++++++++++- 1 file changed, 137 insertions(+), 8 deletions(-) diff --git a/tiledb/sm/query/test/unit_domain_tile_offset.cc b/tiledb/sm/query/test/unit_domain_tile_offset.cc index 26bcf119fed..36cb4a4fa4f 100644 --- a/tiledb/sm/query/test/unit_domain_tile_offset.cc +++ b/tiledb/sm/query/test/unit_domain_tile_offset.cc @@ -68,6 +68,40 @@ static bool is_rectangular_domain( return is_rectangular_domain(tile_extents, r, start_tile, num_tiles); } +template +static bool is_rectangular_domain( + const templates::Dimension
& d1, + const templates::Dimension
& d2, + uint64_t start_tile, + uint64_t num_tiles) { + using Coord = templates::Dimension
::value_type; + const std::vector extents = {d1.extent, d2.extent}; + return is_rectangular_domain( + extents, + d1.domain.lower_bound, + d1.domain.upper_bound, + d2.domain.lower_bound, + d2.domain.upper_bound, + start_tile, + num_tiles); +} + +template +static bool is_rectangular_domain( + const templates::Dimension
& d1, + const templates::Dimension
& d2, + const templates::Dimension
& d3, + uint64_t start_tile, + uint64_t num_tiles) { + using Coord = templates::Dimension
::value_type; + const std::vector extents = {d1.extent, d2.extent, d3.extent}; + sm::NDRange r; + r.push_back(Range(d1.domain.lower_bound, d1.domain.upper_bound)); + r.push_back(Range(d2.domain.lower_bound, d2.domain.upper_bound)); + r.push_back(Range(d3.domain.lower_bound, d3.domain.upper_bound)); + return is_rectangular_domain(extents, r, start_tile, num_tiles); +} + // in one dimension all domains are rectangles TEST_CASE("is_rectangular_domain 1d", "[arithmetic]") { rc::prop( @@ -164,14 +198,7 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { [](Dim64 d1, Dim64 d2) { const std::vector extents = {d1.extent, d2.extent}; auto tt = [&](uint64_t start_tile, uint64_t num_tiles) -> bool { - return is_rectangular_domain( - extents, - d1.domain.lower_bound, - d1.domain.upper_bound, - d2.domain.lower_bound, - d2.domain.upper_bound, - start_tile, - num_tiles); + return is_rectangular_domain(d1, d2, start_tile, num_tiles); }; const uint64_t total_tiles = d1.num_tiles() * d2.num_tiles(); @@ -212,3 +239,105 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { instance_is_rectangular_domain_2d.operator()(d1, d2); }); } + +TEST_CASE("is_rectangular_domain 3d", "[arithmetic]") { + using Dim64 = templates::Dimension; + + /** + * 3D plane tiles (where the outermost dimension has extent 1) + * should produce the same results as rectangular tiles in the plane + */ + rc::prop("plane tiles", [&]() { + Dim64 d1 = *rc::make_dimension(std::nullopt, {1}); + Dim64 d2 = *rc::make_dimension(std::nullopt, {32}); + Dim64 d3 = *rc::make_dimension(std::nullopt, {32}); + + const uint64_t total_tiles = + d1.num_tiles() * d2.num_tiles() * d3.num_tiles(); + for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; + num_tiles++) { + const bool rectangle = + is_rectangular_domain(d2, d3, start_tile, num_tiles); + const bool plane = + is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); + + 
RC_ASSERT(rectangle == plane); + } + } + }); + + /** + * Runs over the possible `(start_tiles, num_tiles)` pairs for dimensions + * `{d1, d2, d3}` and asserts that `is_rectangular_domain` returns true if and + * only if the pair represents an expected rectangle. + */ + auto instance_is_rectangular_domain_3d = + [](Dim64 d1, Dim64 d2, Dim64 d3) { + auto tt = [&](uint64_t start_tile, uint64_t num_tiles) -> bool { + return is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); + }; + + const uint64_t total_tiles = + d1.num_tiles() * d2.num_tiles() * d3.num_tiles(); + const uint64_t plane_tiles = d2.num_tiles() * d3.num_tiles(); + + for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; + num_tiles++) { + if (start_tile % plane_tiles == 0) { + // aligned to a plane, several options to be a rectangle + if (num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles)); + } else if ( + num_tiles <= plane_tiles && num_tiles % d3.num_tiles() == 0) { + ASSERTER(tt(start_tile, num_tiles)); + } else if (num_tiles % (plane_tiles) == 0) { + ASSERTER(tt(start_tile, num_tiles)); + } else { + ASSERTER(!tt(start_tile, num_tiles)); + } + } else if (start_tile % d3.num_tiles() == 0) { + // aligned to a row within a plane, but not aligned to the plane + // this is a rectangle if it is an integral number of rows, or + // fits within a row + if (num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles)); + } else if ( + num_tiles % d3.num_tiles() == 0 && + (start_tile % plane_tiles) + num_tiles <= plane_tiles) { + ASSERTER(tt(start_tile, num_tiles)); + } else { + ASSERTER(!tt(start_tile, num_tiles)); + } + } else { + // unaligned, only a rectangle if it doesn't advance rows + if (start_tile % d3.num_tiles() + num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles)); + } else { + ASSERTER(!tt(start_tile, num_tiles)); + } + } + } + } + }; + + SECTION("Shrinking") { + 
instance_is_rectangular_domain_3d( + Dim64(0, 1, 1), Dim64(0, 0, 1), Dim64(0, 1, 1)); + instance_is_rectangular_domain_3d( + Dim64(0, 1, 1), Dim64(0, 2, 1), Dim64(0, 0, 1)); + } + + rc::prop("any tiles", [&]() { + const Dim64 d1 = + *rc::make_dimension(std::nullopt, {16}); + const Dim64 d2 = + *rc::make_dimension(std::nullopt, {16}); + const Dim64 d3 = + *rc::make_dimension(std::nullopt, {16}); + + instance_is_rectangular_domain_3d.operator()( + d1, d2, d3); + }); +} From ce75d5bafd2b23ca3e62089b8c642a76aff7cd97 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 11:07:11 -0400 Subject: [PATCH 050/109] domain_tile_offset with passing example tests for 2D --- .../sm/query/test/unit_domain_tile_offset.cc | 311 ++++++++++++++++++ 1 file changed, 311 insertions(+) diff --git a/tiledb/sm/query/test/unit_domain_tile_offset.cc b/tiledb/sm/query/test/unit_domain_tile_offset.cc index 36cb4a4fa4f..6f05b07628b 100644 --- a/tiledb/sm/query/test/unit_domain_tile_offset.cc +++ b/tiledb/sm/query/test/unit_domain_tile_offset.cc @@ -41,6 +41,84 @@ static bool is_rectangular_domain( return true; } +/** + * Compute the number of tiles per hyper-row for the given `domain` with tiles + * given by `tile_extents`. + * + * For D dimensions, the returned vector contains `D+1` elements. + * Position 0 is the number of tiles in `domain`. + * For dimension `d`, position `d + 1` is the number of tiles in a hyper-row of + * dimension `d` (and is thus always 1 for the final dimension). 
+ */ +template +std::vector compute_hyperrow_sizes( + std::span tile_extents, const sm::NDRange& domain) { + std::vector hyperrow_sizes(tile_extents.size() + 1, 1); + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t d_num_tiles = + sm::Dimension::tile_idx( + domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + + 1; + hyperrow_sizes[d] = d_num_tiles; + } + for (uint64_t d = tile_extents.size(); d > 0; d--) { + hyperrow_sizes[d - 1] = hyperrow_sizes[d - 1] * hyperrow_sizes[d]; + } + + return hyperrow_sizes; +} + +/** + * @return a new range which is contained the rectangle within `domain` defined + * by `[start_tile, start_tile + num_tiles)` for the tile sizes given by + * `tile_extents`. If this does not represent a valid rectangle then + * `std::nullopt` is returned instead. + */ +template +static std::optional domain_tile_offset( + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + sm::NDRange r; + + const std::vector dimension_sizes = + compute_hyperrow_sizes(tile_extents, domain); + + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t outer_num_tiles = dimension_sizes[d]; + const uint64_t hyperrow_num_tiles = dimension_sizes[d + 1]; + + const T this_dimension_start_tile = + (start_tile / hyperrow_num_tiles) % outer_num_tiles; + const T this_dimension_end_tile = + ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % outer_num_tiles; + + if (start_tile % hyperrow_num_tiles == 0) { + // aligned to the start of the hyperrow + if (num_tiles > hyperrow_num_tiles && + num_tiles % hyperrow_num_tiles != 0) { + return std::nullopt; + } + } else { + // begins in the middle of the hyperrow + const uint64_t offset = start_tile % hyperrow_num_tiles; + if (offset + num_tiles > hyperrow_num_tiles) { + return std::nullopt; + } + } + + const T start = + domain[d].start_as() + (this_dimension_start_tile * tile_extents[d]); + const T end = domain[d].start_as() + + 
(this_dimension_end_tile * tile_extents[d]) + + tile_extents[d] - 1; + r.push_back(Range(start, end)); + } + + return r; +} + template static bool is_rectangular_domain( std::span tile_extents, @@ -341,3 +419,236 @@ TEST_CASE("is_rectangular_domain 3d", "[arithmetic]") { d1, d2, d3); }); } + +template +std::optional instance_domain_tile_offset( + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + const bool expect_rectangle = + is_rectangular_domain(tile_extents, domain, start_tile, num_tiles); + const std::optional adjusted_domain = + domain_tile_offset(tile_extents, domain, start_tile, num_tiles); + if (!expect_rectangle) { + ASSERTER(!adjusted_domain.has_value()); + return std::nullopt; + } + + ASSERTER(adjusted_domain.has_value()); + + uint64_t num_tiles_result = 1; + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t num_tiles_this_dimension = + sm::Dimension::tile_idx( + adjusted_domain.value()[d].end_as(), + adjusted_domain.value()[d].start_as(), + tile_extents[d]) + + 1; + num_tiles_result *= num_tiles_this_dimension; + } + ASSERTER(num_tiles_result == num_tiles); + + const std::vector hyperrow_sizes = + compute_hyperrow_sizes(tile_extents, domain); + + uint64_t start_tile_result = 0; + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t start_tile_this_dimension = sm::Dimension::tile_idx( + adjusted_domain.value()[d].start_as(), + domain[d].start_as(), + tile_extents[d]); + start_tile_result += start_tile_this_dimension * hyperrow_sizes[d + 1]; + } + ASSERTER(start_tile_result == start_tile); + + return adjusted_domain; +} + +template +void instance_domain_tile_offset( + std::span tile_extents, const sm::NDRange& domain) { + uint64_t total_tiles = 1; + for (const auto& d : tile_extents) { + total_tiles *= d; + } + for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; + num_tiles++) { 
+ instance_domain_tile_offset( + tile_extents, domain, start_tile, num_tiles); + } + } +} + +template +std::optional::value_type>>> +instance_domain_tile_offset( + const std::vector>& dims, + uint64_t start_tile, + uint64_t num_tiles) { + using Coord = typename templates::Dimension
::value_type; + + std::vector tile_extents; + for (const auto& dim : dims) { + tile_extents.push_back(dim.extent); + } + + sm::NDRange domain; + for (const auto& dim : dims) { + domain.push_back(Range(dim.domain.lower_bound, dim.domain.upper_bound)); + } + + const auto range = instance_domain_tile_offset( + tile_extents, domain, start_tile, num_tiles); + if (!range.has_value()) { + return std::nullopt; + } + + std::vector> typed_range; + for (const auto& r : range.value()) { + typed_range.emplace_back( + r.template start_as(), r.template end_as()); + } + return typed_range; +} + +template +void instance_domain_tile_offset( + const std::vector>& dims) { + using Coord = templates::Dimension
::value_type; + + std::vector tile_extents; + for (const auto& dim : dims) { + tile_extents.push_back(dim.extent); + } + + sm::NDRange domain; + for (const auto& dim : dims) { + domain.push_back(Range(dim.domain.lower_bound, dim.domain.upper_bound)); + } + + instance_domain_tile_offset(tile_extents, domain); +} + +TEST_CASE("domain_tile_offset 1d", "[arithmetic]") { + using Dim64 = templates::Dimension; + + SECTION("Shrinking") { + instance_domain_tile_offset({Dim64(0, 5, 2)}); + } + + rc::prop("any tiles", []() { + const Dim64 d1 = *rc::make_dimension(std::nullopt, {128}); + + instance_domain_tile_offset({d1}); + }); +} + +TEST_CASE("domain_tile_offset 2d", "[arithmetic]") { + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + + SECTION("Rectangle examples") { + const uint64_t d1_lower_bound = GENERATE(0, 3); + const uint64_t d1_extent = GENERATE(1, 4); + const uint64_t d2_lower_bound = GENERATE(0, 3); + const uint64_t d2_extent = GENERATE(1, 4); + + const Dim64 d1( + d1_lower_bound, d1_lower_bound + (5 * d1_extent) - 1, d1_extent); + const Dim64 d2( + d2_lower_bound, d2_lower_bound + (4 * d2_extent) - 1, d2_extent); + + SECTION("Whole domain") { + const auto r = + instance_domain_tile_offset({d1, d2}, 0, 20); + CHECK(r == std::vector{d1.domain, d2.domain}); + } + + SECTION("Sub-rectangle") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 4, 8); + CHECK( + r1 == std::vector{ + Dom64( + d1_lower_bound + d1_extent, + d1_lower_bound + 3 * d1_extent - 1), + d2.domain}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 8, 4); + CHECK( + r2 == std::vector{ + Dom64( + d1_lower_bound + 2 * d1_extent, + d1_lower_bound + 3 * d1_extent - 1), + d2.domain}); + + const auto r3 = + instance_domain_tile_offset({d1, d2}, 8, 12); + CHECK( + r3 == std::vector{ + Dom64( + d1_lower_bound + 2 * d1_extent, + d1_lower_bound + 5 * d1_extent - 1), + d2.domain}); + } + + SECTION("Line") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 
0, 2); + CHECK( + r1 == std::vector{ + Dom64(d1_lower_bound, d1_lower_bound + d1_extent - 1), + Dom64( + d2_lower_bound + 0 * d2_extent, + d2_lower_bound + 2 * d2_extent - 1)}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 1, 2); + CHECK( + r2 == std::vector{ + Dom64(d1_lower_bound, d1_lower_bound + d1_extent - 1), + Dom64( + d2_lower_bound + 1 * d2_extent, + d2_lower_bound + 3 * d2_extent - 1)}); + + const auto r3 = + instance_domain_tile_offset({d1, d2}, 9, 3); + CHECK( + r3 == std::vector{ + Dom64( + d1_lower_bound + 2 * d1_extent, + d1_lower_bound + 3 * d1_extent - 1), + Dom64( + d2_lower_bound + 1 * d2_extent, + d2_lower_bound + 4 * d2_extent - 1)}); + } + + SECTION("Align start but not end") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 0, 5); + CHECK(r1 == std::optional>{}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 4, 11); + CHECK(r2 == std::optional>{}); + } + + SECTION("Cross row") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 7, 2); + CHECK(r1 == std::optional>{}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 5, 4); + CHECK(r2 == std::optional>{}); + + const auto r3 = + instance_domain_tile_offset({d1, d2}, 5, 8); + CHECK(r3 == std::optional>{}); + } + } +} From 6ffb56944b34da9d14aea72dd798a1ad38d083ce Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 11:16:51 -0400 Subject: [PATCH 051/109] Fix instance_domain_tile_offset total_tiles --- tiledb/sm/query/test/unit_domain_tile_offset.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tiledb/sm/query/test/unit_domain_tile_offset.cc b/tiledb/sm/query/test/unit_domain_tile_offset.cc index 6f05b07628b..6f6342f41b0 100644 --- a/tiledb/sm/query/test/unit_domain_tile_offset.cc +++ b/tiledb/sm/query/test/unit_domain_tile_offset.cc @@ -469,8 +469,12 @@ template void instance_domain_tile_offset( std::span tile_extents, const sm::NDRange& domain) { uint64_t total_tiles = 1; - for (const auto& d : 
tile_extents) { - total_tiles *= d; + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t num_tiles_this_dimension = + sm::Dimension::tile_idx( + domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + + 1; + total_tiles *= num_tiles_this_dimension; } for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; From 0be85ddde2d85504b42c45a9b69b98f1d34447b1 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 11:18:52 -0400 Subject: [PATCH 052/109] Add 2d rapidcheck test --- tiledb/sm/query/test/unit_domain_tile_offset.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tiledb/sm/query/test/unit_domain_tile_offset.cc b/tiledb/sm/query/test/unit_domain_tile_offset.cc index 6f6342f41b0..43384eb20dc 100644 --- a/tiledb/sm/query/test/unit_domain_tile_offset.cc +++ b/tiledb/sm/query/test/unit_domain_tile_offset.cc @@ -540,7 +540,7 @@ TEST_CASE("domain_tile_offset 1d", "[arithmetic]") { using Dim64 = templates::Dimension; SECTION("Shrinking") { - instance_domain_tile_offset({Dim64(0, 5, 2)}); + instance_domain_tile_offset({Dim64(0, 18, 5)}); } rc::prop("any tiles", []() { @@ -655,4 +655,11 @@ TEST_CASE("domain_tile_offset 2d", "[arithmetic]") { CHECK(r3 == std::optional>{}); } } + + rc::prop("any tiles", []() { + const Dim64 d1 = *rc::make_dimension(std::nullopt, {64}); + const Dim64 d2 = *rc::make_dimension(std::nullopt, {64}); + + instance_domain_tile_offset({d1, d2}); + }); } From eaed3eb9a3e30a26ccf534900ef09f4f3ba3eb9b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 11:49:43 -0400 Subject: [PATCH 053/109] domain_tile_offset 3d test --- .../sm/query/test/unit_domain_tile_offset.cc | 136 +++++++++++++++++- 1 file changed, 133 insertions(+), 3 deletions(-) diff --git a/tiledb/sm/query/test/unit_domain_tile_offset.cc b/tiledb/sm/query/test/unit_domain_tile_offset.cc index 43384eb20dc..4fa448be1c2 100644 --- 
a/tiledb/sm/query/test/unit_domain_tile_offset.cc +++ b/tiledb/sm/query/test/unit_domain_tile_offset.cc @@ -89,10 +89,11 @@ static std::optional domain_tile_offset( const uint64_t outer_num_tiles = dimension_sizes[d]; const uint64_t hyperrow_num_tiles = dimension_sizes[d + 1]; - const T this_dimension_start_tile = - (start_tile / hyperrow_num_tiles) % outer_num_tiles; + const T this_dimension_start_tile = (start_tile / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); const T this_dimension_end_tile = - ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % outer_num_tiles; + ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); if (start_tile % hyperrow_num_tiles == 0) { // aligned to the start of the hyperrow @@ -663,3 +664,132 @@ TEST_CASE("domain_tile_offset 2d", "[arithmetic]") { instance_domain_tile_offset({d1, d2}); }); } + +TEST_CASE("domain_tile_offset 3d", "[arithmetic]") { + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + + SECTION("Rectangular prism examples") { + const uint64_t d1_lower_bound = 0; // GENERATE(0, 3); + const uint64_t d1_extent = 1; // GENERATE(1, 4); + const uint64_t d2_lower_bound = 0; // GENERATE(0, 3); + const uint64_t d2_extent = 1; // GENERATE(1, 4); + const uint64_t d3_lower_bound = 0; // GENERATE(0, 3); + const uint64_t d3_extent = 1; // GENERATE(1, 4); + + const Dim64 d1( + d1_lower_bound, d1_lower_bound + (3 * d1_extent) - 1, d1_extent); + const Dim64 d2( + d2_lower_bound, d2_lower_bound + (6 * d2_extent) - 1, d2_extent); + const Dim64 d3( + d3_lower_bound, d3_lower_bound + (7 * d3_extent) - 1, d3_extent); + + auto make_d1 = [&](uint64_t h_start, uint64_t h_end) { + return Dom64( + d1_lower_bound + h_start * d1_extent, + d1_lower_bound + h_end * d1_extent + d1_extent - 1); + }; + auto make_d2 = [&](uint64_t w_start, uint64_t w_end) { + return Dom64( + d2_lower_bound + w_start * d2_extent, + d2_lower_bound + w_end * d2_extent + d2_extent - 1); + }; 
+ auto make_d3 = [&](uint64_t l_start, uint64_t l_end) { + return Dom64( + d3_lower_bound + l_start * d3_extent, + d3_lower_bound + l_end * d3_extent + d3_extent - 1); + }; + + SECTION("Whole domain") { + const auto r = instance_domain_tile_offset( + {d1, d2, d3}, 0, d1.num_tiles() * d2.num_tiles() * d3.num_tiles()); + CHECK(r == std::vector{d1.domain, d2.domain, d3.domain}); + } + + SECTION("Plane") { + const auto r1 = + instance_domain_tile_offset({d1, d2, d3}, 0, 42); + CHECK(r1 == std::vector{make_d1(0, 0), d2.domain, d3.domain}); + + const auto r2 = + instance_domain_tile_offset({d1, d2, d3}, 42, 42); + CHECK(r2 == std::vector{make_d1(1, 1), d2.domain, d3.domain}); + + const auto r3 = + instance_domain_tile_offset({d1, d2, d3}, 84, 42); + CHECK(r3 == std::vector{make_d1(2, 2), d2.domain, d3.domain}); + } + + SECTION("Rectangle") { + const auto r1 = + instance_domain_tile_offset({d1, d2, d3}, 0, 14); + CHECK(r1 == std::vector{make_d1(0, 0), make_d2(0, 1), d3.domain}); + + const auto r2 = + instance_domain_tile_offset({d1, d2, d3}, 70, 14); + CHECK(r2 == std::vector{make_d1(1, 1), make_d2(4, 5), d3.domain}); + } + + SECTION("Line") { + const auto r1 = + instance_domain_tile_offset({d1, d2, d3}, 0, 4); + CHECK( + r1 == + std::vector{make_d1(0, 0), make_d2(0, 0), make_d3(0, 3)}); + + const auto r2 = + instance_domain_tile_offset({d1, d2, d3}, 8, 2); + CHECK( + r2 == + std::vector{make_d1(0, 0), make_d2(1, 1), make_d3(1, 2)}); + + const auto r3 = + instance_domain_tile_offset({d1, d2, d3}, 109, 3); + CHECK( + r3 == + std::vector{make_d1(2, 2), make_d2(3, 3), make_d3(4, 6)}); + } + + SECTION("Align start but not end") { + const auto r1 = + instance_domain_tile_offset({d1, d2, d3}, 0, 43); + CHECK(r1 == std::optional>{}); + + const auto r2 = + instance_domain_tile_offset({d1, d2, d3}, 42, 125); + CHECK(r2 == std::optional>{}); + } + + SECTION("Cross row") { + const auto r1 = + instance_domain_tile_offset({d1, d2, d3}, 0, 8); + CHECK(r1 == std::optional>{}); + + const 
auto r2 = + instance_domain_tile_offset({d1, d2, d3}, 23, 6); + CHECK(r2 == std::optional>{}); + } + + SECTION("Cross plane") { + const auto r1 = + instance_domain_tile_offset({d1, d2, d3}, 40, 3); + CHECK(r1 == std::optional>{}); + + const auto r2 = + instance_domain_tile_offset({d1, d2, d3}, 77, 8); + CHECK(r2 == std::optional>{}); + } + } + + rc::prop("any tiles", []() { + const Dim64 d1 = + *rc::make_dimension(std::nullopt, {16}); + const Dim64 d2 = + *rc::make_dimension(std::nullopt, {16}); + const Dim64 d3 = + *rc::make_dimension(std::nullopt, {16}); + + instance_domain_tile_offset( + {d1, d2, d3}); + }); +} From 541c75d728a92afda5f38e34898d7ec8319be373 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 12:54:33 -0400 Subject: [PATCH 054/109] Move tile arithmetic functions into tiledb/sm/tile/arithmetic.h --- test/support/CMakeLists.txt | 3 +- .../rapidcheck/show/array_schema_templates.cc | 74 +++++++++ .../rapidcheck/{show.cc => show/query_ast.cc} | 39 +---- test/support/src/array_schema_templates.h | 6 +- tiledb/sm/query/test/CMakeLists.txt | 2 +- tiledb/sm/tile/arithmetic.h | 153 ++++++++++++++++++ tiledb/sm/tile/test/CMakeLists.txt | 3 + .../test/unit_arithmetic.cc} | 111 +------------ 8 files changed, 240 insertions(+), 151 deletions(-) create mode 100644 test/support/rapidcheck/show/array_schema_templates.cc rename test/support/rapidcheck/{show.cc => show/query_ast.cc} (73%) create mode 100644 tiledb/sm/tile/arithmetic.h rename tiledb/sm/{query/test/unit_domain_tile_offset.cc => tile/test/unit_arithmetic.cc} (86%) diff --git a/test/support/CMakeLists.txt b/test/support/CMakeLists.txt index 6eb891a6dba..caae00aa1a9 100644 --- a/test/support/CMakeLists.txt +++ b/test/support/CMakeLists.txt @@ -36,7 +36,8 @@ list(APPEND TILEDB_CORE_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/tiledb/sm/c_api") # Gather the test source files set(TILEDB_TEST_SUPPORT_SOURCES - rapidcheck/show.cc + rapidcheck/show/array_schema_templates.cc + rapidcheck/show/query_ast.cc 
src/array_helpers.cc src/array_schema_helpers.cc src/ast_helpers.h diff --git a/test/support/rapidcheck/show/array_schema_templates.cc b/test/support/rapidcheck/show/array_schema_templates.cc new file mode 100644 index 00000000000..ca395c902f9 --- /dev/null +++ b/test/support/rapidcheck/show/array_schema_templates.cc @@ -0,0 +1,74 @@ +/** + * @file test/support/rapidcheck/show/array_schema_templates.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides forward declarations of `rc::detail::showValue` + * overloads, which seemingly must be included prior to the rapidcheck + * header files. 
+ */ + +#include +#include +#include + +namespace rc { + +template +void showImpl( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + os << "[" << domain.lower_bound << ", " << domain.upper_bound << "]"; +} + +template <> +void show>( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + showImpl(domain, os); +} + +template <> +void show>( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + showImpl(domain, os); +} + +template +void showImpl( + const tiledb::test::templates::Dimension
& dimension, std::ostream& os) { + os << "{\"domain\": "; + showImpl(dimension.domain, os); + os << ", \"extent\": " << dimension.extent << "}"; +} + +template <> +void show>( + const templates::Dimension& dimension, + std::ostream& os) { + showImpl(dimension, os); +} + +} // namespace rc diff --git a/test/support/rapidcheck/show.cc b/test/support/rapidcheck/show/query_ast.cc similarity index 73% rename from test/support/rapidcheck/show.cc rename to test/support/rapidcheck/show/query_ast.cc index 8b5c0146790..f895667de4c 100644 --- a/test/support/rapidcheck/show.cc +++ b/test/support/rapidcheck/show/query_ast.cc @@ -1,5 +1,5 @@ /** - * @file test/support/rapidcheck/show.cc + * @file test/support/rapidcheck/show/query_ast.cc * * @section LICENSE * @@ -80,40 +80,3 @@ void showValue(const tiledb::sm::ASTNode& node, std::ostream& os) { } } // namespace rc::detail - -namespace rc { - -template -void showImpl( - const tiledb::test::templates::Domain& domain, std::ostream& os) { - os << "[" << domain.lower_bound << ", " << domain.upper_bound << "]"; -} - -template <> -void show>( - const tiledb::test::templates::Domain& domain, std::ostream& os) { - showImpl(domain, os); -} - -template <> -void show>( - const tiledb::test::templates::Domain& domain, std::ostream& os) { - showImpl(domain, os); -} - -template -void showImpl( - const tiledb::test::templates::Dimension
& dimension, std::ostream& os) { - os << "{\"domain\": "; - showImpl(dimension.domain, os); - os << ", \"extent\": " << dimension.extent << "}"; -} - -template <> -void show>( - const templates::Dimension& dimension, - std::ostream& os) { - showImpl(dimension, os); -} - -} // namespace rc diff --git a/test/support/src/array_schema_templates.h b/test/support/src/array_schema_templates.h index 5a89d9fdea1..bd2b77059a8 100644 --- a/test/support/src/array_schema_templates.h +++ b/test/support/src/array_schema_templates.h @@ -123,11 +123,13 @@ struct Domain { /** * A description of a dimension as it pertains to its datatype. */ -template +template struct Dimension { - using value_type = tiledb::type::datatype_traits::value_type; + using value_type = tiledb::type::datatype_traits
::value_type; using domain_type = Domain; + static constexpr tiledb::sm::Datatype DATATYPE = DT; + Dimension() = default; Dimension(Domain domain, value_type extent) : domain(domain) diff --git a/tiledb/sm/query/test/CMakeLists.txt b/tiledb/sm/query/test/CMakeLists.txt index 13742dc2c81..85f64c86be7 100644 --- a/tiledb/sm/query/test/CMakeLists.txt +++ b/tiledb/sm/query/test/CMakeLists.txt @@ -27,7 +27,7 @@ include(unit_test) commence(unit_test query) - this_target_sources(main.cc unit_validity_vector.cc unit_query_condition.cc unit_domain_tile_offset.cc) + this_target_sources(main.cc unit_validity_vector.cc unit_query_condition.cc) # Not actually testing a unit yet, but some things that ought to be units this_target_link_libraries(tiledb_test_support_lib) this_target_link_libraries(ast_test_support_lib) diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h new file mode 100644 index 00000000000..c3277dc98ef --- /dev/null +++ b/tiledb/sm/tile/arithmetic.h @@ -0,0 +1,153 @@ +/** + * @file tiledb/sm/tile/arithmetic.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides template definitions for doing tile arithmetic, + * e.g. computing new domains based on offsets and such. + */ + +#include "tiledb/sm/array_schema/dimension.h" +#include "tiledb/sm/misc/types.h" +#include "tiledb/type/range/range.h" + +namespace tiledb::sm { + +/** + * @return true if the range `[start_tile, start_tile + num_tiles)` represents + * a hyper-rectangle inside `domain` with tile sizes given by `tile_extents` + */ +template +static bool is_rectangular_domain( + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + for (uint64_t d_outer = 0; d_outer < tile_extents.size(); d_outer++) { + uint64_t hyperrow_num_tiles = 1; + for (uint64_t d_inner = d_outer + 1; d_inner < tile_extents.size(); + d_inner++) { + const uint64_t d_inner_num_tiles = sm::Dimension::tile_idx( + domain[d_inner].end_as(), + domain[d_inner].start_as(), + tile_extents[d_inner]) + + 1; + hyperrow_num_tiles *= d_inner_num_tiles; + } + + const uint64_t hyperrow_offset = start_tile % hyperrow_num_tiles; + if (hyperrow_offset + num_tiles > hyperrow_num_tiles) { + if (hyperrow_offset != 0) { + return false; + } else if (num_tiles % hyperrow_num_tiles != 0) { + return false; + } + } + } + return true; +} + +/** + * Compute the number of tiles per hyper-row for the given `domain` with tiles + * given by `tile_extents`. + * + * For D dimensions, the returned vector contains `D+1` elements. + * Position 0 is the number of tiles in `domain`. + * For dimension `d`, position `d + 1` is the number of tiles in a hyper-row of + * dimension `d` (and is thus always 1 for the final dimension). 
+ */ +template +std::vector compute_hyperrow_sizes( + std::span tile_extents, const sm::NDRange& domain) { + std::vector hyperrow_sizes(tile_extents.size() + 1, 1); + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t d_num_tiles = + sm::Dimension::tile_idx( + domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + + 1; + hyperrow_sizes[d] = d_num_tiles; + } + for (uint64_t d = tile_extents.size(); d > 0; d--) { + hyperrow_sizes[d - 1] = hyperrow_sizes[d - 1] * hyperrow_sizes[d]; + } + + return hyperrow_sizes; +} + +/** + * @return a new range which is contained the rectangle within `domain` defined + * by `[start_tile, start_tile + num_tiles)` for the tile sizes given by + * `tile_extents`. If this does not represent a valid rectangle then + * `std::nullopt` is returned instead. + */ +template +static std::optional domain_tile_offset( + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + sm::NDRange r; + + const std::vector dimension_sizes = + compute_hyperrow_sizes(tile_extents, domain); + + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t outer_num_tiles = dimension_sizes[d]; + const uint64_t hyperrow_num_tiles = dimension_sizes[d + 1]; + + const T this_dimension_start_tile = (start_tile / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); + const T this_dimension_end_tile = + ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); + + if (start_tile % hyperrow_num_tiles == 0) { + // aligned to the start of the hyperrow + if (num_tiles > hyperrow_num_tiles && + num_tiles % hyperrow_num_tiles != 0) { + return std::nullopt; + } + } else { + // begins in the middle of the hyperrow + const uint64_t offset = start_tile % hyperrow_num_tiles; + if (offset + num_tiles > hyperrow_num_tiles) { + return std::nullopt; + } + } + + const T start = + domain[d].start_as() + (this_dimension_start_tile * tile_extents[d]); + const T 
end = domain[d].start_as() + + (this_dimension_end_tile * tile_extents[d]) + + tile_extents[d] - 1; + r.push_back(Range(start, end)); + } + + return r; +} + +} // namespace tiledb::sm diff --git a/tiledb/sm/tile/test/CMakeLists.txt b/tiledb/sm/tile/test/CMakeLists.txt index 6feeb4eafb1..bb06dbdcb41 100644 --- a/tiledb/sm/tile/test/CMakeLists.txt +++ b/tiledb/sm/tile/test/CMakeLists.txt @@ -29,7 +29,10 @@ include(unit_test) commence(unit_test tile) this_target_sources( main.cc + unit_arithmetic.cc unit_tile.cc + ${CMAKE_SOURCE_DIR}/test/support/rapidcheck/show/array_schema_templates.cc ) this_target_object_libraries(tile mem_helpers) + this_target_link_libraries(rapidcheck) conclude(unit_test) diff --git a/tiledb/sm/query/test/unit_domain_tile_offset.cc b/tiledb/sm/tile/test/unit_arithmetic.cc similarity index 86% rename from tiledb/sm/query/test/unit_domain_tile_offset.cc rename to tiledb/sm/tile/test/unit_arithmetic.cc index 4fa448be1c2..62242da8a6d 100644 --- a/tiledb/sm/query/test/unit_domain_tile_offset.cc +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -4,122 +4,15 @@ #include "test/support/src/array_schema_templates.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/misc/types.h" +#include "tiledb/sm/tile/arithmetic.h" #include "tiledb/type/range/range.h" #include using namespace tiledb; +using namespace sm; using namespace tiledb::test; -template -static bool is_rectangular_domain( - std::span tile_extents, - const sm::NDRange& domain, - uint64_t start_tile, - uint64_t num_tiles) { - for (uint64_t d_outer = 0; d_outer < tile_extents.size(); d_outer++) { - uint64_t hyperrow_num_tiles = 1; - for (uint64_t d_inner = d_outer + 1; d_inner < tile_extents.size(); - d_inner++) { - const uint64_t d_inner_num_tiles = sm::Dimension::tile_idx( - domain[d_inner].end_as(), - domain[d_inner].start_as(), - tile_extents[d_inner]) + - 1; - hyperrow_num_tiles *= d_inner_num_tiles; - } - - const uint64_t hyperrow_offset = start_tile % hyperrow_num_tiles; - if 
(hyperrow_offset + num_tiles > hyperrow_num_tiles) { - if (hyperrow_offset != 0) { - return false; - } else if (num_tiles % hyperrow_num_tiles != 0) { - return false; - } - } - } - return true; -} - -/** - * Compute the number of tiles per hyper-row for the given `domain` with tiles - * given by `tile_extents`. - * - * For D dimensions, the returned vector contains `D+1` elements. - * Position 0 is the number of tiles in `domain`. - * For dimension `d`, position `d + 1` is the number of tiles in a hyper-row of - * dimension `d` (and is thus always 1 for the final dimension). - */ -template -std::vector compute_hyperrow_sizes( - std::span tile_extents, const sm::NDRange& domain) { - std::vector hyperrow_sizes(tile_extents.size() + 1, 1); - for (uint64_t d = 0; d < tile_extents.size(); d++) { - const uint64_t d_num_tiles = - sm::Dimension::tile_idx( - domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + - 1; - hyperrow_sizes[d] = d_num_tiles; - } - for (uint64_t d = tile_extents.size(); d > 0; d--) { - hyperrow_sizes[d - 1] = hyperrow_sizes[d - 1] * hyperrow_sizes[d]; - } - - return hyperrow_sizes; -} - -/** - * @return a new range which is contained the rectangle within `domain` defined - * by `[start_tile, start_tile + num_tiles)` for the tile sizes given by - * `tile_extents`. If this does not represent a valid rectangle then - * `std::nullopt` is returned instead. 
- */ -template -static std::optional domain_tile_offset( - std::span tile_extents, - const sm::NDRange& domain, - uint64_t start_tile, - uint64_t num_tiles) { - sm::NDRange r; - - const std::vector dimension_sizes = - compute_hyperrow_sizes(tile_extents, domain); - - for (uint64_t d = 0; d < tile_extents.size(); d++) { - const uint64_t outer_num_tiles = dimension_sizes[d]; - const uint64_t hyperrow_num_tiles = dimension_sizes[d + 1]; - - const T this_dimension_start_tile = (start_tile / hyperrow_num_tiles) % - (outer_num_tiles / hyperrow_num_tiles); - const T this_dimension_end_tile = - ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % - (outer_num_tiles / hyperrow_num_tiles); - - if (start_tile % hyperrow_num_tiles == 0) { - // aligned to the start of the hyperrow - if (num_tiles > hyperrow_num_tiles && - num_tiles % hyperrow_num_tiles != 0) { - return std::nullopt; - } - } else { - // begins in the middle of the hyperrow - const uint64_t offset = start_tile % hyperrow_num_tiles; - if (offset + num_tiles > hyperrow_num_tiles) { - return std::nullopt; - } - } - - const T start = - domain[d].start_as() + (this_dimension_start_tile * tile_extents[d]); - const T end = domain[d].start_as() + - (this_dimension_end_tile * tile_extents[d]) + - tile_extents[d] - 1; - r.push_back(Range(start, end)); - } - - return r; -} - template static bool is_rectangular_domain( std::span tile_extents, From 7167bf6f11944638b468f74b9cbaaef124c5b67a Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 14:13:56 -0400 Subject: [PATCH 055/109] Update for col major --- tiledb/sm/tile/arithmetic.h | 39 ++++-- tiledb/sm/tile/test/unit_arithmetic.cc | 159 +++++++++++++++---------- 2 files changed, 122 insertions(+), 76 deletions(-) diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h index c3277dc98ef..076e530a859 100644 --- a/tiledb/sm/tile/arithmetic.h +++ b/tiledb/sm/tile/arithmetic.h @@ -32,6 +32,7 @@ */ #include "tiledb/sm/array_schema/dimension.h" 
+#include "tiledb/sm/enums/layout.h" #include "tiledb/sm/misc/types.h" #include "tiledb/type/range/range.h" @@ -43,6 +44,7 @@ namespace tiledb::sm { */ template static bool is_rectangular_domain( + Layout tile_order, std::span tile_extents, const sm::NDRange& domain, uint64_t start_tile, @@ -51,11 +53,13 @@ static bool is_rectangular_domain( uint64_t hyperrow_num_tiles = 1; for (uint64_t d_inner = d_outer + 1; d_inner < tile_extents.size(); d_inner++) { - const uint64_t d_inner_num_tiles = sm::Dimension::tile_idx( - domain[d_inner].end_as(), - domain[d_inner].start_as(), - tile_extents[d_inner]) + - 1; + const uint64_t d = + (tile_order == Layout::ROW_MAJOR ? d_inner : + tile_extents.size() - d_inner - 1); + const uint64_t d_inner_num_tiles = + sm::Dimension::tile_idx( + domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + + 1; hyperrow_num_tiles *= d_inner_num_tiles; } @@ -82,14 +86,18 @@ static bool is_rectangular_domain( */ template std::vector compute_hyperrow_sizes( - std::span tile_extents, const sm::NDRange& domain) { + Layout tile_order, + std::span tile_extents, + const sm::NDRange& domain) { std::vector hyperrow_sizes(tile_extents.size() + 1, 1); - for (uint64_t d = 0; d < tile_extents.size(); d++) { + for (uint64_t di = 0; di < tile_extents.size(); di++) { + const uint64_t d = + (tile_order == Layout::ROW_MAJOR ? 
di : tile_extents.size() - di - 1); const uint64_t d_num_tiles = sm::Dimension::tile_idx( domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + 1; - hyperrow_sizes[d] = d_num_tiles; + hyperrow_sizes[di] = d_num_tiles; } for (uint64_t d = tile_extents.size(); d > 0; d--) { hyperrow_sizes[d - 1] = hyperrow_sizes[d - 1] * hyperrow_sizes[d]; @@ -106,18 +114,23 @@ std::vector compute_hyperrow_sizes( */ template static std::optional domain_tile_offset( + Layout tile_order, std::span tile_extents, const sm::NDRange& domain, uint64_t start_tile, uint64_t num_tiles) { sm::NDRange r; + r.resize(tile_extents.size()); const std::vector dimension_sizes = - compute_hyperrow_sizes(tile_extents, domain); + compute_hyperrow_sizes(tile_order, tile_extents, domain); + + for (uint64_t di = 0; di < tile_extents.size(); di++) { + const uint64_t d = + (tile_order == Layout::ROW_MAJOR ? di : tile_extents.size() - di - 1); - for (uint64_t d = 0; d < tile_extents.size(); d++) { - const uint64_t outer_num_tiles = dimension_sizes[d]; - const uint64_t hyperrow_num_tiles = dimension_sizes[d + 1]; + const uint64_t outer_num_tiles = dimension_sizes[di]; + const uint64_t hyperrow_num_tiles = dimension_sizes[di + 1]; const T this_dimension_start_tile = (start_tile / hyperrow_num_tiles) % (outer_num_tiles / hyperrow_num_tiles); @@ -144,7 +157,7 @@ static std::optional domain_tile_offset( const T end = domain[d].start_as() + (this_dimension_end_tile * tile_extents[d]) + tile_extents[d] - 1; - r.push_back(Range(start, end)); + r[d] = Range(start, end); } return r; diff --git a/tiledb/sm/tile/test/unit_arithmetic.cc b/tiledb/sm/tile/test/unit_arithmetic.cc index 62242da8a6d..e2332b58068 100644 --- a/tiledb/sm/tile/test/unit_arithmetic.cc +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -19,10 +19,12 @@ static bool is_rectangular_domain( T lower_bound, T upper_bound, uint64_t start_tile, - uint64_t num_tiles) { + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { sm::NDRange r; 
r.push_back(Range(lower_bound, upper_bound)); - return is_rectangular_domain(tile_extents, r, start_tile, num_tiles); + return is_rectangular_domain( + tile_order, tile_extents, r, start_tile, num_tiles); } template @@ -33,11 +35,13 @@ static bool is_rectangular_domain( T d2_lower_bound, T d2_upper_bound, uint64_t start_tile, - uint64_t num_tiles) { + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { sm::NDRange r; r.push_back(Range(d1_lower_bound, d1_upper_bound)); r.push_back(Range(d2_lower_bound, d2_upper_bound)); - return is_rectangular_domain(tile_extents, r, start_tile, num_tiles); + return is_rectangular_domain( + tile_order, tile_extents, r, start_tile, num_tiles); } template @@ -45,7 +49,8 @@ static bool is_rectangular_domain( const templates::Dimension
& d1, const templates::Dimension
& d2, uint64_t start_tile, - uint64_t num_tiles) { + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { using Coord = templates::Dimension
::value_type; const std::vector extents = {d1.extent, d2.extent}; return is_rectangular_domain( @@ -55,7 +60,8 @@ static bool is_rectangular_domain( d2.domain.lower_bound, d2.domain.upper_bound, start_tile, - num_tiles); + num_tiles, + tile_order); } template @@ -64,14 +70,16 @@ static bool is_rectangular_domain( const templates::Dimension
& d2, const templates::Dimension
& d3, uint64_t start_tile, - uint64_t num_tiles) { + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { using Coord = templates::Dimension
::value_type; const std::vector extents = {d1.extent, d2.extent, d3.extent}; sm::NDRange r; r.push_back(Range(d1.domain.lower_bound, d1.domain.upper_bound)); r.push_back(Range(d2.domain.lower_bound, d2.domain.upper_bound)); r.push_back(Range(d3.domain.lower_bound, d3.domain.upper_bound)); - return is_rectangular_domain(extents, r, start_tile, num_tiles); + return is_rectangular_domain( + tile_order, extents, r, start_tile, num_tiles); } // in one dimension all domains are rectangles @@ -319,11 +327,12 @@ std::optional instance_domain_tile_offset( std::span tile_extents, const sm::NDRange& domain, uint64_t start_tile, - uint64_t num_tiles) { - const bool expect_rectangle = - is_rectangular_domain(tile_extents, domain, start_tile, num_tiles); - const std::optional adjusted_domain = - domain_tile_offset(tile_extents, domain, start_tile, num_tiles); + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { + const bool expect_rectangle = is_rectangular_domain( + tile_order, tile_extents, domain, start_tile, num_tiles); + const std::optional adjusted_domain = domain_tile_offset( + tile_order, tile_extents, domain, start_tile, num_tiles); if (!expect_rectangle) { ASSERTER(!adjusted_domain.has_value()); return std::nullopt; @@ -344,15 +353,17 @@ std::optional instance_domain_tile_offset( ASSERTER(num_tiles_result == num_tiles); const std::vector hyperrow_sizes = - compute_hyperrow_sizes(tile_extents, domain); + compute_hyperrow_sizes(tile_order, tile_extents, domain); uint64_t start_tile_result = 0; - for (uint64_t d = 0; d < tile_extents.size(); d++) { + for (uint64_t di = 0; di < tile_extents.size(); di++) { + const uint64_t d = + (tile_order == Layout::ROW_MAJOR ? 
di : tile_extents.size() - di - 1); const uint64_t start_tile_this_dimension = sm::Dimension::tile_idx( adjusted_domain.value()[d].start_as(), domain[d].start_as(), tile_extents[d]); - start_tile_result += start_tile_this_dimension * hyperrow_sizes[d + 1]; + start_tile_result += start_tile_this_dimension * hyperrow_sizes[di + 1]; } ASSERTER(start_tile_result == start_tile); @@ -361,7 +372,9 @@ std::optional instance_domain_tile_offset( template void instance_domain_tile_offset( - std::span tile_extents, const sm::NDRange& domain) { + std::span tile_extents, + const sm::NDRange& domain, + Layout tile_order = Layout::ROW_MAJOR) { uint64_t total_tiles = 1; for (uint64_t d = 0; d < tile_extents.size(); d++) { const uint64_t num_tiles_this_dimension = @@ -374,7 +387,7 @@ void instance_domain_tile_offset( for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; num_tiles++) { instance_domain_tile_offset( - tile_extents, domain, start_tile, num_tiles); + tile_extents, domain, start_tile, num_tiles, tile_order); } } } @@ -385,7 +398,8 @@ std::optional>& dims, uint64_t start_tile, - uint64_t num_tiles) { + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { using Coord = typename templates::Dimension
::value_type; std::vector tile_extents; @@ -399,7 +413,7 @@ instance_domain_tile_offset( } const auto range = instance_domain_tile_offset( - tile_extents, domain, start_tile, num_tiles); + tile_extents, domain, start_tile, num_tiles, tile_order); if (!range.has_value()) { return std::nullopt; } @@ -414,7 +428,8 @@ instance_domain_tile_offset( template void instance_domain_tile_offset( - const std::vector>& dims) { + const std::vector>& dims, + Layout tile_order = Layout::ROW_MAJOR) { using Coord = templates::Dimension
::value_type; std::vector tile_extents; @@ -427,7 +442,8 @@ void instance_domain_tile_offset( domain.push_back(Range(dim.domain.lower_bound, dim.domain.upper_bound)); } - instance_domain_tile_offset(tile_extents, domain); + instance_domain_tile_offset( + tile_extents, domain, tile_order); } TEST_CASE("domain_tile_offset 1d", "[arithmetic]") { @@ -459,70 +475,54 @@ TEST_CASE("domain_tile_offset 2d", "[arithmetic]") { const Dim64 d2( d2_lower_bound, d2_lower_bound + (4 * d2_extent) - 1, d2_extent); + auto make_d1 = [&](uint64_t r_start, uint64_t r_end) { + return Dom64( + d1_lower_bound + r_start * d1_extent, + d1_lower_bound + r_end * d1_extent + d1_extent - 1); + }; + auto make_d2 = [&](uint64_t c_start, uint64_t c_end) { + return Dom64( + d2_lower_bound + c_start * d2_extent, + d2_lower_bound + c_end * d2_extent + d2_extent - 1); + }; + SECTION("Whole domain") { - const auto r = - instance_domain_tile_offset({d1, d2}, 0, 20); + const Layout tile_order = GENERATE(Layout::ROW_MAJOR, Layout::COL_MAJOR); + const auto r = instance_domain_tile_offset( + {d1, d2}, 0, 20, tile_order); CHECK(r == std::vector{d1.domain, d2.domain}); } SECTION("Sub-rectangle") { const auto r1 = instance_domain_tile_offset({d1, d2}, 4, 8); - CHECK( - r1 == std::vector{ - Dom64( - d1_lower_bound + d1_extent, - d1_lower_bound + 3 * d1_extent - 1), - d2.domain}); + CHECK(r1 == std::vector{make_d1(1, 2), d2.domain}); const auto r2 = instance_domain_tile_offset({d1, d2}, 8, 4); - CHECK( - r2 == std::vector{ - Dom64( - d1_lower_bound + 2 * d1_extent, - d1_lower_bound + 3 * d1_extent - 1), - d2.domain}); + CHECK(r2 == std::vector{make_d1(2, 2), d2.domain}); const auto r3 = instance_domain_tile_offset({d1, d2}, 8, 12); - CHECK( - r3 == std::vector{ - Dom64( - d1_lower_bound + 2 * d1_extent, - d1_lower_bound + 5 * d1_extent - 1), - d2.domain}); + CHECK(r3 == std::vector{make_d1(2, 4), d2.domain}); } SECTION("Line") { const auto r1 = instance_domain_tile_offset({d1, d2}, 0, 2); - CHECK( - r1 == 
std::vector{ - Dom64(d1_lower_bound, d1_lower_bound + d1_extent - 1), - Dom64( - d2_lower_bound + 0 * d2_extent, - d2_lower_bound + 2 * d2_extent - 1)}); + CHECK(r1 == std::vector{make_d1(0, 0), make_d2(0, 1)}); const auto r2 = instance_domain_tile_offset({d1, d2}, 1, 2); CHECK( r2 == std::vector{ - Dom64(d1_lower_bound, d1_lower_bound + d1_extent - 1), - Dom64( - d2_lower_bound + 1 * d2_extent, - d2_lower_bound + 3 * d2_extent - 1)}); + make_d1(0, 0), + make_d2(1, 2), + }); const auto r3 = instance_domain_tile_offset({d1, d2}, 9, 3); - CHECK( - r3 == std::vector{ - Dom64( - d1_lower_bound + 2 * d1_extent, - d1_lower_bound + 3 * d1_extent - 1), - Dom64( - d2_lower_bound + 1 * d2_extent, - d2_lower_bound + 4 * d2_extent - 1)}); + CHECK(r3 == std::vector{make_d1(2, 2), make_d2(1, 3)}); } SECTION("Align start but not end") { @@ -548,13 +548,30 @@ TEST_CASE("domain_tile_offset 2d", "[arithmetic]") { instance_domain_tile_offset({d1, d2}, 5, 8); CHECK(r3 == std::optional>{}); } + + SECTION("Column major") { + const auto r1 = instance_domain_tile_offset( + {d1, d2}, 0, 10, Layout::COL_MAJOR); + CHECK(r1 == std::vector{d1.domain, make_d2(0, 1)}); + + const auto r2 = instance_domain_tile_offset( + {d1, d2}, 11, 4, Layout::COL_MAJOR); + CHECK(r2 == std::vector{make_d1(1, 4), make_d2(2, 2)}); + + const auto r3 = instance_domain_tile_offset( + {d1, d2}, 11, 5, Layout::COL_MAJOR); + CHECK(r3 == std::optional>{}); + } } rc::prop("any tiles", []() { const Dim64 d1 = *rc::make_dimension(std::nullopt, {64}); const Dim64 d2 = *rc::make_dimension(std::nullopt, {64}); + const Layout tile_order = + *rc::gen::element(Layout::ROW_MAJOR, Layout::COL_MAJOR); - instance_domain_tile_offset({d1, d2}); + instance_domain_tile_offset( + {d1, d2}, tile_order); }); } @@ -594,8 +611,12 @@ TEST_CASE("domain_tile_offset 3d", "[arithmetic]") { }; SECTION("Whole domain") { + const Layout tile_order = GENERATE(Layout::ROW_MAJOR, Layout::COL_MAJOR); const auto r = instance_domain_tile_offset( - {d1, d2, 
d3}, 0, d1.num_tiles() * d2.num_tiles() * d3.num_tiles()); + {d1, d2, d3}, + 0, + d1.num_tiles() * d2.num_tiles() * d3.num_tiles(), + tile_order); CHECK(r == std::vector{d1.domain, d2.domain, d3.domain}); } @@ -672,6 +693,16 @@ TEST_CASE("domain_tile_offset 3d", "[arithmetic]") { instance_domain_tile_offset({d1, d2, d3}, 77, 8); CHECK(r2 == std::optional>{}); } + + SECTION("Column major") { + const auto r1 = instance_domain_tile_offset( + {d1, d2, d3}, 54, 36, Layout::COL_MAJOR); + CHECK(r1 == std::vector{d1.domain, d2.domain, make_d3(3, 4)}); + + const auto r2 = instance_domain_tile_offset( + {d1, d2, d3}, 78, 12, Layout::COL_MAJOR); + CHECK(r2 == std::vector{d1.domain, make_d2(2, 5), make_d3(4, 4)}); + } } rc::prop("any tiles", []() { @@ -681,8 +712,10 @@ TEST_CASE("domain_tile_offset 3d", "[arithmetic]") { *rc::make_dimension(std::nullopt, {16}); const Dim64 d3 = *rc::make_dimension(std::nullopt, {16}); + const Layout tile_order = + *rc::gen::element(Layout::ROW_MAJOR, Layout::COL_MAJOR); instance_domain_tile_offset( - {d1, d2, d3}); + {d1, d2, d3}, tile_order); }); } From 13e6917f0c353b68ebcef988cc5e5547eaffd408 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 14:24:54 -0400 Subject: [PATCH 056/109] Un-comment GENERATEs --- tiledb/sm/tile/test/unit_arithmetic.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tiledb/sm/tile/test/unit_arithmetic.cc b/tiledb/sm/tile/test/unit_arithmetic.cc index e2332b58068..37c016dafe1 100644 --- a/tiledb/sm/tile/test/unit_arithmetic.cc +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -580,12 +580,12 @@ TEST_CASE("domain_tile_offset 3d", "[arithmetic]") { using Dom64 = Dim64::domain_type; SECTION("Rectangular prism examples") { - const uint64_t d1_lower_bound = 0; // GENERATE(0, 3); - const uint64_t d1_extent = 1; // GENERATE(1, 4); - const uint64_t d2_lower_bound = 0; // GENERATE(0, 3); - const uint64_t d2_extent = 1; // GENERATE(1, 4); - const uint64_t d3_lower_bound = 0; // 
GENERATE(0, 3); - const uint64_t d3_extent = 1; // GENERATE(1, 4); + const uint64_t d1_lower_bound = GENERATE(0, 3); + const uint64_t d1_extent = GENERATE(1, 4); + const uint64_t d2_lower_bound = GENERATE(0, 3); + const uint64_t d2_extent = GENERATE(1, 4); + const uint64_t d3_lower_bound = GENERATE(0, 3); + const uint64_t d3_extent = GENERATE(1, 4); const Dim64 d1( d1_lower_bound, d1_lower_bound + (3 * d1_extent) - 1, d1_extent); From c3d3d64c9b88498d4290e2fdf18fb70455855b15 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 27 Oct 2025 14:42:28 -0400 Subject: [PATCH 057/109] Use tile arithmetic functions in global order writer --- .../sm/query/writers/global_order_writer.cc | 199 ++++++++---------- 1 file changed, 93 insertions(+), 106 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 215f4b2f038..1f20f53426b 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -49,6 +49,7 @@ #include "tiledb/sm/query/hilbert_order.h" #include "tiledb/sm/query/query_macros.h" #include "tiledb/sm/stats/global_stats.h" +#include "tiledb/sm/tile/arithmetic.h" #include "tiledb/sm/tile/generic_tile_io.h" #include "tiledb/sm/tile/tile_metadata_generator.h" #include "tiledb/sm/tile/writer_tile_tuple.h" @@ -61,11 +62,78 @@ using namespace tiledb::sm::stats; namespace tiledb::sm { -static NDRange domain_tile_offset( +/** + * See `tiledb/sm/tile/arithmetic.h` function `is_rectangular_domain`. + * + * When writing multiple dense fragments the domain of each fragment + * must accurately reflect the coordinates contained in that fragment. + * This is called in `GlobalOrderWriter::identify_fragment_tile_boundaries` for + * each of the input tiles to determine whether a rectangle is formed and + * including a tile in a fragment is sound. 
+ */ +static bool is_rectangular_domain( + const ArraySchema& arrayschema, + const NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + const Domain& arraydomain = arrayschema.domain(); + + auto impl = [&](T) { + if constexpr (TileDBIntegral) { + std::vector tile_extents; + tile_extents.reserve(arraydomain.dim_num()); + for (uint64_t d = 0; d < arraydomain.dim_num(); d++) { + tile_extents.push_back(arraydomain.tile_extent(d).rvalue_as()); + } + + return is_rectangular_domain( + arrayschema.tile_order(), + tile_extents, + domain, + start_tile, + num_tiles); + } else { + return false; + } + }; + return apply_with_type(impl, arraydomain.dimension_ptr(0)->type()); +} + +/** + * See `tiledb/sm/tile/arithmetic.h` function `domain_tile_offset`. + * + * When writing multiple dense fragments the domain of each fragment + * must accurately reflect the coordinates contained in that fragment. + * This is called when starting a new fragment to update the domain of the + * previous fragment and set the correct starting domain of the new one. 
+ */ +static std::optional domain_tile_offset( const ArraySchema& arrayschema, const NDRange& domain, uint64_t start_tile, - uint64_t num_tiles); + uint64_t num_tiles) { + const Domain& arraydomain = arrayschema.domain(); + + auto impl = [&](T) { + if constexpr (TileDBIntegral) { + std::vector tile_extents; + tile_extents.reserve(arraydomain.dim_num()); + for (uint64_t d = 0; d < arraydomain.dim_num(); d++) { + tile_extents.push_back(arraydomain.tile_extent(d).rvalue_as()); + } + + return domain_tile_offset( + arrayschema.tile_order(), + tile_extents, + domain, + start_tile, + num_tiles); + } else { + return std::optional{}; + } + }; + return apply_with_type(impl, arraydomain.dimension_ptr(0)->type()); +} class GlobalOrderWriterException : public StatusException { public: @@ -716,12 +784,13 @@ Status GlobalOrderWriter::finalize_global_write_state() { if (dense()) { const uint64_t num_tiles_in_fragment = meta->loaded_metadata()->tile_offsets()[0].size(); - NDRange fragment_domain = domain_tile_offset( + std::optional fragment_domain = domain_tile_offset( array_schema_, subarray_.ndrange(0), global_write_state_->dense_.domain_tile_offset_, num_tiles_in_fragment); - meta->set_domain(fragment_domain); + iassert(fragment_domain.has_value()); + meta->set_domain(std::move(fragment_domain.value())); } // Check that the same number of cells was written across attributes @@ -1505,33 +1574,6 @@ Status GlobalOrderWriter::prepare_full_tiles_var( return Status::Ok(); } -/** - * @return the number of tiles in a "hyper-row" of `subarray` within - * `arrayschema` - * - * If a "hyper-rectangle" is a generalization of a rectangle to N dimensions, - * then let's define a "hyper-row" to be a generalization of a row to N - * dimensions. That is, a "hyper-row" is a hyper-rectangle whose length is 1 in - * the outer-most dimension. 
- */ -static uint64_t compute_hyperrow_num_tiles( - const ArraySchema& arrayschema, const NDRange& subarray) { - const uint64_t rdim = - (arrayschema.tile_order() == Layout::ROW_MAJOR ? - 0 : - arrayschema.dim_num() - 1); - - NDRange adjusted = subarray; - - // normalize `adjusted` to a single hyper-row - memcpy( - adjusted[rdim].end_fixed(), - adjusted[rdim].start_fixed(), - adjusted[rdim].size() / 2); - - return arrayschema.domain().tile_num(adjusted); -} - /** * Identifies the division of input cells into target fragments, * using `max_fragment_size_` as a hard limit on the target fragment size. @@ -1574,7 +1616,17 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( uint64_t running_tiles_size = current_fragment_size_; uint64_t fragment_size = current_fragment_size_; - // NB: gcc has a false positive uninitialized use warning for `fragment_end` + std::optional subarray_tile_offset; + if (dense()) { + if (global_write_state_->frag_meta_) { + subarray_tile_offset = global_write_state_->dense_.domain_tile_offset_ + + global_write_state_->frag_meta_->tile_index_base(); + } else { + subarray_tile_offset = 0; + } + } + +// NB: gcc has a false positive uninitialized use warning for `fragment_end` #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" uint64_t fragment_start = 0; @@ -1582,18 +1634,6 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( std::vector fragments; #pragma GCC diagnostic pop - uint64_t hyperrow_offset = 0; - std::optional hyperrow_num_tiles; - if (dense()) { - hyperrow_num_tiles = - compute_hyperrow_num_tiles(array_schema_, subarray_.ndrange(0)); - - if (global_write_state_->frag_meta_) { - hyperrow_offset = global_write_state_->dense_.domain_tile_offset_ + - global_write_state_->frag_meta_->tile_index_base(); - } - } - // Make sure we don't write more than the desired fragment size. 
for (uint64_t t = 0; t < tile_num; t++) { uint64_t tile_size = 0; @@ -1624,10 +1664,12 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( fragment_end = std::nullopt; } - if (!hyperrow_num_tiles.has_value() || - ((hyperrow_offset + t + 1) - fragment_start) % - hyperrow_num_tiles.value() == - 0) { + if (!subarray_tile_offset.has_value() || + is_rectangular_domain( + array_schema_, + subarray_.ndrange(0), + subarray_tile_offset.value() + fragment_start, + t - fragment_start + 1)) { fragment_size = running_tiles_size + tile_size; fragment_end = t + 1; } @@ -1645,62 +1687,6 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( .last_fragment_size_ = fragment_size}; } -/** - * Splits a domain at a tile boundary and returns the two halves of the split. - * - * When writing multiple dense fragments the domain of each fragment - * must accurately reflect the coordinates contained in that fragment. - * This is called when starting a new fragment to update the domain of the - * previous fragment and set the correct starting domain of the new one. - * - * @precondition `tile_offset` must be an offset which bisects the input - * hyper-rectangle into two new hyper-rectangle - */ -static NDRange domain_tile_offset( - const ArraySchema& arrayschema, - const NDRange& domain, - uint64_t start_tile, - uint64_t num_tiles) { - const Domain& arraydomain = arrayschema.domain(); - const uint64_t domain_num_tiles = arraydomain.tile_num(domain); - const uint64_t hyperrow_num_tiles = - compute_hyperrow_num_tiles(arrayschema, domain); - iassert(domain_num_tiles % hyperrow_num_tiles == 0); - iassert(start_tile % hyperrow_num_tiles == 0); - iassert(num_tiles % hyperrow_num_tiles == 0); - - const uint64_t start_hyperrow = start_tile / hyperrow_num_tiles; - const uint64_t num_hyperrows = num_tiles / hyperrow_num_tiles; - iassert(num_hyperrows > 0); - - const uint64_t rdim = - (arrayschema.tile_order() == Layout::ROW_MAJOR ? 
- 0 : - arrayschema.dim_num() - 1); - - NDRange adjusted = domain; - - auto fix_bounds = [&](T) { - const T extent = arraydomain.tile_extent(rdim).rvalue_as(); - const T lower_bound = *static_cast(domain[rdim].start_fixed()); - const T upper_bound = *static_cast(domain[rdim].end_fixed()); - T* start = static_cast(adjusted[rdim].start_fixed()); - T* end = static_cast(adjusted[rdim].end_fixed()); - - // tiles begin at [LB, LB + E, LB + 2E, ...] where LB is lower bound, E is - // extent - auto align = [lower_bound, extent](T value) -> T { - return lower_bound + ((value - lower_bound) / extent) * extent; - }; - - *start = std::max(lower_bound, align(*start + extent * start_hyperrow)); - *end = std::min(upper_bound, align(*start + extent * num_hyperrows) - 1); - }; - apply_with_type(fix_bounds, arraydomain.dimension_ptr(rdim)->type()); - - return adjusted; -} - Status GlobalOrderWriter::start_new_fragment() { // finish off current fragment if there is one if (global_write_state_->frag_meta_) { @@ -1714,12 +1700,13 @@ Status GlobalOrderWriter::start_new_fragment() { if (dense()) { const uint64_t num_tiles_in_fragment = frag_meta->loaded_metadata()->tile_offsets()[0].size(); - NDRange fragment_domain = domain_tile_offset( + std::optional fragment_domain = domain_tile_offset( array_schema_, subarray_.ndrange(0), global_write_state_->dense_.domain_tile_offset_, num_tiles_in_fragment); - frag_meta->set_domain(fragment_domain); + iassert(fragment_domain.has_value()); + frag_meta->set_domain(std::move(fragment_domain.value())); global_write_state_->dense_.domain_tile_offset_ += num_tiles_in_fragment; } From 591629efb539633400e6650ba5b9c1dbf1151e7f Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 09:28:56 -0400 Subject: [PATCH 058/109] tiledb/sm/tile/test/arithmetic.h --- tiledb/sm/tile/arithmetic.h | 4 ++ tiledb/sm/tile/test/arithmetic.h | 90 ++++++++++++++++++++++++++ tiledb/sm/tile/test/unit_arithmetic.cc | 28 ++------ 3 files changed, 99 insertions(+), 23 
deletions(-) create mode 100644 tiledb/sm/tile/test/arithmetic.h diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h index 076e530a859..1bf2c0daf00 100644 --- a/tiledb/sm/tile/arithmetic.h +++ b/tiledb/sm/tile/arithmetic.h @@ -30,6 +30,8 @@ * This file provides template definitions for doing tile arithmetic, * e.g. computing new domains based on offsets and such. */ +#ifndef TILEDB_TILE_ARITHMETIC_H +#define TILEDB_TILE_ARITHMETIC_H #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/layout.h" @@ -164,3 +166,5 @@ static std::optional domain_tile_offset( } } // namespace tiledb::sm + +#endif diff --git a/tiledb/sm/tile/test/arithmetic.h b/tiledb/sm/tile/test/arithmetic.h new file mode 100644 index 00000000000..be63f318cfe --- /dev/null +++ b/tiledb/sm/tile/test/arithmetic.h @@ -0,0 +1,90 @@ +/** + * @file tiledb/sm/tile/arithmetic.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides template definitions for functions which are + * used to test tile arithmetic. + */ +#ifndef TILEDB_TILE_TEST_ARITHMETIC_H +#define TILEDB_TILE_TEST_ARITHMETIC_H + +#include "tiledb/sm/tile/arithmetic.h" + +namespace tiledb::test { + +/** + * @return the number of tiles in `subrectangle` based on the tile sizes in + * `tile_extents` + */ +template +uint64_t compute_num_tiles( + std::span tile_extents, const sm::NDRange& subrectangle) { + uint64_t num_tiles_result = 1; + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t num_tiles_this_dimension = sm::Dimension::tile_idx( + subrectangle[d].end_as(), + subrectangle[d].start_as(), + tile_extents[d]) + + 1; + num_tiles_result *= num_tiles_this_dimension; + } + + return num_tiles_result; +} + +/** + * @return the tile offset of `subrectangle` within `domain` based on the tile + * sizes in `tile_extents` + */ +template +uint64_t compute_start_tile( + sm::Layout tile_order, + std::span tile_extents, + const sm::NDRange& domain, + const sm::NDRange& subrectangle) { + const std::vector hyperrow_sizes = + sm::compute_hyperrow_sizes(tile_order, tile_extents, domain); + + uint64_t start_tile_result = 0; + for (uint64_t di = 0; di < tile_extents.size(); di++) { + const uint64_t d = + (tile_order == sm::Layout::ROW_MAJOR ? 
di : + tile_extents.size() - di - 1); + const uint64_t start_tile_this_dimension = sm::Dimension::tile_idx( + subrectangle[d].start_as(), + domain[d].start_as(), + tile_extents[d]); + start_tile_result += start_tile_this_dimension * hyperrow_sizes[di + 1]; + } + + return start_tile_result; +} + +} // namespace tiledb::test + +#endif diff --git a/tiledb/sm/tile/test/unit_arithmetic.cc b/tiledb/sm/tile/test/unit_arithmetic.cc index 37c016dafe1..43017224817 100644 --- a/tiledb/sm/tile/test/unit_arithmetic.cc +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -5,6 +5,7 @@ #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/misc/types.h" #include "tiledb/sm/tile/arithmetic.h" +#include "tiledb/sm/tile/test/arithmetic.h" #include "tiledb/type/range/range.h" #include @@ -340,31 +341,12 @@ std::optional instance_domain_tile_offset( ASSERTER(adjusted_domain.has_value()); - uint64_t num_tiles_result = 1; - for (uint64_t d = 0; d < tile_extents.size(); d++) { - const uint64_t num_tiles_this_dimension = - sm::Dimension::tile_idx( - adjusted_domain.value()[d].end_as(), - adjusted_domain.value()[d].start_as(), - tile_extents[d]) + - 1; - num_tiles_result *= num_tiles_this_dimension; - } + const uint64_t num_tiles_result = + compute_num_tiles(tile_extents, adjusted_domain.value()); ASSERTER(num_tiles_result == num_tiles); - const std::vector hyperrow_sizes = - compute_hyperrow_sizes(tile_order, tile_extents, domain); - - uint64_t start_tile_result = 0; - for (uint64_t di = 0; di < tile_extents.size(); di++) { - const uint64_t d = - (tile_order == Layout::ROW_MAJOR ? 
di : tile_extents.size() - di - 1); - const uint64_t start_tile_this_dimension = sm::Dimension::tile_idx( - adjusted_domain.value()[d].start_as(), - domain[d].start_as(), - tile_extents[d]); - start_tile_result += start_tile_this_dimension * hyperrow_sizes[di + 1]; - } + const uint64_t start_tile_result = compute_start_tile( + tile_order, tile_extents, domain, adjusted_domain.value()); ASSERTER(start_tile_result == start_tile); return adjusted_domain; From 591460655c0aa2a62d09622a30a7cd1b93306473 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 11:02:05 -0400 Subject: [PATCH 059/109] More precise assertion of fragments covering the subarray --- test/src/unit-cppapi-max-fragment-size.cc | 170 ++++++++++------------ 1 file changed, 79 insertions(+), 91 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 1c61a2ecddc..7466849f4f8 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -38,6 +38,7 @@ #include "test/support/src/array_templates.h" #include "test/support/src/helpers.h" #include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" +#include "tiledb/api/c_api/subarray/subarray_api_internal.h" #include "tiledb/common/arithmetic.h" #include "tiledb/common/scoped_executor.h" #include "tiledb/common/stdx_string.h" @@ -45,6 +46,7 @@ #include "tiledb/sm/cpp_api/tiledb" #include "tiledb/sm/misc/constants.h" #include "tiledb/sm/query/writers/global_order_writer.h" +#include "tiledb/sm/tile/test/arithmetic.h" #include "tiledb/sm/tile/tile.h" #include @@ -599,6 +601,8 @@ instance_dense_global_order( num_tiles_per_hyperrow *= dimensions[dim].num_tiles(subarray[dim]); } + sm::NDRange smsubarray; + // write data, should be split into multiple fragments { Array array(ctx, array_name, TILEDB_WRITE); @@ -661,6 +665,8 @@ instance_dense_global_order( } query.finalize(); + + smsubarray = sub.ptr()->subarray()->ndrange(0); } // then read 
back @@ -699,59 +705,69 @@ instance_dense_global_order( } // the fragments are not always emitted in the same order, sort them - std::sort( - fragment_domains.begin(), - fragment_domains.end(), - [&](const auto& left, const auto& right) -> bool { - for (uint64_t d = 0; d < dimensions.size(); d++) { - if (left[d].lower_bound < right[d].lower_bound) { - return true; - } else if (left[d].lower_bound > right[d].lower_bound) { - return false; - } else if (left[d].upper_bound < right[d].upper_bound) { - return true; - } else if (left[d].upper_bound > right[d].upper_bound) { - return false; - } - } + auto domain_cmp = [&](const auto& left, const auto& right) { + for (uint64_t d = 0; d < dimensions.size(); d++) { + if (left[d].lower_bound < right[d].lower_bound) { + return true; + } else if (left[d].lower_bound > right[d].lower_bound) { return false; + } else if (left[d].upper_bound < right[d].upper_bound) { + return true; + } else if (left[d].upper_bound > right[d].upper_bound) { + return false; + } + } + return false; + }; + std::vector fragments_in_order(finfo.fragment_num()); + std::iota(fragments_in_order.begin(), fragments_in_order.end(), 0); + std::sort( + fragments_in_order.begin(), + fragments_in_order.end(), + [&](const uint32_t f_left, const uint32_t f_right) -> bool { + const auto& left = fragment_domains[f_left]; + const auto& right = fragment_domains[f_right]; + return domain_cmp(left, right); }); + std::sort(fragment_domains.begin(), fragment_domains.end(), domain_cmp); + + std::vector tile_extents; + for (const auto& dimension : dimensions) { + tile_extents.push_back(dimension.extent); + } // validate fragment domains ASSERTER(!fragment_domains.empty()); - ASSERTER(fragment_domains[0][0].lower_bound == subarray[0].lower_bound); - ASSERTER(fragment_domains.back()[0].upper_bound == subarray[0].upper_bound); - for (uint32_t f = 0; f < fragment_domains.size(); f++) { - if (tile_order == TILEDB_ROW_MAJOR) { - // first dimension is ranging and contiguous - if (f 
> 0) { - ASSERTER( - fragment_domains[f - 1][0].upper_bound + 1 == - fragment_domains[f][0].lower_bound); - } - // non-first dimensions should match - for (uint64_t d = 1; d < dimensions.size(); d++) { - ASSERTER(fragment_domains[f][d] == subarray[d]); - } - } else { - // last dimension is ranging and contiguous - const uint64_t num_dims = dimensions.size(); - if (f > 0) { - ASSERTER( - fragment_domains[f - 1][num_dims - 1].upper_bound + 1 == - fragment_domains[f][num_dims - 1].lower_bound); - } - // non-last dimensions should match - for (uint64_t d = 0; d < num_dims - 1; d++) { - ASSERTER(fragment_domains[f][d] == subarray[d]); - } - } + + // fragment domains should be contiguous in global order and cover the whole + // subarray + uint64_t subarray_tile_offset = 0; + for (uint32_t f = 0; f < fragments_in_order.size(); f++) { + const sm::NDRange& internal_domain = + finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[fragments_in_order[f]] + .non_empty_domain(); + + const uint64_t f_num_tiles = + compute_num_tiles(tile_extents, internal_domain); + const uint64_t f_start_tile = compute_start_tile( + static_cast(tile_order), + tile_extents, + smsubarray, + internal_domain); + + ASSERTER(f_start_tile == subarray_tile_offset); + subarray_tile_offset += f_num_tiles; } + ASSERTER( + subarray_tile_offset == + compute_num_tiles(tile_extents, smsubarray)); - auto meta_size = [&finfo](uint32_t f) -> uint64_t { + auto meta_size = [&](uint32_t f) -> uint64_t { return finfo.ptr() ->fragment_info() - ->single_fragment_info_vec()[f] + ->single_fragment_info_vec()[fragments_in_order[f]] .meta() ->fragment_meta_size(); }; @@ -799,6 +815,8 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { << ", cell_order = " << sm::layout_str(static_cast(cell_order))) { // each tile is a full row of a 2D array + // NB: since each tile is a whole row we observe the same results regardless + // of tile order SECTION("Row tiles") { using Dim = 
templates::Dimension; using Dom = templates::Domain; @@ -820,54 +838,24 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { DYNAMIC_SECTION( "num_rows = " << num_rows << ", write_unit_num_cells = " << write_unit_num_cells) { - if (tile_order == TILEDB_COL_MAJOR && num_rows > 1) { - // Consider the following example: - // - // [ 1 1 1 1 2 2 2 2 ] - // [ 3 3 3 3 4 4 4 4 ] - // [ 5 5 5 5 6 6 6 6 ] - // [ 7 7 7 7 8 8 8 8 ] - // - // In row major order we can see that there are two tiles per - // "hyper-row". In column major order instead the tiles are placed [1 - // 3 5 7 2 4 6 8]. A "hyperrow" is not formed until tile 7 is - // written... i.e the number of rows. - // - // But wait, this example only has one tile per row! And indeed this - // does mean that each tile can be its own hyper-row again. For - // simplicity we elect not to implement that special case. - const auto expect = Catch::Matchers::ContainsSubstring( - "Fragment size is too small to subdivide dense subarray into " - "multiple fragments"); - REQUIRE_THROWS( - instance_dense_global_order( - ctx, - tile_order, - cell_order, - max_fragment_size, - dimensions, - subarray), - expect); - } else { - const auto actual = instance_dense_global_order( - ctx, - tile_order, - cell_order, - max_fragment_size, - dimensions, - subarray, - write_unit_num_cells == 0 ? - std::nullopt : - std::optional{write_unit_num_cells}); - - std::vector> expect; - for (uint64_t r = 0; r < num_rows; r++) { - expect.push_back( - {Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); - } - - CHECK(expect == actual); + const auto actual = instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional{write_unit_num_cells}); + + std::vector> expect; + for (uint64_t r = 0; r < num_rows; r++) { + expect.push_back( + {Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); } + + CHECK(expect == actual); } } From 7e6dc22c9beb12bdd24aeafaff9eac495df2bb42 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 11:07:23 -0400 Subject: [PATCH 060/109] Flat rectangular prism tiles --- test/src/unit-cppapi-max-fragment-size.cc | 112 ++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 7466849f4f8..01738348cec 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -956,6 +956,118 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { } } } + + // Each tile is a rectangular prism of height 1 + // Use the same inputs as above except there is a third outer dimension with + // extent 1 + SECTION("Flat rectangular prism tiles") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + const uint64_t d0_extent = 1; + const Dom d0_height(0, 0); + + const uint64_t d1_extent = GENERATE(8, 4); + constexpr size_t d2_span = 10000; + REQUIRE(d2_span % d1_extent == 0); // for row major + + const uint64_t d1_subarray = 16; + REQUIRE(d2_span % d1_subarray == 0); // for column major + + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, d0_extent), + Dim(0, std::numeric_limits::max() - 1, d1_extent), + Dim(0, d2_span - 1, d2_span / d1_extent)}; + + const uint64_t d1_start_offset = GENERATE(0, 1); + const uint64_t d1_end_offset = GENERATE(0, 1); + const uint64_t d1_start = 100 + d1_start_offset; + const uint64_t d1_end = d1_start + d1_subarray - 1 - d1_end_offset; + const std::vector subarray = { + d0_height, Dom(d1_start, d1_end), Dom(0, d2_span - 1)}; + + const uint64_t max_fragment_size = 4 * 64 * 1024; + + const uint64_t 
write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); + + DYNAMIC_SECTION( + "start_offset = " + << d1_start_offset << ", end_offset = " << d1_end_offset + << ", extent = " << d1_extent + << ", write_unit_num_cells = " << write_unit_num_cells) { + if (d1_extent == 8) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray), + expect); + } else if (d1_start_offset + d1_end_offset > 0) { + // if this constraint is ever relaxed this test must be extended + // with new inputs which are offset within a tile + const auto expect = Catch::Matchers::ContainsSubstring( + "the subarray must coincide with the tile bounds"); + REQUIRE_THROWS(instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional(write_unit_num_cells))); + } else { + std::vector> expect; + if (tile_order == TILEDB_ROW_MAJOR) { + expect = { + {d0_height, + Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {d0_height, + Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {d0_height, + Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {d0_height, + Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), + Dom(0, d2_span - 1)}}; + } else { + expect = { + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(0 * (d2_span / 4), 1 * (d2_span / 4) - 1)}, + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(1 * (d2_span / 4), 2 * (d2_span / 4) - 1)}, + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(2 * (d2_span / 4), 3 * (d2_span / 4) - 1)}, + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(3 * (d2_span / 4), 4 * (d2_span / 4) - 1)}, + }; + } + + const auto actual = instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray); + + CHECK(expect == actual); + } + } + } } // examples found from the rapidcheck test From dc04226911c1a351d9b600eaf16431eb5557d398 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 11:19:57 -0400 Subject: [PATCH 061/109] If fragment size is not set always flush tiles --- tiledb/sm/query/query.cc | 3 +-- tiledb/sm/query/query.h | 2 +- tiledb/sm/query/writers/global_order_writer.cc | 7 ++++--- tiledb/sm/query/writers/global_order_writer.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 5fa194b2b43..7f09475788a 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -132,7 +132,6 @@ Query::Query( , remote_query_(false) , is_dimension_label_ordered_read_(false) , dimension_label_increasing_(true) - , 
fragment_size_(std::numeric_limits::max()) , memory_budget_(memory_budget) , query_remote_buffer_storage_(std::nullopt) , default_channel_{make_shared(HERE(), *this, 0)} { @@ -1638,7 +1637,7 @@ Status Query::submit() { } // Make sure fragment size is only set for global order. - if (fragment_size_ != std::numeric_limits::max() && + if (fragment_size_.has_value() && (layout_ != Layout::GLOBAL_ORDER || type_ != QueryType::WRITE)) { throw QueryException( "[submit] Fragment size is only supported for global order writes."); diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index 4da5fe8444f..c33d32bd559 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -1110,7 +1110,7 @@ class Query { * * Note: This is only used for global order writes. */ - uint64_t fragment_size_; + std::optional fragment_size_; /** * Memory budget. If set to nullopt, the value will be obtained from the diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 1f20f53426b..e405ff67024 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -180,7 +180,7 @@ GlobalOrderWriter::GlobalOrderWriter( stats::Stats* stats, shared_ptr logger, StrategyParams& params, - uint64_t fragment_size, + std::optional fragment_size, std::vector& written_fragment_info, bool disable_checks_consolidation, std::vector& processed_conditions, @@ -1643,7 +1643,8 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( // NB: normally this should only hit once, but if there is a single // tile larger than the max fragment size it could hit twice and error - while (running_tiles_size + tile_size > max_fragment_size_) { + while (running_tiles_size + tile_size > + max_fragment_size_.value_or(std::numeric_limits::max())) { if (running_tiles_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to write a single tile"); @@ -1664,7 +1665,7 @@ 
GlobalOrderWriter::identify_fragment_tile_boundaries( fragment_end = std::nullopt; } - if (!subarray_tile_offset.has_value() || + if (!subarray_tile_offset.has_value() || !max_fragment_size_.has_value() || is_rectangular_domain( array_schema_, subarray_.ndrange(0), diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index 7991e3ec432..3f2ad7dd789 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -145,7 +145,7 @@ class GlobalOrderWriter : public WriterBase { stats::Stats* stats, shared_ptr logger, StrategyParams& params, - uint64_t fragment_size, + std::optional fragment_size, std::vector& written_fragment_info, bool disable_checks_consolidation, std::vector& processed_conditions, @@ -237,7 +237,7 @@ class GlobalOrderWriter : public WriterBase { * The desired fragment size, in bytes. The writer will create a new fragment * once this size has been reached. */ - uint64_t max_fragment_size_; + std::optional max_fragment_size_; /** * Size currently written to the fragment. From 713c541692477795dca05645e16587aa8a576147 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 11:43:42 -0400 Subject: [PATCH 062/109] Add shrinking examples --- test/src/unit-cppapi-max-fragment-size.cc | 60 +++++++++++++++++++---- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 01738348cec..40c90e67934 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1229,10 +1229,24 @@ void rapidcheck_dense_array( TEST_CASE( "C++ API: Max fragment size dense array rapidcheck 1d", "[cppapi][max-frag-size][rapidcheck]") { + static constexpr auto DT = sm::Datatype::UINT64; + using Dim64 = templates::Dimension
; + using Dom64 = Dim64::domain_type; + Context ctx; + + SECTION("Shrinking") { + instance_dense_global_order( + ctx, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 2396, + {Dim64(0, 8929, 594)}, + {Dom64(0, 2969)}); + } + rc::prop("max fragment size dense 1d", [&ctx]() { - static constexpr auto DT = sm::Datatype::UINT64; - templates::Dimension
d1 = *rc::make_dimension
(8192); + Dim64 d1 = *rc::make_dimension
(8192); rapidcheck_dense_array
(ctx, {d1}); }); @@ -1241,11 +1255,25 @@ TEST_CASE( TEST_CASE( "C++ API: Max fragment size dense array rapidcheck 2d", "[cppapi][max-frag-size][rapidcheck]") { + static constexpr auto DT = sm::Datatype::UINT64; + using Dim64 = templates::Dimension
; + using Dom64 = Dim64::domain_type; + Context ctx; + + SECTION("Shrinking") { + instance_dense_global_order( + ctx, + TILEDB_ROW_MAJOR, + TILEDB_COL_MAJOR, + 48, + {Dim64(0, 116, 1), Dim64(0, 0, 1)}, + {Dom64(2, 20), Dom64(0, 0)}); + } + rc::prop("max fragment size dense 2d", [&ctx]() { - static constexpr auto DT = sm::Datatype::UINT64; - templates::Dimension
d1 = *rc::make_dimension
(128); - templates::Dimension
d2 = *rc::make_dimension
(128); + Dim64 d1 = *rc::make_dimension
(128); + Dim64 d2 = *rc::make_dimension
(128); rapidcheck_dense_array
(ctx, {d1, d2}); }); @@ -1254,12 +1282,26 @@ TEST_CASE( TEST_CASE( "C++ API: Max fragment size dense array rapidcheck 3d", "[cppapi][max-frag-size][rapidcheck]") { + static constexpr auto DT = sm::Datatype::UINT64; + using Dim64 = templates::Dimension
; + using Dom64 = Dim64::domain_type; + Context ctx; + + SECTION("Shrinking") { + instance_dense_global_order( + ctx, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 2160, + {Dim64(0, 85, 5), Dim64(0, 102, 2), Dim64(0, 37, 1)}, + {Dom64(5, 19), Dom64(4, 15), Dom64(1, 6)}); + } + rc::prop("max fragment size dense 3d", [&ctx]() { - static constexpr auto DT = sm::Datatype::UINT64; - templates::Dimension
d1 = *rc::make_dimension
(32); - templates::Dimension
d2 = *rc::make_dimension
(32); - templates::Dimension
d3 = *rc::make_dimension
(32); + Dim64 d1 = *rc::make_dimension
(32); + Dim64 d2 = *rc::make_dimension
(32); + Dim64 d3 = *rc::make_dimension
(32); rapidcheck_dense_array
(ctx, {d1, d2, d3}); }); From 3b45c6e2da2605f0f554e1c783ad3c705de0db4d Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 13:01:50 -0400 Subject: [PATCH 063/109] Clamp domain_tile_offset output to the domain arg --- tiledb/sm/tile/arithmetic.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h index 1bf2c0daf00..e914ce0f52d 100644 --- a/tiledb/sm/tile/arithmetic.h +++ b/tiledb/sm/tile/arithmetic.h @@ -159,7 +159,9 @@ static std::optional domain_tile_offset( const T end = domain[d].start_as() + (this_dimension_end_tile * tile_extents[d]) + tile_extents[d] - 1; - r[d] = Range(start, end); + r[d] = Range( + std::max(domain[d].start_as(), start), + std::min(domain[d].end_as(), end)); } return r; From e4c2e3f392e38030ba539e8594cfd0fcfeb7c3ea Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 15:00:52 -0400 Subject: [PATCH 064/109] Pad subarray to domain tiles --- tiledb/sm/query/writers/global_order_writer.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index e405ff67024..d79f6d710fa 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -78,6 +78,11 @@ static bool is_rectangular_domain( uint64_t num_tiles) { const Domain& arraydomain = arrayschema.domain(); + // NB: ordinary write subarray must be tile aligned but the consolidation + // subarray is not required to be + NDRange arraydomain_aligned = domain; + arraydomain.expand_to_tiles_when_no_current_domain(arraydomain_aligned); + auto impl = [&](T) { if constexpr (TileDBIntegral) { std::vector tile_extents; @@ -89,7 +94,7 @@ static bool is_rectangular_domain( return is_rectangular_domain( arrayschema.tile_order(), tile_extents, - domain, + arraydomain_aligned, start_tile, num_tiles); } else { @@ -114,6 +119,11 @@ static 
std::optional domain_tile_offset( uint64_t num_tiles) { const Domain& arraydomain = arrayschema.domain(); + // NB: ordinary write subarray must be tile aligned but the consolidation + // subarray is not required to be + NDRange arraydomain_aligned = domain; + arraydomain.expand_to_tiles_when_no_current_domain(arraydomain_aligned); + auto impl = [&](T) { if constexpr (TileDBIntegral) { std::vector tile_extents; @@ -125,7 +135,7 @@ static std::optional domain_tile_offset( return domain_tile_offset( arrayschema.tile_order(), tile_extents, - domain, + arraydomain_aligned, start_tile, num_tiles); } else { From c7eaf8e8c3786970bcc32807c6a68712b72df3e9 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 15:01:41 -0400 Subject: [PATCH 065/109] REQUIRE_THROWS needs matcher arg --- test/src/unit-cppapi-max-fragment-size.cc | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 40c90e67934..57bb4a4e25a 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -909,16 +909,18 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { // with new inputs which are offset within a tile const auto expect = Catch::Matchers::ContainsSubstring( "the subarray must coincide with the tile bounds"); - REQUIRE_THROWS(instance_dense_global_order( - ctx, - tile_order, - cell_order, - max_fragment_size, - dimensions, - subarray, - write_unit_num_cells == 0 ? - std::nullopt : - std::optional(write_unit_num_cells))); + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional(write_unit_num_cells)), + expect); } else { std::vector> expect; if (tile_order == TILEDB_ROW_MAJOR) { From 325b9435d2d88fe23028dd7601dc740ad8c97a89 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 15:11:11 -0400 Subject: [PATCH 066/109] Revert "Domain::update_cell_num_per_tile" This reverts commit 59c77aaad19bb0932a42bfcb188876dd237a9074. --- tiledb/sm/array_schema/domain.cc | 56 +++++++++++++++++--------------- tiledb/sm/array_schema/domain.h | 11 ++----- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/tiledb/sm/array_schema/domain.cc b/tiledb/sm/array_schema/domain.cc index bde13d0ed98..bef2c6b8ad9 100644 --- a/tiledb/sm/array_schema/domain.cc +++ b/tiledb/sm/array_schema/domain.cc @@ -105,7 +105,7 @@ Domain::Domain( } // Compute number of cells per tile - update_cell_num_per_tile(); + compute_cell_num_per_tile(); // Compute number of cells per tile set_tile_cell_order_cmp_funcs(); @@ -134,7 +134,7 @@ void Domain::add_dimension(shared_ptr dim) { ++dim_num_; // Compute number of cells per tile - update_cell_num_per_tile(); + compute_cell_num_per_tile(); } bool Domain::all_dims_fixed() const { @@ -187,7 +187,7 @@ bool Domain::all_dims_same_type() const { } uint64_t Domain::cell_num_per_tile() const { - return cell_num_per_tile_.value_or(0); + return cell_num_per_tile_; } template <> @@ -716,30 +716,38 @@ int Domain::tile_order_cmp( /* PRIVATE METHODS */ /* ****************************** */ -std::optional Domain::compute_cell_num_per_tile() const { +void Domain::compute_cell_num_per_tile() { // Applicable to dimensions that have the same type if (!all_dims_same_type()) - return std::nullopt; + return; // Invoke the proper templated function auto type{dimension_ptrs_[0]->type()}; switch (type) { case Datatype::INT32: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case Datatype::INT64: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case 
Datatype::INT8: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case Datatype::UINT8: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case Datatype::INT16: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case Datatype::UINT16: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case Datatype::UINT32: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case Datatype::UINT64: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; case Datatype::DATETIME_YEAR: case Datatype::DATETIME_MONTH: case Datatype::DATETIME_WEEK: @@ -762,33 +770,29 @@ std::optional Domain::compute_cell_num_per_tile() const { case Datatype::TIME_PS: case Datatype::TIME_FS: case Datatype::TIME_AS: - return compute_cell_num_per_tile(); + compute_cell_num_per_tile(); + break; default: - return std::nullopt; + return; } } template -std::optional Domain::compute_cell_num_per_tile() const { +void Domain::compute_cell_num_per_tile() { // Applicable only to integer domains if (!std::is_integral::value) - return std::nullopt; + return; // Applicable only to non-NULL space tiles if (null_tile_extents()) - return std::nullopt; + return; - uint64_t cell_num_per_tile = 1; + cell_num_per_tile_ = 1; for (unsigned d = 0; d < dim_num_; ++d) { auto tile_extent = *(const T*)this->tile_extent(d).data(); - cell_num_per_tile = - Dimension::tile_extent_mult(cell_num_per_tile, tile_extent); + cell_num_per_tile_ = + Dimension::tile_extent_mult(cell_num_per_tile_, tile_extent); } - return cell_num_per_tile; -} - -void Domain::update_cell_num_per_tile() { - cell_num_per_tile_ = compute_cell_num_per_tile(); } void Domain::set_tile_cell_order_cmp_funcs() { diff --git a/tiledb/sm/array_schema/domain.h b/tiledb/sm/array_schema/domain.h index 4132f2dbc6e..fa6a1a7188a 100644 --- a/tiledb/sm/array_schema/domain.h +++ b/tiledb/sm/array_schema/domain.h @@ -496,7 +496,7 
@@ class Domain { shared_ptr memory_tracker_; /** The number of cells per tile. Meaningful only for the **dense** case. */ - std::optional cell_num_per_tile_; + uint64_t cell_num_per_tile_; /** The cell order of the array the domain belongs to. */ Layout cell_order_; @@ -599,7 +599,7 @@ class Domain { const Dimension* dim, const void* coord_a, const void* coord_b); /** Compute the number of cells per tile. */ - std::optional compute_cell_num_per_tile() const; + void compute_cell_num_per_tile(); /** * Compute the number of cells per tile. @@ -608,12 +608,7 @@ class Domain { * @return void */ template - std::optional compute_cell_num_per_tile() const; - - /** - * Computes and updates the number of cells per tile. - */ - void update_cell_num_per_tile(); + void compute_cell_num_per_tile(); /** Prepares the comparator functions for each dimension. */ void set_tile_cell_order_cmp_funcs(); From 25903771ece75d36420ddd4217627b48c2d46a01 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 15:13:19 -0400 Subject: [PATCH 067/109] Revert "Attribute::set_fill_value" This reverts commit 153f1b0b6ad78ce72af2739d8c2241fe9c84bb80. --- tiledb/sm/cpp_api/attribute.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tiledb/sm/cpp_api/attribute.h b/tiledb/sm/cpp_api/attribute.h index 03ca2d696ef..6aacce384eb 100644 --- a/tiledb/sm/cpp_api/attribute.h +++ b/tiledb/sm/cpp_api/attribute.h @@ -275,11 +275,6 @@ class Attribute { return *this; } - template - Attribute& set_fill_value(T value) { - return set_fill_value(static_cast(&value), sizeof(T)); - } - /** * Gets the default fill value for the input attribute. 
This value will * be used for the input attribute whenever querying (1) an empty cell in From 050b976e2ac285b1f308f53e7c6ae5e60fba0a45 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 15:17:16 -0400 Subject: [PATCH 068/109] Remove duplicate rapidcheck checked_sub function --- .../rapidcheck/array_schema_templates.h | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/test/support/rapidcheck/array_schema_templates.h b/test/support/rapidcheck/array_schema_templates.h index 084be02935d..642a25d5b6f 100644 --- a/test/support/rapidcheck/array_schema_templates.h +++ b/test/support/rapidcheck/array_schema_templates.h @@ -76,24 +76,6 @@ struct Arbitrary> { } }; -/** - * @return `a - b` if it does not overflow, `std::nullopt` if it does - */ -template -std::optional checked_sub(T a, T b) { - if (!std::is_signed::value) { - return (b > a ? std::nullopt : std::optional(a - b)); - } else if (b < 0) { - return ( - std::numeric_limits::max() + b < a ? std::nullopt : - std::optional(a - b)); - } else { - return ( - std::numeric_limits::min() - b > a ? std::nullopt : - std::optional(a - b)); - } -} - template Gen make_extent( const templates::Domain& domain, std::optional bound = std::nullopt) { @@ -118,8 +100,8 @@ Gen make_extent( D extent_lower_bound = 1; D extent_upper_bound; - const auto bound_distance = - checked_sub(domain.upper_bound, domain.lower_bound); + const auto bound_distance = tiledb::common::checked_arithmetic::sub( + domain.upper_bound, domain.lower_bound); if (bound_distance.has_value()) { extent_upper_bound = (bound_distance.value() < extent_bound ? bound_distance.value() + 1 : From 46589a404c76403b1bfc823fa727f335c15f9329 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 15:21:08 -0400 Subject: [PATCH 069/109] Revert "NDRangeTypedAccess" This reverts commit 889cbc8d749660f35a526767011debe963d28ee2. 
--- tiledb/sm/misc/types.h | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/tiledb/sm/misc/types.h b/tiledb/sm/misc/types.h index 914a35c9a15..22b3ca2add2 100644 --- a/tiledb/sm/misc/types.h +++ b/tiledb/sm/misc/types.h @@ -47,31 +47,6 @@ namespace tiledb::sm { /** An N-dimensional range, consisting of a vector of 1D ranges. */ using NDRange = std::vector; -/** - * Helper functions for more concisely accessing or manipulating - * fields of a range with static typing. - * - * Useful with `apply_with_type`. - */ -template -struct NDRangeTypedAccess { - static T& lower_bound(NDRange& range, uint64_t dim) { - return *static_cast(range[dim].start_fixed()); - } - - static const T& lower_bound(const NDRange& range, uint64_t dim) { - return *static_cast(range[dim].start_fixed()); - } - - static T& upper_bound(NDRange& range, uint64_t dim) { - return *static_cast(range[dim].end_fixed()); - } - - static const T& upper_bound(const NDRange& range, uint64_t dim) { - return *static_cast(range[dim].end_fixed()); - } -}; - /** An untyped value, barely more than raw storage. This class is only * transitional. All uses should be rewritten to use ordinary types. Consider * it deprecated at creation. 
From 0ffb91f835f6d628d34871ef38fde214e083ac0d Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 22:09:35 -0400 Subject: [PATCH 070/109] Fix domain padding in domain_tile_offset --- tiledb/sm/query/writers/global_order_writer.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index d79f6d710fa..32ee22309b9 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -120,7 +120,7 @@ static std::optional domain_tile_offset( const Domain& arraydomain = arrayschema.domain(); // NB: ordinary write subarray must be tile aligned but the consolidation - // subarray is not required to be + // subarray is not required to be. Align for purposes of tile arithmetic. NDRange arraydomain_aligned = domain; arraydomain.expand_to_tiles_when_no_current_domain(arraydomain_aligned); @@ -132,16 +132,25 @@ static std::optional domain_tile_offset( tile_extents.push_back(arraydomain.tile_extent(d).rvalue_as()); } - return domain_tile_offset( + std::optional r = domain_tile_offset( arrayschema.tile_order(), tile_extents, arraydomain_aligned, start_tile, num_tiles); + if (r.has_value()) { + // aligning to the array domain may have extended beyond the subarray, + // clamp the result back within the subarray bounds + for (uint64_t d = 0; d < arraydomain.dim_num(); d++) { + tiledb::type::crop_range(domain[d], r.value()[d]); + } + } + return r; } else { return std::optional{}; } }; + return apply_with_type(impl, arraydomain.dimension_ptr(0)->type()); } From 646a72677a6eb06fddc822ea74bbffa1878e7095 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 22:26:14 -0400 Subject: [PATCH 071/109] Fix fragment order in size comparisons --- test/src/unit-cppapi-max-fragment-size.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git 
a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 57bb4a4e25a..f6b5880a47b 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -767,14 +767,14 @@ instance_dense_global_order( auto meta_size = [&](uint32_t f) -> uint64_t { return finfo.ptr() ->fragment_info() - ->single_fragment_info_vec()[fragments_in_order[f]] + ->single_fragment_info_vec()[f] .meta() ->fragment_meta_size(); }; // validate fragment size - no fragment should be larger than max requested // size - for (uint32_t f = 0; f < finfo.fragment_num(); f++) { + for (uint32_t f : fragments_in_order) { const uint64_t fsize = finfo.fragment_size(f); const uint64_t fmetasize = meta_size(f); ASSERTER(fsize <= max_fragment_size + fmetasize); @@ -782,10 +782,12 @@ instance_dense_global_order( // validate fragment size - we wrote the largest possible fragments (no two // adjacent should be under max fragment size) - for (uint32_t f = 1; f < finfo.fragment_num(); f++) { + for (uint32_t fi = 1; fi < fragments_in_order.size(); fi++) { + const uint32_t fprev = fragments_in_order[fi - 1]; + const uint32_t fcur = fragments_in_order[fi]; const uint64_t combined_size = - finfo.fragment_size(f - 1) + finfo.fragment_size(f); - const uint64_t combined_meta_size = meta_size(f - 1) + meta_size(f); + finfo.fragment_size(fprev) + finfo.fragment_size(fcur); + const uint64_t combined_meta_size = meta_size(fprev) + meta_size(fcur); ASSERTER(combined_size > max_fragment_size + combined_meta_size); } From 0722fbe993c6e02d5990272557207d36facd2e5a Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 22:27:02 -0400 Subject: [PATCH 072/109] Update GlobalOrderWriter comments/docs --- .../sm/query/writers/global_order_writer.cc | 41 +++----------- tiledb/sm/query/writers/global_order_writer.h | 54 ++++++++++++++++--- 2 files changed, 54 insertions(+), 41 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc 
b/tiledb/sm/query/writers/global_order_writer.cc index 32ee22309b9..ca474199b70 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -161,36 +161,6 @@ class GlobalOrderWriterException : public StatusException { } }; -namespace global_order_writer { - -/** - * Contains the return values of - * `GlobalOrderWriter::identify_fragment_tile_boundaries`. - */ -struct FragmentTileBoundaries { - /** - * The offsets where each complete fragment starts. - */ - std::vector tile_offsets_; - - /** - * The number of writeable tiles. - * For sparse arrays this is the number of tiles of input. - * For dense arrays this may be less if there is a trail of tiles which cannot - * be guaranteed to fit within `max_fragment_size` while also forming a - * rectangular domain. - */ - uint64_t num_writeable_tiles_; - - /** - * The size in bytes of the filtered tiles which are written to the last - * fragment. The last fragment may be resumed by a subsequent `submit`. - */ - uint64_t last_fragment_size_; -}; - -} // namespace global_order_writer - /* ****************************** */ /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ @@ -772,9 +742,12 @@ Status GlobalOrderWriter::finalize_global_write_state() { iassert(global_write_state_->frag_meta_); throw_if_not_ok(populate_fragment( global_write_state_->last_tiles_, 0, num_remaining)); - // NB: there is a possibility here that we write a tile bigger than the + + // FIXME: there is a possibility here that we write a tile bigger than the // max fragment size if these remaining tiles fill it up and then the last - // tile runs over... we can live with that right? + // tile runs over... in this case we need to do the rectangle thing all + // over again so as to avoid writing a fragment which exceeds the max + // fragment size. 
} } else { iassert(global_write_state_->last_tiles_.begin()->second.size() <= 1); @@ -1606,7 +1579,7 @@ Status GlobalOrderWriter::prepare_full_tiles_var( * @return a list of (fragment size, tile offset) pairs identifying the division * of input data into target fragments */ -global_order_writer::FragmentTileBoundaries +GlobalOrderWriter::FragmentTileBoundaries GlobalOrderWriter::identify_fragment_tile_boundaries( const tdb::pmr::unordered_map& tiles) const { @@ -1701,7 +1674,7 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( fragments.push_back(fragment_start); } - return global_order_writer::FragmentTileBoundaries{ + return GlobalOrderWriter::FragmentTileBoundaries{ .tile_offsets_ = fragments, .num_writeable_tiles_ = fragment_end.value_or(fragment_start), .last_fragment_size_ = fragment_size}; diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index 3f2ad7dd789..b3eff76d081 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -46,10 +46,6 @@ using namespace tiledb::common; namespace tiledb { namespace sm { -namespace global_order_writer { -struct FragmentTileBoundaries; -} - /** Processes write queries. */ class GlobalOrderWriter : public WriterBase { public: @@ -125,6 +121,24 @@ class GlobalOrderWriter : public WriterBase { /** * State for writing dense fragments. + * + * Dense fragments use the bounding rectangle as a precise determination + * of where the contents of the fragment are in the domain, and as such + * it must be written correctly. 
This is usually not a problem, however + * global order writes can: + * 1) split up a single write into multiple fragments in order to satisfy + * the `max_fragment_size_` parameter + * 2) write into a single domain over the course of multiple `submit` + * calls which each write an arbitrary subset of the domain, + * re-using the buffers + * + * Both of these make it non-trivial to determine what the domain written + * into a fragment actually was, when the fragment fills up + * `max_fragment_size`. + * + * The fields of this struct, as well as `last_tiles_` of the outer struct, + * are used to track the amount of data which the writer has already + * processed so as to keep the correct position in the target subarray. */ struct DenseWriteState { /** @@ -399,6 +413,32 @@ class GlobalOrderWriter : public WriterBase { const std::set& coord_dups, WriterTileTupleVector* tiles) const; + /** + * Contains the return values of + * `GlobalOrderWriter::identify_fragment_tile_boundaries`. + */ + struct FragmentTileBoundaries { + /** + * The offsets where each complete fragment starts. + */ + std::vector tile_offsets_; + + /** + * The number of writeable tiles. + * For sparse arrays this is the number of tiles of input. + * For dense arrays this may be less if there is a trail of tiles which + * cannot be guaranteed to fit within `max_fragment_size` while also forming + * a rectangular domain. + */ + uint64_t num_writeable_tiles_; + + /** + * The size in bytes of the filtered tiles which are written to the last + * fragment. The last fragment may be resumed by a subsequent `submit`. + */ + uint64_t last_fragment_size_; + }; + /** * Identify the manner in which the filtered input tiles map onto target * fragments. If `max_fragment_size_` is much larger than the input, this may @@ -409,10 +449,10 @@ class GlobalOrderWriter : public WriterBase { * corresponds to that fragment. * * @param tiles Map of vector of tiles, per attributes. 
- * @return a list of `(fragment_size, start_tile)` pairs ordered on - * `start_tile` + * + * @return see `FragmentTileBoundaries` documentation */ - global_order_writer::FragmentTileBoundaries identify_fragment_tile_boundaries( + FragmentTileBoundaries identify_fragment_tile_boundaries( const tdb::pmr::unordered_map& tiles) const; From f0de1c0b0a4efd6748e9aaa6026dd7d5ae8c4e8b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 28 Oct 2025 22:40:30 -0400 Subject: [PATCH 073/109] Add definitions to top of tile/arithmetic.h --- tiledb/sm/tile/arithmetic.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h index e914ce0f52d..4957d0113af 100644 --- a/tiledb/sm/tile/arithmetic.h +++ b/tiledb/sm/tile/arithmetic.h @@ -29,6 +29,20 @@ * * This file provides template definitions for doing tile arithmetic, * e.g. computing new domains based on offsets and such. + * + * Definitions: + * + * **Hyperrectangle**: + * The generalization of a rectangle to higher dimensions. + * This is a standard term from mathematical literature. + * + * **Hyperrow**: + * The generalization of a row to higher dimensions. + * This does not appear to be a standard term from mathematical literature. + * A row in a 2D domain is a rectangle of height 1, i.e. spanning a single + * coordinate of the outermost "row" dimension. So, in a higher-dimensional + * plane, a hyperrow is a hyperrectangle which spans a single coordinate of the + * outermost dimension. For example, in a 3D domain a hyperrow is a plane. */ #ifndef TILEDB_TILE_ARITHMETIC_H #define TILEDB_TILE_ARITHMETIC_H @@ -78,12 +92,12 @@ static bool is_rectangular_domain( } /** - * Compute the number of tiles per hyper-row for the given `domain` with tiles + * Compute the number of tiles per hyperrow for the given `domain` with tiles * given by `tile_extents`. * * For D dimensions, the returned vector contains `D+1` elements. 
* Position 0 is the number of tiles in `domain`. - * For dimension `d`, position `d + 1` is the number of tiles in a hyper-row of + * For dimension `d`, position `d + 1` is the number of tiles in a hyperrow of * dimension `d` (and is thus always 1 for the final dimension). */ template From 70aa993a7e03d10ae02d8a5cf0f4d4975fa5f51b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 29 Oct 2025 12:55:58 -0400 Subject: [PATCH 074/109] pragma for __GNUC__ only --- tiledb/sm/query/writers/global_order_writer.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index ca474199b70..64bf5b90cec 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1618,13 +1618,18 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( } } + uint64_t fragment_start = 0; + std::vector fragments; + // NB: gcc has a false positive uninitialized use warning for `fragment_end` +#ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" - uint64_t fragment_start = 0; std::optional fragment_end; - std::vector fragments; #pragma GCC diagnostic pop +#else + std::optional fragment_end; +#endif // Make sure we don't write more than the desired fragment size. 
for (uint64_t t = 0; t < tile_num; t++) { From 4e73daa89d61cfb174cc0deeb7805893de995c1b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 29 Oct 2025 13:31:50 -0400 Subject: [PATCH 075/109] Strengthen last_tiles_ buffering check --- test/src/unit-cppapi-max-fragment-size.cc | 42 ++++++++++++++++------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index f6b5880a47b..5b7ca54b846 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -37,6 +37,7 @@ #include "test/support/src/array_helpers.h" #include "test/support/src/array_templates.h" #include "test/support/src/helpers.h" +#include "tiledb/api/c_api/array_schema/array_schema_api_internal.h" #include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" #include "tiledb/api/c_api/subarray/subarray_api_internal.h" #include "tiledb/common/arithmetic.h" @@ -46,6 +47,7 @@ #include "tiledb/sm/cpp_api/tiledb" #include "tiledb/sm/misc/constants.h" #include "tiledb/sm/query/writers/global_order_writer.h" +#include "tiledb/sm/tile/arithmetic.h" #include "tiledb/sm/tile/test/arithmetic.h" #include "tiledb/sm/tile/tile.h" @@ -594,11 +596,9 @@ instance_dense_global_order( api_subarray.push_back(sub_dim.upper_bound); } - uint64_t num_tiles_per_hyperrow = 1; - for (uint64_t i = 0; i < dimensions.size() - 1; i++) { - const uint64_t dim = - (tile_order == TILEDB_ROW_MAJOR ? 
i + 1 : dimensions.size() - i - 2); - num_tiles_per_hyperrow *= dimensions[dim].num_tiles(subarray[dim]); + std::vector tile_extents; + for (const auto& dimension : dimensions) { + tile_extents.push_back(dimension.extent); } sm::NDRange smsubarray; @@ -615,6 +615,15 @@ instance_dense_global_order( query.set_subarray(sub); query.ptr().get()->query_->set_fragment_size(max_fragment_size); + smsubarray = sub.ptr()->subarray()->ndrange(0); + + sm::NDRange smsubarray_aligned = smsubarray; + array.schema() + .ptr() + ->array_schema() + ->domain() + .expand_to_tiles_when_no_current_domain(smsubarray_aligned); + uint64_t cells_written = 0; while (cells_written < a_write.size()) { const uint64_t cells_this_write = std::min( @@ -659,14 +668,26 @@ instance_dense_global_order( } // it should be an error if they exceed the max fragment size ASSERTER(in_memory_size <= max_fragment_size); + // and if they form a rectangle then we could have written some out ASSERTER(in_memory_num_tiles.has_value()); - ASSERTER(in_memory_num_tiles.value() < num_tiles_per_hyperrow); + for (uint64_t num_tiles = 0; num_tiles < in_memory_num_tiles.value(); + num_tiles++) { + const bool rectangle = sm::is_rectangular_domain( + static_cast(tile_order), + tile_extents, + smsubarray_aligned, + g->dense_.domain_tile_offset_, + g->frag_meta_->tile_index_base() + num_tiles); + if (num_tiles == 0) { + ASSERTER(rectangle); + } else { + ASSERTER(!rectangle); + } + } } query.finalize(); - - smsubarray = sub.ptr()->subarray()->ndrange(0); } // then read back @@ -731,11 +752,6 @@ instance_dense_global_order( }); std::sort(fragment_domains.begin(), fragment_domains.end(), domain_cmp); - std::vector tile_extents; - for (const auto& dimension : dimensions) { - tile_extents.push_back(dimension.extent); - } - // validate fragment domains ASSERTER(!fragment_domains.empty()); From 1be1ec3c50e4e296dca0065b743dff3291cdb6d4 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 07:28:41 -0400 Subject: [PATCH 
076/109] C++ API: Test consolidation dense array with max fragment size --- test/src/unit-cppapi-consolidation.cc | 398 ++++++++++++++++++++++ test/src/unit-cppapi-max-fragment-size.cc | 106 +----- test/support/src/array_templates.h | 87 ++++- test/support/src/fragment_info_helpers.h | 164 +++++++++ 4 files changed, 657 insertions(+), 98 deletions(-) create mode 100644 test/support/src/fragment_info_helpers.h diff --git a/test/src/unit-cppapi-consolidation.cc b/test/src/unit-cppapi-consolidation.cc index 950553890c1..83d7162bebb 100644 --- a/test/src/unit-cppapi-consolidation.cc +++ b/test/src/unit-cppapi-consolidation.cc @@ -32,10 +32,15 @@ #include "tiledb/sm/cpp_api/tiledb_experimental" #include +#include "test/support/src/array_helpers.h" +#include "test/support/src/array_templates.h" +#include "test/support/src/fragment_info_helpers.h" #include "test/support/src/helpers.h" +#include "tiledb/api/c_api/array/array_api_internal.h" #include "tiledb/sm/cpp_api/tiledb" using namespace tiledb; +using namespace tiledb::test; void remove_array(const std::string& array_name) { Context ctx; @@ -538,3 +543,396 @@ TEST_CASE( remove_array(array_name); } + +template +void instance_dense_consolidation_create_array( + Context& ctx, + const std::string& array_name, + const std::vector>& domain) { + using Coord = templates::Dimension
::value_type; + + // create array + Domain arraydomain(ctx); + for (uint64_t d = 0; d < domain.size(); d++) { + const std::string dname = "d" + std::to_string(d + 1); + auto dd = Dimension::create( + ctx, + dname, + {domain[d].domain.lower_bound, domain[d].domain.upper_bound}, + domain[d].extent); + arraydomain.add_dimension(dd); + } + + ArraySchema schema(ctx, TILEDB_DENSE); + schema.set_domain(arraydomain); + + const std::vector> attributes = + templates::ddl::physical_type_attributes(); + for (uint64_t a = 0; a < attributes.size(); a++) { + const std::string aname = "a" + std::to_string(a + 1); + auto aa = Attribute::create( + ctx, + aname, + static_cast(std::get<0>(attributes[a]))) + .set_cell_val_num(std::get<1>(attributes[a])) + .set_nullable(std::get<2>(attributes[a])); + schema.add_attribute(aa); + } + + Array::create(array_name, schema); +} + +/** + * Runs an instance of a dense consolidation test. + * The `fragments` are written in ascending order from the beginning of the + * array domain. + * + * Asserts that after consolidation we get fragments which appropriately satisfy + * `max_fragment_size`: + * 1) no fragment is larger than that size + * 2) if the union of two adjacent fragments can form a rectangular domain, then + * the sum of their sizes must exceed the maximum fragment size (else they + * should be one fragment) + * + * @precondition the `fragments` each have a number of cells which is an + * integral number of tiles + */ +template < + sm::Datatype DT, + templates::FragmentType F, + typename Asserter = AsserterCatch> +std::vector::domain_type>> +instance_dense_consolidation( + Context& ctx, + const std::string& array_name, + const std::vector>& domain, + std::vector& fragments, + uint64_t max_fragment_size) { + using Coord = templates::Dimension
::value_type; + + // create array + instance_dense_consolidation_create_array(ctx, array_name, domain); + + DeleteArrayGuard arrayguard(ctx.ptr().get(), array_name.c_str()); + + sm::NDRange array_domain; + for (const auto& dim : domain) { + array_domain.push_back( + Range(dim.domain.lower_bound, dim.domain.upper_bound)); + } + + uint64_t num_cells_per_tile = 1; + std::vector tile_extents; + for (const auto& dim : domain) { + tile_extents.push_back(dim.extent); + num_cells_per_tile *= static_cast(dim.extent); + } + + // populate array + uint64_t start_tile = 0; + { + Array forwrite(ctx, array_name, TILEDB_WRITE); + for (auto& f : fragments) { + const uint64_t f_num_tiles = f.num_cells() / num_cells_per_tile; + + const std::optional subarray = domain_tile_offset( + sm::Layout::ROW_MAJOR, + tile_extents, + array_domain, + start_tile, + f_num_tiles); + ASSERTER(subarray.has_value()); + + templates::query::write_fragment( + f, forwrite, subarray.value()); + + start_tile += f_num_tiles; + } + } + + sm::NDRange non_empty_domain; + { + std::optional maybe = domain_tile_offset( + sm::Layout::ROW_MAJOR, tile_extents, array_domain, 0, start_tile); + ASSERTER(maybe.has_value()); + non_empty_domain = maybe.value(); + } + + // consolidate + Config cconfig; + cconfig["sm.consolidation.max_fragment_size"] = + std::to_string(max_fragment_size); + Array::consolidate(ctx, array_name, &cconfig); + + Array forread(ctx, array_name, TILEDB_READ); + + // sanity check the non-empty domain + // NB: cannot use `==` for some reason, the array `non_empty_domain` method + // returns `range_start_size_` zero + { + const auto actual_domain = forread.ptr()->array()->non_empty_domain(); + for (uint64_t d = 0; d < domain.size(); d++) { + ASSERTER( + non_empty_domain[d].start_as() == + actual_domain[d].start_as()); + ASSERTER( + non_empty_domain[d].end_as() == + actual_domain[d].end_as()); + } + } + + // check fragment info + FragmentInfo finfo(ctx, array_name); + finfo.load(); + + const auto 
fragment_domains = + collect_and_validate_fragment_domains( + ctx, + sm::Layout::ROW_MAJOR, + array_name, + tile_extents, + non_empty_domain, + max_fragment_size); + + // read back fragments to check contents + std::vector api_subarray; + api_subarray.reserve(2 * domain.size()); + for (uint64_t d = 0; d < domain.size(); d++) { + api_subarray.push_back(non_empty_domain[d].start_as()); + api_subarray.push_back(non_empty_domain[d].end_as()); + } + + F input_concatenated, output; + for (const auto& f : fragments) { + input_concatenated.extend(f); + } + output = input_concatenated; + + Subarray sub(ctx, forread); + sub.set_subarray(api_subarray); + + Query query(forread.context(), forread); + query.set_layout(TILEDB_GLOBAL_ORDER); + query.set_subarray(sub); + + // make field size locations + templates::query::fragment_field_sizes_t field_sizes = + templates::query::make_field_sizes(output, output.num_cells()); + + // add fields to query + auto outcursor = templates::query::fragment_field_sizes_t(); + templates::query::set_fields( + ctx.ptr().get(), + query.ptr().get(), + field_sizes, + output, + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }, + outcursor); + + const auto status = query.submit(); + ASSERTER(status == Query::Status::COMPLETE); + + // resize according to what was found + templates::query::apply_cursor(output, outcursor, field_sizes); + + ASSERTER(output == input_concatenated); + + return fragment_domains; +} + +/** + * Test case inspired by CORE-290. 
+ * + */ +TEST_CASE( + "C++ API: Test consolidation dense array with max fragment size", + "[cppapi][consolidation][rest]") { + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + using DenseFragmentFixed = templates::Fragment, std::tuple>; + + const std::string array_name = "cppapi_consolidation_dense"; + + Context ctx; + + SECTION("2D") { + SECTION("Row tiles") { + const Dim64 row(0, std::numeric_limits::max() - 1, 1); + const Dim64 col(0, 99999, 100000); + + const uint64_t num_fragments = 32; + + // each input fragment is a single row + std::vector input_fragments; + for (uint64_t f = 0; f < num_fragments; f++) { + DenseFragmentFixed fdata; + fdata.resize(row.extent * col.domain.num_cells()); + + auto& att = std::get<0>(fdata.attributes()); + std::iota( + att.begin(), att.end(), static_cast(f) * fdata.num_cells()); + + input_fragments.push_back(fdata); + } + + // unfiltered, each row takes `100000 * sizeof(int)` bytes, plus some + // padding + const uint64_t tile_size = (row.extent * col.extent * sizeof(int)) + 92; + uint64_t max_fragment_size; + + SECTION("Too small") { + max_fragment_size = tile_size - 1; + } + SECTION("Snug fit") { + max_fragment_size = tile_size; + } + SECTION("Not quite two rows") { + max_fragment_size = (2 * tile_size) - 1; + } + SECTION("Two rows") { + max_fragment_size = 2 * tile_size; + } + + const uint64_t rows_per_fragment = max_fragment_size / tile_size; + DYNAMIC_SECTION( + "rows_per_fragment = " + std::to_string(rows_per_fragment)) { + if (rows_per_fragment == 0) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, + array_name, + {row, col}, + input_fragments, + max_fragment_size), + expect); + } else { + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + 
ctx, array_name, {row, col}, input_fragments, max_fragment_size); + + std::vector> expect; + for (uint64_t r = 0; r < num_fragments; r += rows_per_fragment) { + expect.push_back({Dom64(r, r + rows_per_fragment - 1), col.domain}); + } + CHECK(output_fragments == expect); + } + } + } + + SECTION("Rectangle tiles") { + // FIXME + SKIP("Fails with FPE due to overflow in compute_hyperrow_sizes"); + + const Dim64 row(0, std::numeric_limits::max() - 1, 4); + const Dim64 col(0, 99999, 100000 / row.extent); + + const uint64_t num_fragments = 32; + + // each input fragment is 4 tiles, covering 4 rows of cells + std::vector input_fragments; + for (uint64_t f = 0; f < num_fragments; f++) { + DenseFragmentFixed fdata; + fdata.resize(row.extent * col.extent * row.extent); + + auto& att = std::get<0>(fdata.attributes()); + std::iota( + att.begin(), att.end(), static_cast(f) * fdata.num_cells()); + + input_fragments.push_back(fdata); + } + + // unfiltered, each row takes `100000 * sizeof(int)` bytes, plus some + // padding + const uint64_t tile_size = (row.extent * col.extent * sizeof(int)) + 92; + + SECTION("Too small") { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size - 1), + expect); + } + SECTION("One tile") { + std::vector> expect; + for (uint64_t r = 0; r < num_fragments; r++) { + for (uint64_t c = 0; c < 4; c++) { + expect.push_back( + {Dom64(r * 4, r * 4 + 3), + Dom64(col.extent * c, (col.extent * (c + 1)) - 1)}); + } + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size); + CHECK(output_fragments == expect); + } + SECTION("Two tiles") { + std::vector> expect; + for (uint64_t r = 0; r < num_fragments; r++) { 
+ expect.push_back( + {Dom64(r * 4, r * 4 + 3), Dom64(0, (col.extent * 2) - 1)}); + expect.push_back( + {Dom64(r * 4, r * 4 + 3), + Dom64(col.extent * 2, (col.extent * 4) - 1)}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size); + } + SECTION("Three tiles") { + // now we have some trouble, each row is 4 tiles, 3 of them fit, + // so we will alternate fragments with 3 tiles and fragments with 1 + // tile to fill out the row, yikes + std::vector> expect; + for (uint64_t r = 0; r < num_fragments * 4; r++) { + expect.push_back( + {Dom64(r * 4, r * 4 + 3), Dom64(0, (col.extent * 3) - 1)}); + expect.push_back( + {Dom64(r * 4, r * 4 + 3), + Dom64(col.extent * 3, (col.extent * 4) - 1)}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size); + CHECK(output_fragments == expect); + } + SECTION("Four tiles") { + std::vector> expect; + for (uint64_t f = 0; f < num_fragments; f++) { + expect.push_back({Dom64(f * 4, f * 4 + 3), col.domain}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size); + CHECK(output_fragments == expect); + } + SECTION("Five tiles") { + // since we need rectangle domains this is the same as four tiles + std::vector> expect; + for (uint64_t f = 0; f < num_fragments; f++) { + expect.push_back({Dom64(f * 4, f * 4 + 3), col.domain}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size); + CHECK(output_fragments == expect); + } + } + } +} diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 5b7ca54b846..ef2052e0447 100644 --- 
a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -36,6 +36,7 @@ #include "test/support/rapidcheck/array_templates.h" #include "test/support/src/array_helpers.h" #include "test/support/src/array_templates.h" +#include "test/support/src/fragment_info_helpers.h" #include "test/support/src/helpers.h" #include "tiledb/api/c_api/array_schema/array_schema_api_internal.h" #include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" @@ -709,103 +710,14 @@ instance_dense_global_order( ASSERTER(st == Query::Status::COMPLETE); } - FragmentInfo finfo(ctx, array_name); - finfo.load(); - - // collect fragment domains - std::vector>> fragment_domains; - for (uint32_t f = 0; f < finfo.fragment_num(); f++) { - std::vector> this_fragment_domain; - for (uint64_t d = 0; d < dimensions.size(); d++) { - uint64_t bounds[2]; - finfo.get_non_empty_domain(f, d, &bounds[0]); - this_fragment_domain.push_back( - templates::Domain(bounds[0], bounds[1])); - } - fragment_domains.push_back(this_fragment_domain); - } - - // the fragments are not always emitted in the same order, sort them - auto domain_cmp = [&](const auto& left, const auto& right) { - for (uint64_t d = 0; d < dimensions.size(); d++) { - if (left[d].lower_bound < right[d].lower_bound) { - return true; - } else if (left[d].lower_bound > right[d].lower_bound) { - return false; - } else if (left[d].upper_bound < right[d].upper_bound) { - return true; - } else if (left[d].upper_bound > right[d].upper_bound) { - return false; - } - } - return false; - }; - std::vector fragments_in_order(finfo.fragment_num()); - std::iota(fragments_in_order.begin(), fragments_in_order.end(), 0); - std::sort( - fragments_in_order.begin(), - fragments_in_order.end(), - [&](const uint32_t f_left, const uint32_t f_right) -> bool { - const auto& left = fragment_domains[f_left]; - const auto& right = fragment_domains[f_right]; - return domain_cmp(left, right); - }); - std::sort(fragment_domains.begin(), 
fragment_domains.end(), domain_cmp); - - // validate fragment domains - ASSERTER(!fragment_domains.empty()); - - // fragment domains should be contiguous in global order and cover the whole - // subarray - uint64_t subarray_tile_offset = 0; - for (uint32_t f = 0; f < fragments_in_order.size(); f++) { - const sm::NDRange& internal_domain = - finfo.ptr() - ->fragment_info() - ->single_fragment_info_vec()[fragments_in_order[f]] - .non_empty_domain(); - - const uint64_t f_num_tiles = - compute_num_tiles(tile_extents, internal_domain); - const uint64_t f_start_tile = compute_start_tile( - static_cast(tile_order), - tile_extents, - smsubarray, - internal_domain); - - ASSERTER(f_start_tile == subarray_tile_offset); - subarray_tile_offset += f_num_tiles; - } - ASSERTER( - subarray_tile_offset == - compute_num_tiles(tile_extents, smsubarray)); - - auto meta_size = [&](uint32_t f) -> uint64_t { - return finfo.ptr() - ->fragment_info() - ->single_fragment_info_vec()[f] - .meta() - ->fragment_meta_size(); - }; - - // validate fragment size - no fragment should be larger than max requested - // size - for (uint32_t f : fragments_in_order) { - const uint64_t fsize = finfo.fragment_size(f); - const uint64_t fmetasize = meta_size(f); - ASSERTER(fsize <= max_fragment_size + fmetasize); - } - - // validate fragment size - we wrote the largest possible fragments (no two - // adjacent should be under max fragment size) - for (uint32_t fi = 1; fi < fragments_in_order.size(); fi++) { - const uint32_t fprev = fragments_in_order[fi - 1]; - const uint32_t fcur = fragments_in_order[fi]; - const uint64_t combined_size = - finfo.fragment_size(fprev) + finfo.fragment_size(fcur); - const uint64_t combined_meta_size = meta_size(fprev) + meta_size(fcur); - ASSERTER(combined_size > max_fragment_size + combined_meta_size); - } + const std::vector>> fragment_domains = + collect_and_validate_fragment_domains( + ctx, + static_cast(tile_order), + array_name, + tile_extents, + smsubarray, + 
max_fragment_size); // this is last because a fragment domain mismatch is more informative ASSERTER(a_read == a_write); diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index de7db91c62a..def8d85a130 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -1134,6 +1134,11 @@ struct Fragment { }, std::tuple_cat(dimensions(), attributes())); } + + bool operator==(const self_type& other) const { + return dimensions() == other.dimensions() && + attributes() == other.attributes(); + } }; /** @@ -1415,7 +1420,7 @@ uint64_t num_cells(const F& fragment, const auto& field_sizes) { } /** - * Writes a fragment to an array. + * Writes a fragment to a sparse array. */ template void write_fragment( @@ -1450,10 +1455,90 @@ void write_fragment( ASSERTER(num_cells == expect_num_cells); } +/** + * Writes a fragment to a dense array. + */ +template +void write_fragment( + const Fragment& fragment, + Array& forwrite, + const sm::NDRange& subarray, + tiledb_layout_t layout = TILEDB_ROW_MAJOR) { + Query query(forwrite.context(), forwrite, TILEDB_WRITE); + query.set_layout(layout); + + std::vector coords; + for (const auto& dim : subarray) { + coords.push_back(dim.start_as()); + coords.push_back(dim.end_as()); + } + + Subarray sub(query.ctx(), forwrite); + sub.set_subarray(coords); + query.set_subarray(sub); + + auto field_sizes = + make_field_sizes(const_cast(fragment)); + templates::query::set_fields( + query.ctx().ptr().get(), + query.ptr().get(), + field_sizes, + const_cast(fragment), + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }); + + const auto status = query.submit(); + ASSERTER(status == Query::Status::COMPLETE); + + if (layout == TILEDB_GLOBAL_ORDER) { + query.finalize(); + } + + // check that sizes match what we expect + const uint64_t expect_num_cells = fragment.size(); + const uint64_t num_cells = + templates::query::num_cells(fragment, 
field_sizes); + + ASSERTER(num_cells == expect_num_cells); +} + } // namespace query namespace ddl { +template +struct cell_type_traits; + +template <> +struct cell_type_traits { + static constexpr sm::Datatype physical_type = sm::Datatype::INT32; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool is_nullable = false; +}; + +template <> +struct cell_type_traits { + static constexpr sm::Datatype physical_type = sm::Datatype::UINT64; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool is_nullable = false; +}; + +template +std::vector> physical_type_attributes() { + std::vector> ret; + auto attr = [&](const T&) { + ret.push_back(std::make_tuple( + cell_type_traits>::physical_type, + cell_type_traits>::cell_val_num, + cell_type_traits>::is_nullable)); + }; + std::apply( + [&](const auto&... value) { (attr(value), ...); }, + typename F::AttributeTuple()); + + return ret; +} + /** * Creates an array with a schema whose dimensions and attributes * come from the simplified arguments. diff --git a/test/support/src/fragment_info_helpers.h b/test/support/src/fragment_info_helpers.h new file mode 100644 index 00000000000..417302306ad --- /dev/null +++ b/test/support/src/fragment_info_helpers.h @@ -0,0 +1,164 @@ +/** + * @file fragment_info_helpers.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides declarations and definitions of functionality which + * may be common to tests inspecting fragment info and fragment metadata. + */ + +#ifndef TILEDB_TEST_FRAGMENT_INFO_HELPERS_H +#define TILEDB_TEST_FRAGMENT_INFO_HELPERS_H + +#include +#include + +#include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" +#include "tiledb/sm/cpp_api/context.h" +#include "tiledb/sm/cpp_api/fragment_info.h" +#include "tiledb/sm/enums/layout.h" +#include "tiledb/sm/fragment/single_fragment_info.h" +#include "tiledb/sm/misc/types.h" +#include "tiledb/sm/tile/test/arithmetic.h" + +#include +#include + +namespace tiledb::test { + +template +std::vector>> +collect_and_validate_fragment_domains( + const Context& ctx, + sm::Layout tile_order, + const std::string& array_name, + const std::span tile_extents, + const sm::NDRange& expect_domain, + uint64_t max_fragment_size) { + const uint64_t num_dimensions = expect_domain.size(); + + FragmentInfo finfo(ctx, array_name); + finfo.load(); + + // collect fragment domains + std::vector>> fragment_domains; + for (uint32_t f = 0; f < finfo.fragment_num(); f++) { + std::vector> this_fragment_domain; + for (uint64_t d = 0; d < num_dimensions; d++) { + D bounds[2]; + finfo.get_non_empty_domain(f, d, &bounds[0]); + this_fragment_domain.push_back( + templates::Domain(bounds[0], bounds[1])); + } + fragment_domains.push_back(this_fragment_domain); + } + + // the fragments are not always emitted in the 
same order, sort them + auto domain_cmp = [&](const auto& left, const auto& right) { + for (uint64_t d = 0; d < num_dimensions; d++) { + if (left[d].lower_bound < right[d].lower_bound) { + return true; + } else if (left[d].lower_bound > right[d].lower_bound) { + return false; + } else if (left[d].upper_bound < right[d].upper_bound) { + return true; + } else if (left[d].upper_bound > right[d].upper_bound) { + return false; + } + } + return false; + }; + std::vector fragments_in_order(finfo.fragment_num()); + std::iota(fragments_in_order.begin(), fragments_in_order.end(), 0); + std::sort( + fragments_in_order.begin(), + fragments_in_order.end(), + [&](const uint32_t f_left, const uint32_t f_right) -> bool { + const auto& left = fragment_domains[f_left]; + const auto& right = fragment_domains[f_right]; + return domain_cmp(left, right); + }); + std::sort(fragment_domains.begin(), fragment_domains.end(), domain_cmp); + + // validate fragment domains + ASSERTER(!fragment_domains.empty()); + + // fragment domains should be contiguous in global order and cover the whole + // subarray + uint64_t subarray_tile_offset = 0; + for (uint32_t f = 0; f < fragments_in_order.size(); f++) { + const sm::NDRange& internal_domain = + finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[fragments_in_order[f]] + .non_empty_domain(); + + const uint64_t f_num_tiles = + compute_num_tiles(tile_extents, internal_domain); + const uint64_t f_start_tile = compute_start_tile( + tile_order, tile_extents, expect_domain, internal_domain); + + ASSERTER(f_start_tile == subarray_tile_offset); + subarray_tile_offset += f_num_tiles; + } + ASSERTER( + subarray_tile_offset == + compute_num_tiles(tile_extents, expect_domain)); + + auto meta_size = [&](uint32_t f) -> uint64_t { + return finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[f] + .meta() + ->fragment_meta_size(); + }; + + // validate fragment size - no fragment should be larger than max requested + // size + for (uint32_t f : 
fragments_in_order) { + const uint64_t fsize = finfo.fragment_size(f); + const uint64_t fmetasize = meta_size(f); + ASSERTER(fsize <= max_fragment_size + fmetasize); + } + + // validate fragment size - we wrote the largest possible fragments (no two + // adjacent should be under max fragment size) + for (uint32_t fi = 1; fi < fragments_in_order.size(); fi++) { + const uint32_t fprev = fragments_in_order[fi - 1]; + const uint32_t fcur = fragments_in_order[fi]; + const uint64_t combined_size = + finfo.fragment_size(fprev) + finfo.fragment_size(fcur); + const uint64_t combined_meta_size = meta_size(fprev) + meta_size(fcur); + ASSERTER(combined_size > max_fragment_size + combined_meta_size); + } + + return fragment_domains; +} + +} // namespace tiledb::test + +#endif From 8674d2ec8613a96eff532c5cb0ab971ab91f2703 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 11:10:43 -0400 Subject: [PATCH 077/109] Avoid uninitialized false positive for max_fragment_size --- test/src/unit-cppapi-consolidation.cc | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/test/src/unit-cppapi-consolidation.cc b/test/src/unit-cppapi-consolidation.cc index 83d7162bebb..657008cc28f 100644 --- a/test/src/unit-cppapi-consolidation.cc +++ b/test/src/unit-cppapi-consolidation.cc @@ -779,24 +779,12 @@ TEST_CASE( // unfiltered, each row takes `100000 * sizeof(int)` bytes, plus some // padding const uint64_t tile_size = (row.extent * col.extent * sizeof(int)) + 92; - uint64_t max_fragment_size; - - SECTION("Too small") { - max_fragment_size = tile_size - 1; - } - SECTION("Snug fit") { - max_fragment_size = tile_size; - } - SECTION("Not quite two rows") { - max_fragment_size = (2 * tile_size) - 1; - } - SECTION("Two rows") { - max_fragment_size = 2 * tile_size; - } + const uint64_t max_fragment_size = GENERATE_COPY( + tile_size - 1, tile_size, (2 * tile_size) - 1, 2 * tile_size); const uint64_t rows_per_fragment = max_fragment_size / tile_size; 
DYNAMIC_SECTION( - "rows_per_fragment = " + std::to_string(rows_per_fragment)) { + "max_fragment_size = " + std::to_string(max_fragment_size)) { if (rows_per_fragment == 0) { const auto expect = Catch::Matchers::ContainsSubstring( "Fragment size is too small to subdivide dense subarray into " From 952b263a7e97d0cc852b2b533ca5ad7ac33c968e Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 11:37:27 -0400 Subject: [PATCH 078/109] Overflow checks in tile arithmetic functions --- test/support/src/fragment_info_helpers.h | 2 +- tiledb/sm/tile/arithmetic.h | 54 ++++++++++++++++++------ tiledb/sm/tile/test/arithmetic.h | 11 +++-- tiledb/sm/tile/test/unit_arithmetic.cc | 47 ++++++++++++++++++++- 4 files changed, 96 insertions(+), 18 deletions(-) diff --git a/test/support/src/fragment_info_helpers.h b/test/support/src/fragment_info_helpers.h index 417302306ad..5401fa912ab 100644 --- a/test/support/src/fragment_info_helpers.h +++ b/test/support/src/fragment_info_helpers.h @@ -119,7 +119,7 @@ collect_and_validate_fragment_domains( const uint64_t f_num_tiles = compute_num_tiles(tile_extents, internal_domain); - const uint64_t f_start_tile = compute_start_tile( + const std::optional f_start_tile = compute_start_tile( tile_order, tile_extents, expect_domain, internal_domain); ASSERTER(f_start_tile == subarray_tile_offset); diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h index 4957d0113af..67921ac6d9b 100644 --- a/tiledb/sm/tile/arithmetic.h +++ b/tiledb/sm/tile/arithmetic.h @@ -47,6 +47,7 @@ #ifndef TILEDB_TILE_ARITHMETIC_H #define TILEDB_TILE_ARITHMETIC_H +#include "tiledb/common/arithmetic.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/enums/layout.h" #include "tiledb/sm/misc/types.h" @@ -76,7 +77,16 @@ static bool is_rectangular_domain( sm::Dimension::tile_idx( domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + 1; - hyperrow_num_tiles *= d_inner_num_tiles; + + const auto maybe = checked_arithmetic::mul( + 
hyperrow_num_tiles, d_inner_num_tiles); + if (maybe.has_value()) { + hyperrow_num_tiles = maybe.value(); + } else { + throw std::overflow_error( + "Cannot compute subrectangle of domain due to arithmetic overflow: " + "domain tile extents may be too large"); + } } const uint64_t hyperrow_offset = start_tile % hyperrow_num_tiles; @@ -101,11 +111,12 @@ static bool is_rectangular_domain( * dimension `d` (and is thus always 1 for the final dimension). */ template -std::vector compute_hyperrow_sizes( +std::vector> compute_hyperrow_sizes( Layout tile_order, std::span tile_extents, const sm::NDRange& domain) { - std::vector hyperrow_sizes(tile_extents.size() + 1, 1); + std::vector> hyperrow_sizes( + tile_extents.size() + 1, 1); for (uint64_t di = 0; di < tile_extents.size(); di++) { const uint64_t d = (tile_order == Layout::ROW_MAJOR ? di : tile_extents.size() - di - 1); @@ -116,7 +127,12 @@ std::vector compute_hyperrow_sizes( hyperrow_sizes[di] = d_num_tiles; } for (uint64_t d = tile_extents.size(); d > 0; d--) { - hyperrow_sizes[d - 1] = hyperrow_sizes[d - 1] * hyperrow_sizes[d]; + if (hyperrow_sizes[d - 1].has_value() && hyperrow_sizes[d].has_value()) { + hyperrow_sizes[d - 1] = checked_arithmetic::mul( + hyperrow_sizes[d - 1].value(), hyperrow_sizes[d].value()); + } else { + hyperrow_sizes[d - 1] = std::nullopt; + } } return hyperrow_sizes; @@ -138,21 +154,33 @@ static std::optional domain_tile_offset( sm::NDRange r; r.resize(tile_extents.size()); - const std::vector dimension_sizes = + const std::vector> dimension_sizes = compute_hyperrow_sizes(tile_order, tile_extents, domain); for (uint64_t di = 0; di < tile_extents.size(); di++) { const uint64_t d = (tile_order == Layout::ROW_MAJOR ? 
di : tile_extents.size() - di - 1); - const uint64_t outer_num_tiles = dimension_sizes[di]; - const uint64_t hyperrow_num_tiles = dimension_sizes[di + 1]; - - const T this_dimension_start_tile = (start_tile / hyperrow_num_tiles) % - (outer_num_tiles / hyperrow_num_tiles); - const T this_dimension_end_tile = - ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % - (outer_num_tiles / hyperrow_num_tiles); + if (!dimension_sizes[di + 1].has_value()) { + throw std::overflow_error( + "Cannot compute subrectangle of domain due to arithmetic overflow: " + "domain tile extents may be too large"); + } + const uint64_t hyperrow_num_tiles = dimension_sizes[di + 1].value(); + + T this_dimension_start_tile, this_dimension_end_tile; + if (dimension_sizes[di].has_value()) { + const uint64_t outer_num_tiles = dimension_sizes[di].value(); + this_dimension_start_tile = (start_tile / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); + this_dimension_end_tile = + ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); + } else { + this_dimension_start_tile = start_tile / hyperrow_num_tiles; + this_dimension_end_tile = + (start_tile + num_tiles - 1) / hyperrow_num_tiles; + } if (start_tile % hyperrow_num_tiles == 0) { // aligned to the start of the hyperrow diff --git a/tiledb/sm/tile/test/arithmetic.h b/tiledb/sm/tile/test/arithmetic.h index be63f318cfe..659fb5329ec 100644 --- a/tiledb/sm/tile/test/arithmetic.h +++ b/tiledb/sm/tile/test/arithmetic.h @@ -62,12 +62,12 @@ uint64_t compute_num_tiles( * sizes in `tile_extents` */ template -uint64_t compute_start_tile( +std::optional compute_start_tile( sm::Layout tile_order, std::span tile_extents, const sm::NDRange& domain, const sm::NDRange& subrectangle) { - const std::vector hyperrow_sizes = + const std::vector> hyperrow_sizes = sm::compute_hyperrow_sizes(tile_order, tile_extents, domain); uint64_t start_tile_result = 0; @@ -79,7 +79,12 @@ uint64_t compute_start_tile( 
subrectangle[d].start_as(), domain[d].start_as(), tile_extents[d]); - start_tile_result += start_tile_this_dimension * hyperrow_sizes[di + 1]; + if (hyperrow_sizes[di + 1].has_value()) { + start_tile_result += + start_tile_this_dimension * hyperrow_sizes[di + 1].value(); + } else { + return std::nullopt; + } } return start_tile_result; diff --git a/tiledb/sm/tile/test/unit_arithmetic.cc b/tiledb/sm/tile/test/unit_arithmetic.cc index 43017224817..c9492b86bf1 100644 --- a/tiledb/sm/tile/test/unit_arithmetic.cc +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -345,7 +345,7 @@ std::optional instance_domain_tile_offset( compute_num_tiles(tile_extents, adjusted_domain.value()); ASSERTER(num_tiles_result == num_tiles); - const uint64_t start_tile_result = compute_start_tile( + const std::optional start_tile_result = compute_start_tile( tile_order, tile_extents, domain, adjusted_domain.value()); ASSERTER(start_tile_result == start_tile); @@ -546,6 +546,51 @@ TEST_CASE("domain_tile_offset 2d", "[arithmetic]") { } } + SECTION("CORE-290 Example") { + const Dim64 row(0, std::numeric_limits::max() - 1, 4); + const Dim64 col(0, 99999, 100000 / row.extent); + + auto make_row = [&](uint64_t r_start, uint64_t r_end) { + return Dom64( + row.domain.lower_bound + r_start * row.extent, + row.domain.lower_bound + r_end * row.extent + row.extent - 1); + }; + + const auto r1 = instance_domain_tile_offset( + {row, col}, 0, 4, Layout::ROW_MAJOR); + CHECK(r1 == std::vector{make_row(0, 0), col.domain}); + } + + SECTION("Hyperrow overflow") { + const uint64_t target_tiles_in_domain = 1 << 16; + const uint64_t lower_bound = 0; + const uint64_t upper_bound = std::numeric_limits::max() - 1; + const uint64_t extent = + (upper_bound - lower_bound + 1) / target_tiles_in_domain; + const Dim64 d(lower_bound, upper_bound, extent); + + SECTION("Not overflow") { + const auto r = instance_domain_tile_offset( + {d, d, d, d}, 0, 1, Layout::ROW_MAJOR); + CHECK( + r == std::vector{ + Dom64(0, extent - 1), + 
Dom64(0, extent - 1), + Dom64(0, extent - 1), + Dom64(0, extent - 1)}); + } + + SECTION("Overflow") { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_domain_tile_offset( + {d, d, d, d, d}, 0, 1, Layout::ROW_MAJOR), + expect); + } + } + rc::prop("any tiles", []() { const Dim64 d1 = *rc::make_dimension(std::nullopt, {64}); const Dim64 d2 = *rc::make_dimension(std::nullopt, {64}); From 9c62085d13025c6b7d491e1edcff94742c335460 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 11:37:47 -0400 Subject: [PATCH 079/109] Fix narrowing conversion that is suddenly showing up --- test/support/src/array_templates.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index def8d85a130..91bdbab30cd 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -259,7 +259,7 @@ struct QueryConditionEvalSchema { */ bool test( const Fragment& fragment, - int record, + uint64_t record, const tiledb::sm::ASTNode& condition) const { using DimensionTuple = stdx::decay_tuple; using AttributeTuple = stdx::decay_tuple; From bce40a78c0fad5cc3349acbe71aa5444d24e1e5c Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 12:06:48 -0400 Subject: [PATCH 080/109] fragment_end is not std::option to avoid false positive uninitialized warning --- .../sm/query/writers/global_order_writer.cc | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 64bf5b90cec..4a3ee5ca70f 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1621,15 +1621,9 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( uint64_t fragment_start = 0; std::vector 
fragments; -// NB: gcc has a false positive uninitialized use warning for `fragment_end` -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" - std::optional fragment_end; -#pragma GCC diagnostic pop -#else - std::optional fragment_end; -#endif + // NB: this really wants to be `std::optional` but some versions of gcc have a + // false positive uninitialized use warning + int64_t fragment_end = -1; // Make sure we don't write more than the desired fragment size. for (uint64_t t = 0; t < tile_num; t++) { @@ -1645,7 +1639,7 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( if (running_tiles_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to write a single tile"); - } else if (!fragment_end.has_value()) { + } else if (fragment_end < 0) { if (fragment_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to subdivide dense subarray into " @@ -1658,8 +1652,9 @@ iassert(running_tiles_size >= fragment_size); running_tiles_size -= fragment_size; - fragment_start = fragment_end.value_or(0); - fragment_end = std::nullopt; + fragment_start = + static_cast(std::max(0, fragment_end)); + fragment_end = -1; } if (!subarray_tile_offset.has_value() || !max_fragment_size_.has_value() || @@ -1669,19 +1664,21 @@ subarray_tile_offset.value() + fragment_start, t - fragment_start + 1)) { fragment_size = running_tiles_size + tile_size; - fragment_end = t + 1; + fragment_end = static_cast(t + 1); } running_tiles_size += tile_size; } - if (fragment_end.has_value()) { + if (fragment_end >= 0) { fragments.push_back(fragment_start); } return GlobalOrderWriter::FragmentTileBoundaries{ .tile_offsets_ = fragments, - .num_writeable_tiles_ = fragment_end.value_or(fragment_start), + .num_writeable_tiles_ = + (fragment_end < 0 ?
fragment_start : + static_cast(fragment_end)), .last_fragment_size_ = fragment_size}; } From e569c32d1bc3ace57e0c6c2ff8ddd44b745d7f26 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 12:10:12 -0400 Subject: [PATCH 081/109] constexpr tile_order --- test/src/unit-cppapi-consolidation.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/test/src/unit-cppapi-consolidation.cc b/test/src/unit-cppapi-consolidation.cc index 657008cc28f..887fa6f2762 100644 --- a/test/src/unit-cppapi-consolidation.cc +++ b/test/src/unit-cppapi-consolidation.cc @@ -610,6 +610,8 @@ instance_dense_consolidation( uint64_t max_fragment_size) { using Coord = templates::Dimension
::value_type; + static constexpr sm::Layout tile_order = sm::Layout::ROW_MAJOR; + // create array instance_dense_consolidation_create_array(ctx, array_name, domain); @@ -636,11 +638,7 @@ instance_dense_consolidation( const uint64_t f_num_tiles = f.num_cells() / num_cells_per_tile; const std::optional subarray = domain_tile_offset( - sm::Layout::ROW_MAJOR, - tile_extents, - array_domain, - start_tile, - f_num_tiles); + tile_order, tile_extents, array_domain, start_tile, f_num_tiles); ASSERTER(subarray.has_value()); templates::query::write_fragment( @@ -653,7 +651,7 @@ instance_dense_consolidation( sm::NDRange non_empty_domain; { std::optional maybe = domain_tile_offset( - sm::Layout::ROW_MAJOR, tile_extents, array_domain, 0, start_tile); + tile_order, tile_extents, array_domain, 0, start_tile); ASSERTER(maybe.has_value()); non_empty_domain = maybe.value(); } @@ -688,7 +686,7 @@ instance_dense_consolidation( const auto fragment_domains = collect_and_validate_fragment_domains( ctx, - sm::Layout::ROW_MAJOR, + tile_order, array_name, tile_extents, non_empty_domain, From 750f988a91da7897201eca07a35edf25503a1c26 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 12:43:59 -0400 Subject: [PATCH 082/109] instance_dense_consolidation sorts in global order --- test/src/unit-cppapi-consolidation.cc | 47 +++++++++++++++++++++++++++ test/support/src/array_templates.h | 21 ++++++++++++ 2 files changed, 68 insertions(+) diff --git a/test/src/unit-cppapi-consolidation.cc b/test/src/unit-cppapi-consolidation.cc index 887fa6f2762..97b43057c0c 100644 --- a/test/src/unit-cppapi-consolidation.cc +++ b/test/src/unit-cppapi-consolidation.cc @@ -38,6 +38,7 @@ #include "test/support/src/helpers.h" #include "tiledb/api/c_api/array/array_api_internal.h" #include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/misc/comparators.h" using namespace tiledb; using namespace tiledb::test; @@ -704,6 +705,52 @@ instance_dense_consolidation( for (const auto& f : fragments) { 
input_concatenated.extend(f); } + + // sort in global order + { + std::vector idxs(input_concatenated.size()); + std::iota(idxs.begin(), idxs.end(), 0); + + std::vector next_coord; + next_coord.reserve(domain.size()); + for (uint64_t d = 0; d < domain.size(); d++) { + next_coord.push_back(domain[d].domain.lower_bound); + } + + std::vector> coords; + coords.reserve(input_concatenated.size()); + for (uint64_t i = 0; i < input_concatenated.size(); i++) { + coords.push_back(next_coord); + for (uint64_t di = 0; di < domain.size(); di++) { + const uint64_t d = domain.size() - di - 1; + if (next_coord[d] < domain[d].domain.upper_bound) { + ++next_coord[d]; + break; + } else { + next_coord[d] = 0; + } + } + } + + sm::GlobalCellCmp globalcmp( + forread.ptr()->array()->array_schema_latest().domain()); + + const auto hyperrow_sizes = compute_hyperrow_sizes( + tile_order, tile_extents, non_empty_domain); + + auto icmp = [&](uint64_t ia, uint64_t ib) -> bool { + const auto sa = templates::global_cell_cmp_span(coords[ia]); + const auto sb = templates::global_cell_cmp_span(coords[ib]); + return globalcmp(sa, sb); + }; + + std::sort(idxs.begin(), idxs.end(), icmp); + + input_concatenated.attributes() = stdx::select( + stdx::reference_tuple(input_concatenated.attributes()), + std::span(idxs)); + } + output = input_concatenated; Subarray sub(ctx, forread); diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 91bdbab30cd..3940635f0d0 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -111,6 +111,27 @@ struct global_cell_cmp_std_tuple { StdTuple tup_; }; +/** + * Adapts a span of coordinates for comparison using `GlobalCellCmp`. 
+ */ +template +struct global_cell_cmp_span { + global_cell_cmp_span(std::span values) + : values_(values) { + } + + tiledb::common::UntypedDatumView dimension_datum( + const tiledb::sm::Dimension&, unsigned dim_idx) const { + return UntypedDatumView(&values_[dim_idx], sizeof(Coord)); + } + + const void* coord(unsigned dim) const { + return &values_[dim]; + } + + std::span values_; +}; + /** * Forward declaration of query_buffers * which will be specialized. From 060f69c8893fb3aa3b179196ebb3fcaac9139895 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 15:04:54 -0400 Subject: [PATCH 083/109] validate_fragment_domains missed a spot for tile order --- test/support/src/fragment_info_helpers.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/support/src/fragment_info_helpers.h b/test/support/src/fragment_info_helpers.h index 5401fa912ab..f7899161887 100644 --- a/test/support/src/fragment_info_helpers.h +++ b/test/support/src/fragment_info_helpers.h @@ -79,7 +79,9 @@ collect_and_validate_fragment_domains( // the fragments are not always emitted in the same order, sort them auto domain_cmp = [&](const auto& left, const auto& right) { - for (uint64_t d = 0; d < num_dimensions; d++) { + for (uint64_t di = 0; di < num_dimensions; di++) { + const uint64_t d = + (tile_order == sm::Layout::ROW_MAJOR ? 
di : num_dimensions - di - 1); if (left[d].lower_bound < right[d].lower_bound) { return true; } else if (left[d].lower_bound > right[d].lower_bound) { From f26fe05a257d134c33d3dfb6f005f61b8ed797a5 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 15:07:55 -0400 Subject: [PATCH 084/109] gen_fragment_size produces minimum 1 tile --- test/src/unit-cppapi-max-fragment-size.cc | 28 ++++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index ef2052e0447..7469d939454 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1150,7 +1150,7 @@ void rapidcheck_dense_array( } auto gen_fragment_size = rc::gen::inRange( - num_tiles_per_hyperrow * estimate_single_tile_fragment_size * 1, + estimate_single_tile_fragment_size, num_tiles_per_hyperrow * estimate_single_tile_fragment_size * 8); const uint64_t max_fragment_size = *gen_fragment_size; @@ -1194,13 +1194,25 @@ TEST_CASE( Context ctx; SECTION("Shrinking") { - instance_dense_global_order( - ctx, - TILEDB_ROW_MAJOR, - TILEDB_COL_MAJOR, - 48, - {Dim64(0, 116, 1), Dim64(0, 0, 1)}, - {Dom64(2, 20), Dom64(0, 0)}); + SECTION("Example 1") { + instance_dense_global_order( + ctx, + TILEDB_ROW_MAJOR, + TILEDB_COL_MAJOR, + 48, + {Dim64(0, 116, 1), Dim64(0, 0, 1)}, + {Dom64(2, 20), Dom64(0, 0)}); + } + + SECTION("Example 2") { + instance_dense_global_order( + ctx, + TILEDB_COL_MAJOR, + TILEDB_ROW_MAJOR, + 24, + {Dim64(0, 60, 1), Dim64(0, 20, 1)}, + {Dom64(0, 1), Dom64(0, 1)}); + } } rc::prop("max fragment size dense 2d", [&ctx]() { From a50040ce4d46340917f038f0b3dc32d747c41af3 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 15:08:29 -0400 Subject: [PATCH 085/109] Remove SKIP and fix max_fragment_size --- test/src/unit-cppapi-consolidation.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git 
a/test/src/unit-cppapi-consolidation.cc b/test/src/unit-cppapi-consolidation.cc index 97b43057c0c..f47d283402b 100644 --- a/test/src/unit-cppapi-consolidation.cc +++ b/test/src/unit-cppapi-consolidation.cc @@ -860,9 +860,6 @@ TEST_CASE( } SECTION("Rectangle tiles") { - // FIXME - SKIP("Fails with FPE due to overflow in compute_hyperrow_sizes"); - const Dim64 row(0, std::numeric_limits::max() - 1, 4); const Dim64 col(0, 99999, 100000 / row.extent); @@ -923,14 +920,14 @@ TEST_CASE( const auto output_fragments = instance_dense_consolidation< sm::Datatype::UINT64, DenseFragmentFixed>( - ctx, array_name, {row, col}, input_fragments, tile_size); + ctx, array_name, {row, col}, input_fragments, 2 * tile_size); } SECTION("Three tiles") { // now we have some trouble, each row is 4 tiles, 3 of them fit, // so we will alternate fragments with 3 tiles and fragments with 1 // tile to fill out the row, yikes std::vector> expect; - for (uint64_t r = 0; r < num_fragments * 4; r++) { + for (uint64_t r = 0; r < num_fragments; r++) { expect.push_back( {Dom64(r * 4, r * 4 + 3), Dom64(0, (col.extent * 3) - 1)}); expect.push_back( @@ -940,7 +937,7 @@ TEST_CASE( const auto output_fragments = instance_dense_consolidation< sm::Datatype::UINT64, DenseFragmentFixed>( - ctx, array_name, {row, col}, input_fragments, tile_size); + ctx, array_name, {row, col}, input_fragments, 3 * tile_size); CHECK(output_fragments == expect); } SECTION("Four tiles") { @@ -951,7 +948,7 @@ TEST_CASE( const auto output_fragments = instance_dense_consolidation< sm::Datatype::UINT64, DenseFragmentFixed>( - ctx, array_name, {row, col}, input_fragments, tile_size); + ctx, array_name, {row, col}, input_fragments, 4 * tile_size); CHECK(output_fragments == expect); } SECTION("Five tiles") { @@ -963,7 +960,7 @@ TEST_CASE( const auto output_fragments = instance_dense_consolidation< sm::Datatype::UINT64, DenseFragmentFixed>( - ctx, array_name, {row, col}, input_fragments, tile_size); + ctx, array_name, {row, col}, 
input_fragments, 5 * tile_size); CHECK(output_fragments == expect); } } From ef35f1be254265bb293ec14caa66fb6acd67b46d Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Fri, 31 Oct 2025 22:10:35 -0400 Subject: [PATCH 086/109] Add write_unit_num_cells to rapidcheck test and capture a failing example --- test/src/unit-cppapi-max-fragment-size.cc | 29 ++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 7469d939454..8a57ab10f03 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1149,13 +1149,29 @@ void rapidcheck_dense_array( num_tiles_per_hyperrow *= dimensions[dim].num_tiles(subarray[dim]); } + const uint64_t num_tiles_total = + num_tiles_per_hyperrow * + (tile_order == TILEDB_ROW_MAJOR ? + (dimensions[0].num_tiles(subarray[0])) : + (dimensions.back().num_tiles(subarray.back()))); + auto gen_fragment_size = rc::gen::inRange( estimate_single_tile_fragment_size, num_tiles_per_hyperrow * estimate_single_tile_fragment_size * 8); const uint64_t max_fragment_size = *gen_fragment_size; + auto gen_write_unit_num_cells = + rc::gen::inRange(1, num_tiles_total * num_cells_per_tile); + const uint64_t write_unit_num_cells = *gen_write_unit_num_cells; + instance_dense_global_order( - ctx, tile_order, cell_order, max_fragment_size, dimensions, subarray); + ctx, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells); } TEST_CASE( @@ -1213,6 +1229,17 @@ TEST_CASE( {Dim64(0, 60, 1), Dim64(0, 20, 1)}, {Dom64(0, 1), Dom64(0, 1)}); } + + SECTION("Example 3") { + instance_dense_global_order( + ctx, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 48, + {Dim64(0, 35, 1), Dim64(0, 420, 1)}, + {Dom64(0, 1), Dom64(0, 4)}, + 1); + } } rc::prop("max fragment size dense 2d", [&ctx]() { From 3b2e5492f43be57906e141c4081a768cb05a3e91 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Sat, 
1 Nov 2025 17:14:58 -0400 Subject: [PATCH 087/109] Add and fix Example 4 with fragment starting at end of row --- test/src/unit-cppapi-max-fragment-size.cc | 19 +++++++- .../sm/query/writers/global_order_writer.cc | 48 ++++++++++--------- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 8a57ab10f03..e2948e2ded6 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -651,8 +651,8 @@ instance_dense_global_order( uint64_t in_memory_size = 0; std::optional in_memory_num_tiles; for (const auto& field : g->last_tiles_) { - // NB: there should always be at least one tile which contains the state - // of the current fragment + // NB: there should always be at least one tile which contains the + // state of the current fragment ASSERTER(!field.second.empty()); for (uint64_t t = 0; t < field.second.size() - 1; t++) { @@ -1240,6 +1240,21 @@ TEST_CASE( {Dom64(0, 1), Dom64(0, 4)}, 1); } + + SECTION("Example 4") { + /* + * In this example we end up with a fragment which fills all but one tile + * of a single row. The last tile in the row has to be its own fragment. 
+ */ + auto fragments = instance_dense_global_order( + ctx, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 924, + {Dim64(0, 304, 8), Dim64(0, 147, 2)}, + {Dom64(0, 31), Dom64(0, 23)}, + 41); + } } rc::prop("max fragment size dense 2d", [&ctx]() { diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 4a3ee5ca70f..3e3cc74ce4f 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -980,15 +980,6 @@ Status GlobalOrderWriter::global_write() { const uint64_t offset_not_written = fragments.num_writeable_tiles_; - if (!global_write_state_->frag_meta_ || - fragments.tile_offsets_.size() > 1) { - RETURN_CANCEL_OR_ERROR(start_new_fragment()); - } - - global_write_state_->frag_meta_->set_num_tiles( - global_write_state_->frag_meta_->tile_index_base() + tile_num - - offset_not_written); - // Dense array does not have bounding rectangles. // If there were any other tile metadata which we needed to draw from the // un-filtered tiles, we would have to store that in the global write state @@ -1608,14 +1599,12 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( uint64_t running_tiles_size = current_fragment_size_; uint64_t fragment_size = current_fragment_size_; - std::optional subarray_tile_offset; - if (dense()) { - if (global_write_state_->frag_meta_) { - subarray_tile_offset = global_write_state_->dense_.domain_tile_offset_ + - global_write_state_->frag_meta_->tile_index_base(); - } else { - subarray_tile_offset = 0; - } + uint64_t write_state_start_tile = + global_write_state_->dense_.domain_tile_offset_; + uint64_t current_fragment_num_tiles_already_written = 0; + if (dense() && global_write_state_->frag_meta_) { + current_fragment_num_tiles_already_written = + global_write_state_->frag_meta_->tile_index_base(); } uint64_t fragment_start = 0; @@ -1655,14 +1644,27 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( fragment_start = 
static_cast(std::max(0, fragment_end)); fragment_end = -1; + + write_state_start_tile += current_fragment_num_tiles_already_written; + current_fragment_num_tiles_already_written = 0; } - if (!subarray_tile_offset.has_value() || !max_fragment_size_.has_value() || - is_rectangular_domain( - array_schema_, - subarray_.ndrange(0), - subarray_tile_offset.value() + fragment_start, - t - fragment_start + 1)) { + bool is_part_of_fragment = true; + if (dense() && max_fragment_size_.has_value()) { + // Dense fragments must have a rectangular domain. + // And all fragments must be smaller than `max_fragment_size_`. + // We must identify the highest tile number which satisfies both criteria. + const uint64_t fragment_start_tile = + write_state_start_tile + fragment_start; + const uint64_t maybe_num_tiles = + current_fragment_num_tiles_already_written + t - fragment_start + 1; + is_part_of_fragment = is_rectangular_domain( + array_schema_, + subarray_.ndrange(0), + fragment_start_tile, + maybe_num_tiles); + } + if (is_part_of_fragment) { fragment_size = running_tiles_size + tile_size; fragment_end = static_cast(t + 1); } From 78fe4fa34500fe8d78038e3d519e80b712e4bf33 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Sat, 1 Nov 2025 17:45:25 -0400 Subject: [PATCH 088/109] enum IsRectangularDomain --- test/src/unit-cppapi-max-fragment-size.cc | 17 ++-- .../sm/query/writers/global_order_writer.cc | 15 ++-- tiledb/sm/tile/arithmetic.h | 22 ++++- tiledb/sm/tile/test/unit_arithmetic.cc | 89 ++++++++++--------- 4 files changed, 82 insertions(+), 61 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index e2948e2ded6..ce34db8fd92 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -674,16 +674,17 @@ instance_dense_global_order( ASSERTER(in_memory_num_tiles.has_value()); for (uint64_t num_tiles = 0; num_tiles < in_memory_num_tiles.value(); num_tiles++) { - const bool 
rectangle = sm::is_rectangular_domain( - static_cast(tile_order), - tile_extents, - smsubarray_aligned, - g->dense_.domain_tile_offset_, - g->frag_meta_->tile_index_base() + num_tiles); + const sm::IsRectangularDomain rectangle = + sm::is_rectangular_domain( + static_cast(tile_order), + tile_extents, + smsubarray_aligned, + g->dense_.domain_tile_offset_, + g->frag_meta_->tile_index_base() + num_tiles); if (num_tiles == 0) { - ASSERTER(rectangle); + ASSERTER(rectangle == sm::IsRectangularDomain::Yes); } else { - ASSERTER(!rectangle); + ASSERTER(rectangle != sm::IsRectangularDomain::Yes); } } } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 3e3cc74ce4f..dedb47bf068 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -71,7 +71,7 @@ namespace tiledb::sm { * each of the input tiles to determine whether a rectangle is formed and * including a tile in a fragment is sound. 
*/ -static bool is_rectangular_domain( +static IsRectangularDomain is_rectangular_domain( const ArraySchema& arrayschema, const NDRange& domain, uint64_t start_tile, @@ -98,7 +98,7 @@ static bool is_rectangular_domain( start_tile, num_tiles); } else { - return false; + return IsRectangularDomain::Never; } }; return apply_with_type(impl, arraydomain.dimension_ptr(0)->type()); @@ -1658,11 +1658,12 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( write_state_start_tile + fragment_start; const uint64_t maybe_num_tiles = current_fragment_num_tiles_already_written + t - fragment_start + 1; - is_part_of_fragment = is_rectangular_domain( - array_schema_, - subarray_.ndrange(0), - fragment_start_tile, - maybe_num_tiles); + is_part_of_fragment = + (is_rectangular_domain( + array_schema_, + subarray_.ndrange(0), + fragment_start_tile, + maybe_num_tiles) == IsRectangularDomain::Yes); } if (is_part_of_fragment) { fragment_size = running_tiles_size + tile_size; diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h index 67921ac6d9b..58882132d2c 100644 --- a/tiledb/sm/tile/arithmetic.h +++ b/tiledb/sm/tile/arithmetic.h @@ -55,12 +55,26 @@ namespace tiledb::sm { +/** + * Ternary value for the result of `is_rectangular_domain`. + * Describes whether a `[start_tile, start_tile + num_tiles)` range + * over a given domain forms a rectangle. + */ +enum class IsRectangularDomain { + /** The range is not a rectangle, but extending it could create one. */ + No, + /** The range is not a rectangle, and extending it can never create one. */ + Never, + /** The range is a rectangle. 
*/ + Yes +}; + /** * @return true if the range `[start_tile, start_tile + num_tiles)` represents * a hyper-rectangle inside `domain` with tile sizes given by `tile_extents` */ template -static bool is_rectangular_domain( +static IsRectangularDomain is_rectangular_domain( Layout tile_order, std::span tile_extents, const sm::NDRange& domain, @@ -92,13 +106,13 @@ static bool is_rectangular_domain( const uint64_t hyperrow_offset = start_tile % hyperrow_num_tiles; if (hyperrow_offset + num_tiles > hyperrow_num_tiles) { if (hyperrow_offset != 0) { - return false; + return IsRectangularDomain::Never; } else if (num_tiles % hyperrow_num_tiles != 0) { - return false; + return IsRectangularDomain::No; } } } - return true; + return IsRectangularDomain::Yes; } /** diff --git a/tiledb/sm/tile/test/unit_arithmetic.cc b/tiledb/sm/tile/test/unit_arithmetic.cc index c9492b86bf1..3cb2cb18342 100644 --- a/tiledb/sm/tile/test/unit_arithmetic.cc +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -15,7 +15,7 @@ using namespace sm; using namespace tiledb::test; template -static bool is_rectangular_domain( +static IsRectangularDomain is_rectangular_domain( std::span tile_extents, T lower_bound, T upper_bound, @@ -29,7 +29,7 @@ static bool is_rectangular_domain( } template -static bool is_rectangular_domain( +static IsRectangularDomain is_rectangular_domain( std::span tile_extents, T d1_lower_bound, T d1_upper_bound, @@ -46,7 +46,7 @@ static bool is_rectangular_domain( } template -static bool is_rectangular_domain( +static IsRectangularDomain is_rectangular_domain( const templates::Dimension
& d1, const templates::Dimension
& d2, uint64_t start_tile, @@ -66,7 +66,7 @@ static bool is_rectangular_domain( } template -static bool is_rectangular_domain( +static IsRectangularDomain is_rectangular_domain( const templates::Dimension
& d1, const templates::Dimension
& d2, const templates::Dimension
& d3, @@ -94,12 +94,13 @@ TEST_CASE("is_rectangular_domain 1d", "[arithmetic]") { *rc::gen::inRange(1, dimension.num_tiles() - start_tile); const std::vector extents = {dimension.extent}; - RC_ASSERT(is_rectangular_domain( - extents, - dimension.domain.lower_bound, - dimension.domain.upper_bound, - start_tile, - num_tiles)); + RC_ASSERT( + is_rectangular_domain( + extents, + dimension.domain.lower_bound, + dimension.domain.upper_bound, + start_tile, + num_tiles) == IsRectangularDomain::Yes); }); } @@ -119,14 +120,15 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { for (uint64_t num_tiles = 1; start_tile + num_tiles <= 16; num_tiles++) { CAPTURE(start_tile, num_tiles); - CHECK(is_rectangular_domain( - extents, - d1_lower, - d1_upper, - d2_lower, - d2_upper, - start_tile, - num_tiles)); + CHECK( + is_rectangular_domain( + extents, + d1_lower, + d1_upper, + d2_lower, + d2_upper, + start_tile, + num_tiles) == IsRectangularDomain::Yes); } } } @@ -135,7 +137,8 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { // 7x7 tiles will subdivide the 16x16 square into 3x3 tiles const std::vector extents = {7, 7}; - auto tt = [&](uint64_t start_tile, uint64_t num_tiles) -> bool { + auto tt = [&](uint64_t start_tile, + uint64_t num_tiles) -> IsRectangularDomain { return is_rectangular_domain( extents, d1_lower, @@ -152,9 +155,9 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { for (uint64_t num_tiles = 1; start_tile + num_tiles <= 9; num_tiles++) { CAPTURE(start_tile, num_tiles); if (num_tiles < 3 || num_tiles % 3 == 0) { - CHECK(tt(start_tile, num_tiles)); + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else { - CHECK(!tt(start_tile, num_tiles)); + CHECK(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); } } } @@ -164,9 +167,9 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { for (uint64_t num_tiles = 1; start_tile + num_tiles <= 9; num_tiles++) { CAPTURE(start_tile, num_tiles); if ((start_tile % 3) + num_tiles <= 3) { - 
CHECK(tt(start_tile, num_tiles)); + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else { - CHECK(!tt(start_tile, num_tiles)); + CHECK(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); } } } @@ -178,7 +181,8 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { auto instance_is_rectangular_domain_2d = [](Dim64 d1, Dim64 d2) { const std::vector extents = {d1.extent, d2.extent}; - auto tt = [&](uint64_t start_tile, uint64_t num_tiles) -> bool { + auto tt = [&](uint64_t start_tile, + uint64_t num_tiles) -> IsRectangularDomain { return is_rectangular_domain(d1, d2, start_tile, num_tiles); }; @@ -190,9 +194,9 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { num_tiles++) { if (num_tiles <= d2.num_tiles() || num_tiles % d2.num_tiles() == 0) { - ASSERTER(tt(t, num_tiles)); + ASSERTER(tt(t, num_tiles) == IsRectangularDomain::Yes); } else { - ASSERTER(!tt(t, num_tiles)); + ASSERTER(tt(t, num_tiles) != IsRectangularDomain::Yes); } } // other tiles @@ -200,9 +204,9 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { for (uint64_t num_tiles = 1; t + o + num_tiles <= total_tiles; num_tiles++) { if (((t + o) % d2.num_tiles()) + num_tiles <= d2.num_tiles()) { - ASSERTER(tt(t + o, num_tiles)); + ASSERTER(tt(t + o, num_tiles) == IsRectangularDomain::Yes); } else { - ASSERTER(!tt(t + o, num_tiles)); + ASSERTER(tt(t + o, num_tiles) != IsRectangularDomain::Yes); } } } @@ -238,9 +242,9 @@ TEST_CASE("is_rectangular_domain 3d", "[arithmetic]") { for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; num_tiles++) { - const bool rectangle = + const IsRectangularDomain rectangle = is_rectangular_domain(d2, d3, start_tile, num_tiles); - const bool plane = + const IsRectangularDomain plane = is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); RC_ASSERT(rectangle == plane); @@ -255,7 +259,8 @@ TEST_CASE("is_rectangular_domain 3d", "[arithmetic]") { */ auto 
instance_is_rectangular_domain_3d = [](Dim64 d1, Dim64 d2, Dim64 d3) { - auto tt = [&](uint64_t start_tile, uint64_t num_tiles) -> bool { + auto tt = [&](uint64_t start_tile, + uint64_t num_tiles) -> IsRectangularDomain { return is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); }; @@ -269,34 +274,34 @@ TEST_CASE("is_rectangular_domain 3d", "[arithmetic]") { if (start_tile % plane_tiles == 0) { // aligned to a plane, several options to be a rectangle if (num_tiles <= d3.num_tiles()) { - ASSERTER(tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else if ( num_tiles <= plane_tiles && num_tiles % d3.num_tiles() == 0) { - ASSERTER(tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else if (num_tiles % (plane_tiles) == 0) { - ASSERTER(tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else { - ASSERTER(!tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); } } else if (start_tile % d3.num_tiles() == 0) { // aligned to a row within a plane, but not aligned to the plane // this is a rectangle if it is an integral number of rows, or // fits within a row if (num_tiles <= d3.num_tiles()) { - ASSERTER(tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else if ( num_tiles % d3.num_tiles() == 0 && (start_tile % plane_tiles) + num_tiles <= plane_tiles) { - ASSERTER(tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else { - ASSERTER(!tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); } } else { // unaligned, only a rectangle if it doesn't advance rows if (start_tile % d3.num_tiles() + num_tiles <= d3.num_tiles()) { - ASSERTER(tt(start_tile, num_tiles)); + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else { - ASSERTER(!tt(start_tile, 
num_tiles)); + ASSERTER(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); } } } @@ -330,11 +335,11 @@ std::optional instance_domain_tile_offset( uint64_t start_tile, uint64_t num_tiles, Layout tile_order = Layout::ROW_MAJOR) { - const bool expect_rectangle = is_rectangular_domain( + const IsRectangularDomain expect_rectangle = is_rectangular_domain( tile_order, tile_extents, domain, start_tile, num_tiles); const std::optional adjusted_domain = domain_tile_offset( tile_order, tile_extents, domain, start_tile, num_tiles); - if (!expect_rectangle) { + if (expect_rectangle != IsRectangularDomain::Yes) { ASSERTER(!adjusted_domain.has_value()); return std::nullopt; } From bb361438eeccc226c216207ef0ef282d3f07ee37 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Sat, 1 Nov 2025 18:40:00 -0400 Subject: [PATCH 089/109] Update unit_tile to expect IsRectangularDomain::No or IsRectangularDomain::Never --- tiledb/sm/tile/test/unit_arithmetic.cc | 103 +++++++++++++------------ 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/tiledb/sm/tile/test/unit_arithmetic.cc b/tiledb/sm/tile/test/unit_arithmetic.cc index 3cb2cb18342..333762dd399 100644 --- a/tiledb/sm/tile/test/unit_arithmetic.cc +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -157,7 +157,7 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { if (num_tiles < 3 || num_tiles % 3 == 0) { CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else { - CHECK(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::No); } } } @@ -169,7 +169,7 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { if ((start_tile % 3) + num_tiles <= 3) { CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); } else { - CHECK(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Never); } } } @@ -196,7 +196,7 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { num_tiles % 
d2.num_tiles() == 0) { ASSERTER(tt(t, num_tiles) == IsRectangularDomain::Yes); } else { - ASSERTER(tt(t, num_tiles) != IsRectangularDomain::Yes); + ASSERTER(tt(t, num_tiles) == IsRectangularDomain::No); } } // other tiles @@ -206,7 +206,7 @@ TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { if (((t + o) % d2.num_tiles()) + num_tiles <= d2.num_tiles()) { ASSERTER(tt(t + o, num_tiles) == IsRectangularDomain::Yes); } else { - ASSERTER(tt(t + o, num_tiles) != IsRectangularDomain::Yes); + ASSERTER(tt(t + o, num_tiles) == IsRectangularDomain::Never); } } } @@ -257,56 +257,59 @@ TEST_CASE("is_rectangular_domain 3d", "[arithmetic]") { * `{d1, d2, d3}` and asserts that `is_rectangular_domain` returns true if and * only if the pair represents an expected rectangle. */ - auto instance_is_rectangular_domain_3d = - [](Dim64 d1, Dim64 d2, Dim64 d3) { - auto tt = [&](uint64_t start_tile, - uint64_t num_tiles) -> IsRectangularDomain { - return is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); - }; + auto instance_is_rectangular_domain_3d = []( + Dim64 d1, Dim64 d2, Dim64 d3) { + auto tt = [&](uint64_t start_tile, + uint64_t num_tiles) -> IsRectangularDomain { + return is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); + }; - const uint64_t total_tiles = - d1.num_tiles() * d2.num_tiles() * d3.num_tiles(); - const uint64_t plane_tiles = d2.num_tiles() * d3.num_tiles(); + const uint64_t total_tiles = + d1.num_tiles() * d2.num_tiles() * d3.num_tiles(); + const uint64_t plane_tiles = d2.num_tiles() * d3.num_tiles(); - for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { - for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; - num_tiles++) { - if (start_tile % plane_tiles == 0) { - // aligned to a plane, several options to be a rectangle - if (num_tiles <= d3.num_tiles()) { - ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); - } else if ( - num_tiles <= plane_tiles && num_tiles % d3.num_tiles() == 0) { - 
ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); - } else if (num_tiles % (plane_tiles) == 0) { - ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); - } else { - ASSERTER(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); - } - } else if (start_tile % d3.num_tiles() == 0) { - // aligned to a row within a plane, but not aligned to the plane - // this is a rectangle if it is an integral number of rows, or - // fits within a row - if (num_tiles <= d3.num_tiles()) { - ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); - } else if ( - num_tiles % d3.num_tiles() == 0 && - (start_tile % plane_tiles) + num_tiles <= plane_tiles) { - ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); - } else { - ASSERTER(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); - } - } else { - // unaligned, only a rectangle if it doesn't advance rows - if (start_tile % d3.num_tiles() + num_tiles <= d3.num_tiles()) { - ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); - } else { - ASSERTER(tt(start_tile, num_tiles) != IsRectangularDomain::Yes); - } - } + for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; + num_tiles++) { + if (start_tile % plane_tiles == 0) { + // aligned to a plane, several options to be a rectangle + if (num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if ( + num_tiles <= plane_tiles && num_tiles % d3.num_tiles() == 0) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if (num_tiles % plane_tiles == 0) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::No); + } + } else if (start_tile % d3.num_tiles() == 0) { + // aligned to a row within a plane, but not aligned to the plane + // this is a rectangle if it is an integral number of rows, 
or + // fits within a row + if (num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if ( + num_tiles % d3.num_tiles() == 0 && + (start_tile % plane_tiles) + num_tiles <= plane_tiles) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if ((start_tile % plane_tiles) + num_tiles <= plane_tiles) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::No); + } else { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Never); + } + } else { + // unaligned, only a rectangle if it doesn't advance rows + if (start_tile % d3.num_tiles() + num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Never); } } - }; + } + } + }; SECTION("Shrinking") { instance_is_rectangular_domain_3d( From 1293f4b26fe3954f8e40bd2d86676fefcd878530 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Sat, 1 Nov 2025 22:02:45 -0400 Subject: [PATCH 090/109] Use IsRectangularDomain::Never to stop buffering tiles --- test/src/unit-cppapi-max-fragment-size.cc | 4 +- .../sm/query/writers/global_order_writer.cc | 41 +++++++++++++++---- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index ce34db8fd92..e5f350ab1cd 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -684,7 +684,9 @@ instance_dense_global_order( if (num_tiles == 0) { ASSERTER(rectangle == sm::IsRectangularDomain::Yes); } else { - ASSERTER(rectangle != sm::IsRectangularDomain::Yes); + // if `Never` then we should have started a new fragment + // to avoid buffering up until we hit the tile size + ASSERTER(rectangle == sm::IsRectangularDomain::No); } } } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 
dedb47bf068..07c44001e18 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -1621,14 +1621,19 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( tile_size += writer_tile_vectors[a]->at(t).filtered_size().value(); } + if (tile_size > + max_fragment_size_.value_or(std::numeric_limits::max())) { + throw GlobalOrderWriterException( + "Fragment size is too small to write a single tile"); + } + + bool should_start_new_fragment = false; + // NB: normally this should only hit once, but if there is a single // tile larger than the max fragment size it could hit twice and error - while (running_tiles_size + tile_size > - max_fragment_size_.value_or(std::numeric_limits::max())) { - if (running_tiles_size == 0) { - throw GlobalOrderWriterException( - "Fragment size is too small to write a single tile"); - } else if (fragment_end < 0) { + if (running_tiles_size + tile_size > + max_fragment_size_.value_or(std::numeric_limits::max())) { + if (fragment_end < 0) { if (fragment_size == 0) { throw GlobalOrderWriterException( "Fragment size is too small to subdivide dense subarray into " @@ -1636,6 +1641,24 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( } } + should_start_new_fragment = true; + } else if (dense() && max_fragment_size_.has_value()) { + // Dense fragments must have a rectangular domain. + // And all fragments must be smaller than `max_fragment_size_`. + // We must identify the highest tile number which satisfies both criteria. 
+ const uint64_t fragment_start_tile = + write_state_start_tile + fragment_start; + const uint64_t maybe_num_tiles = + current_fragment_num_tiles_already_written + t - fragment_start + 1; + should_start_new_fragment = + (is_rectangular_domain( + array_schema_, + subarray_.ndrange(0), + fragment_start_tile, + maybe_num_tiles) == IsRectangularDomain::Never); + } + + if (should_start_new_fragment) { fragments.push_back(fragment_start); iassert(running_tiles_size >= fragment_size); @@ -1649,7 +1672,7 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( current_fragment_num_tiles_already_written = 0; } - bool is_part_of_fragment = true; + bool extends_fragment = true; if (dense() && max_fragment_size_.has_value()) { // Dense fragments must have a rectangular domain. // And all fragments must be smaller than `max_fragment_size_`. @@ -1658,14 +1681,14 @@ GlobalOrderWriter::identify_fragment_tile_boundaries( write_state_start_tile + fragment_start; const uint64_t maybe_num_tiles = current_fragment_num_tiles_already_written + t - fragment_start + 1; - is_part_of_fragment = + extends_fragment = (is_rectangular_domain( array_schema_, subarray_.ndrange(0), fragment_start_tile, maybe_num_tiles) == IsRectangularDomain::Yes); } - if (is_part_of_fragment) { + if (extends_fragment) { fragment_size = running_tiles_size + tile_size; fragment_end = static_cast(t + 1); } From 2b9ed25844d14d212fca46a9ac69862f1feb1c0b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 3 Nov 2025 09:30:42 -0500 Subject: [PATCH 091/109] Use VFSTestSetup and per-test array_name --- test/src/unit-cppapi-max-fragment-size.cc | 69 ++++++++++++++++++----- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index e5f350ab1cd..16d45b51c03 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -38,6 +38,7 @@ #include "test/support/src/array_templates.h" #include 
"test/support/src/fragment_info_helpers.h" #include "test/support/src/helpers.h" +#include "test/support/src/vfs_helpers.h" #include "tiledb/api/c_api/array_schema/array_schema_api_internal.h" #include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" #include "tiledb/api/c_api/subarray/subarray_api_internal.h" @@ -551,14 +552,13 @@ template std::vector>> instance_dense_global_order( const Context& ctx, + const std::string& array_name, tiledb_layout_t tile_order, tiledb_layout_t cell_order, uint64_t max_fragment_size, const std::vector>& dimensions, const std::vector>& subarray, std::optional write_unit_num_cells = std::nullopt) { - const std::string array_name = "max_fragment_size_dense_global_order"; - const std::optional num_cells = subarray_num_cells(subarray); ASSERTER(num_cells.has_value()); @@ -733,11 +733,12 @@ instance_dense_global_order( * for global order writes to dense arrays. */ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { - const std::string array_name = - "cppapi_consolidation_dense_domain_arithmetic_overflow"; - + VFSTestSetup vfs; Context ctx; + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order"); + const tiledb_layout_t tile_order = GENERATE(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); const tiledb_layout_t cell_order = @@ -773,6 +774,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { << write_unit_num_cells) { const auto actual = instance_dense_global_order( ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -831,6 +833,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { REQUIRE_THROWS( instance_dense_global_order( ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -845,6 +848,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { REQUIRE_THROWS( instance_dense_global_order( ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -881,6 
+885,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const auto actual = instance_dense_global_order( ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -937,6 +942,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { REQUIRE_THROWS( instance_dense_global_order( ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -950,6 +956,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { "the subarray must coincide with the tile bounds"); REQUIRE_THROWS(instance_dense_global_order( ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -993,6 +1000,7 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const auto actual = instance_dense_global_order( ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -1018,7 +1026,13 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const uint64_t max_fragment_size = 24; instance_dense_global_order( - ctx, tile_order, cell_order, max_fragment_size, {d1, d2}, {s1, s2}); + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {d1, d2}, + {s1, s2}); } SECTION("Example 2") { @@ -1029,7 +1043,13 @@ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { const uint64_t max_fragment_size = 28; instance_dense_global_order( - ctx, tile_order, cell_order, max_fragment_size, {d1, d2}, {s1, s2}); + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {d1, d2}, + {s1, s2}); } } } @@ -1119,7 +1139,9 @@ make_tile_aligned_subarray( */ template void rapidcheck_dense_array( - Context& ctx, const std::vector>& dimensions) { + Context& ctx, + const std::string& array_name, + const std::vector>& dimensions) { uint64_t num_cells_per_tile = 1; for (const auto& dim : dimensions) { num_cells_per_tile *= dim.extent; @@ -1169,6 +1191,7 @@ void rapidcheck_dense_array( instance_dense_global_order( 
ctx, + array_name, tile_order, cell_order, max_fragment_size, @@ -1184,11 +1207,16 @@ TEST_CASE( using Dim64 = templates::Dimension
; using Dom64 = Dim64::domain_type; + VFSTestSetup vfs; Context ctx; + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_1d"); + SECTION("Shrinking") { instance_dense_global_order( ctx, + array_name, TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR, 2396, @@ -1196,10 +1224,10 @@ TEST_CASE( {Dom64(0, 2969)}); } - rc::prop("max fragment size dense 1d", [&ctx]() { + rc::prop("max fragment size dense 1d", [&]() { Dim64 d1 = *rc::make_dimension
(8192); - rapidcheck_dense_array
(ctx, {d1}); + rapidcheck_dense_array
(ctx, array_name, {d1}); }); } @@ -1210,12 +1238,17 @@ TEST_CASE( using Dim64 = templates::Dimension
; using Dom64 = Dim64::domain_type; + VFSTestSetup vfs; Context ctx; + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_2d"); + SECTION("Shrinking") { SECTION("Example 1") { instance_dense_global_order( ctx, + array_name, TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR, 48, @@ -1226,6 +1259,7 @@ TEST_CASE( SECTION("Example 2") { instance_dense_global_order( ctx, + array_name, TILEDB_COL_MAJOR, TILEDB_ROW_MAJOR, 24, @@ -1236,6 +1270,7 @@ TEST_CASE( SECTION("Example 3") { instance_dense_global_order( ctx, + array_name, TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR, 48, @@ -1251,6 +1286,7 @@ TEST_CASE( */ auto fragments = instance_dense_global_order( ctx, + array_name, TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR, 924, @@ -1260,11 +1296,11 @@ TEST_CASE( } } - rc::prop("max fragment size dense 2d", [&ctx]() { + rc::prop("max fragment size dense 2d", [&]() { Dim64 d1 = *rc::make_dimension
(128); Dim64 d2 = *rc::make_dimension
(128); - rapidcheck_dense_array
(ctx, {d1, d2}); + rapidcheck_dense_array
(ctx, array_name, {d1, d2}); }); } @@ -1275,11 +1311,16 @@ TEST_CASE( using Dim64 = templates::Dimension
; using Dom64 = Dim64::domain_type; + VFSTestSetup vfs; Context ctx; + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_3d"); + SECTION("Shrinking") { instance_dense_global_order( ctx, + array_name, TILEDB_ROW_MAJOR, TILEDB_ROW_MAJOR, 2160, @@ -1287,11 +1328,11 @@ TEST_CASE( {Dom64(5, 19), Dom64(4, 15), Dom64(1, 6)}); } - rc::prop("max fragment size dense 3d", [&ctx]() { + rc::prop("max fragment size dense 3d", [&]() { Dim64 d1 = *rc::make_dimension
(32); Dim64 d2 = *rc::make_dimension
(32); Dim64 d3 = *rc::make_dimension
(32); - rapidcheck_dense_array
(ctx, {d1, d2, d3}); + rapidcheck_dense_array
(ctx, array_name, {d1, d2, d3}); }); } From 9b60e8fca36a66ba48990c903f6c794d35a1cf72 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 3 Nov 2025 14:38:14 -0500 Subject: [PATCH 092/109] Revert back to initial fragment allocation since it helps serialization --- tiledb/sm/query/writers/global_order_writer.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 07c44001e18..fbd844e5fb5 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -278,6 +278,11 @@ Status GlobalOrderWriter::alloc_global_write_state() { "properly finalized")); global_write_state_.reset(tdb_new(GlobalWriteState, query_memory_tracker_)); + // Alloc FragmentMetadata object + global_write_state_->frag_meta_ = this->create_fragment_metadata(); + // Used in serialization when FragmentMetadata is built from ground up + global_write_state_->frag_meta_->set_context_resources(&resources_); + return Status::Ok(); } @@ -889,6 +894,7 @@ Status GlobalOrderWriter::global_write() { // Initialize the global write state if this is the first invocation if (!global_write_state_) { RETURN_CANCEL_OR_ERROR(alloc_global_write_state()); + RETURN_NOT_OK(create_fragment(dense(), global_write_state_->frag_meta_)); RETURN_CANCEL_OR_ERROR(init_global_write_state()); } From f4398d30fe5fe51c94f02aa5530ff3c8928a88bb Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 3 Nov 2025 21:56:01 -0500 Subject: [PATCH 093/109] Disable dense global order writes setting max fragment size for REST --- test/src/unit-cppapi-max-fragment-size.cc | 37 +++++++++++++++++++++++ tiledb/sm/query/query.cc | 17 ++++++++--- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 16d45b51c03..1e9a2d914bf 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ 
b/test/src/unit-cppapi-max-fragment-size.cc @@ -1336,3 +1336,40 @@ TEST_CASE( rapidcheck_dense_array
(ctx, array_name, {d1, d2, d3}); }); } + +TEST_CASE( + "C++ API: Max fragment size dense unsupported on REST", "[cppapi][rest]") { + VFSTestSetup vfs; + if (!vfs.is_rest()) { + SKIP("Test is only applicable to REST client"); + } + + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rest_support"); + + Context ctx; + + using Dim = templates::Dimension; + using Dom = Dim::domain_type; + + Dim d1(0, 0, 1); + Dim d2(0, 0, 1); + Dom s1(0, 0); + Dom s2(0, 0); + const uint64_t max_fragment_size = 24; + + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is not supported for remote global order writes to dense " + "arrays."); + + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + array_name, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + max_fragment_size, + {d1, d2}, + {s1, s2}), + expect); +} diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 7f09475788a..751511714ab 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -1637,10 +1637,19 @@ Status Query::submit() { } // Make sure fragment size is only set for global order. - if (fragment_size_.has_value() && - (layout_ != Layout::GLOBAL_ORDER || type_ != QueryType::WRITE)) { - throw QueryException( - "[submit] Fragment size is only supported for global order writes."); + if (fragment_size_.has_value()) { + if (layout_ != Layout::GLOBAL_ORDER || type_ != QueryType::WRITE) { + throw QueryException( + "[submit] Fragment size is only supported for global order writes."); + } else if (array_schema_->dense() && array_->is_remote()) { + // For dense arrays, `max_fragment_size_` requires buffering of a trail of + // filtered tiles which may not fit in a target fragment. This trail of + // tiles is not serializable. As such `max_fragment_size` cannot be used + // with remote dense array writes. 
+ throw QueryException( + "[submit] Fragment size is not supported for remote global order " + "writes to dense arrays."); + } } // Check attribute/dimensions buffers completeness before query submits From 2e9d9d23af8cb7cd46105855733291faed6e6956 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Mon, 3 Nov 2025 21:57:23 -0500 Subject: [PATCH 094/109] Genericize instance_dense_global_order --- test/src/unit-cppapi-max-fragment-size.cc | 118 ++++++++++++--- test/src/unit-sparse-global-order-reader.cc | 24 +-- test/support/src/array_templates.h | 153 +++++++++++++++++--- 3 files changed, 233 insertions(+), 62 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 1e9a2d914bf..f13218185e7 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -548,7 +548,7 @@ std::optional subarray_num_cells( * * @return a list of the domains written to each fragment in ascending order */ -template +template std::vector>> instance_dense_global_order( const Context& ctx, @@ -558,10 +558,8 @@ instance_dense_global_order( uint64_t max_fragment_size, const std::vector>& dimensions, const std::vector>& subarray, + const F& attributes, std::optional write_unit_num_cells = std::nullopt) { - const std::optional num_cells = subarray_num_cells(subarray); - ASSERTER(num_cells.has_value()); - Domain domain(ctx); for (uint64_t d = 0; d < dimensions.size(); d++) { const std::string dname = "d" + std::to_string(d); @@ -573,23 +571,28 @@ instance_dense_global_order( domain.add_dimension(dim); } - auto a = Attribute::create(ctx, "a"); ArraySchema schema(ctx, TILEDB_DENSE); schema.set_domain(domain); schema.set_tile_order(tile_order); schema.set_cell_order(cell_order); - schema.add_attributes(a); + + const std::vector> ddl_attributes = + templates::ddl::physical_type_attributes(); + for (uint64_t a = 0; a < ddl_attributes.size(); a++) { + const std::string aname = "a" + std::to_string(a + 1); 
+ auto aa = + Attribute::create( + ctx, + aname, + static_cast(std::get<0>(ddl_attributes[a]))) + .set_cell_val_num(std::get<1>(ddl_attributes[a])) + .set_nullable(std::get<2>(ddl_attributes[a])); + schema.add_attribute(aa); + } Array::create(array_name, schema); test::DeleteArrayGuard del(ctx.ptr().get(), array_name.c_str()); - const int a_offset = 77; - std::vector a_write; - a_write.reserve(num_cells.value()); - for (int i = 0; i < static_cast(num_cells.value()); i++) { - a_write.push_back(a_offset + i); - } - std::vector api_subarray; api_subarray.reserve(2 * subarray.size()); for (const auto& sub_dim : subarray) { @@ -605,6 +608,7 @@ instance_dense_global_order( sm::NDRange smsubarray; // write data, should be split into multiple fragments + templates::query::fragment_field_sizes_t cursor; { Array array(ctx, array_name, TILEDB_WRITE); @@ -626,16 +630,39 @@ instance_dense_global_order( .expand_to_tiles_when_no_current_domain(smsubarray_aligned); uint64_t cells_written = 0; - while (cells_written < a_write.size()) { + while (templates::query::num_cells(attributes, cursor) < + attributes.num_cells()) { const uint64_t cells_this_write = std::min( - a_write.size() - cells_written, - write_unit_num_cells.value_or(a_write.size())); - query.set_data_buffer("a", &a_write[cells_written], cells_this_write); + attributes.num_cells() - cells_written, + write_unit_num_cells.value_or(attributes.num_cells())); + + auto field_sizes = templates::query::write_make_field_sizes( + attributes, + cells_written, + write_unit_num_cells.value_or(attributes.num_cells())); + + templates::query::set_fields( + ctx.ptr().get(), + query.ptr().get(), + field_sizes, + const_cast(attributes), + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }, + cursor); const auto status = query.submit(); ASSERTER(status == Query::Status::COMPLETE); - cells_written += cells_this_write; + templates::query::accumulate_cursor(attributes, cursor, 
field_sizes); + + const uint64_t cells_written_this_write = + templates::query::num_cells(attributes, field_sizes); + ASSERTER(cells_written_this_write == cells_this_write); + + cells_written += cells_written_this_write; + ASSERTER( + cells_written == + templates::query::num_cells(attributes, cursor)); const auto w = dynamic_cast( query.ptr()->query_->strategy()); @@ -695,9 +722,9 @@ instance_dense_global_order( } // then read back - std::vector a_read; + F read; { - a_read.resize(a_write.size()); + templates::query::resize(read, cursor); Array array(ctx, array_name, TILEDB_READ); @@ -707,10 +734,21 @@ instance_dense_global_order( Query query(ctx, array, TILEDB_READ); query.set_layout(TILEDB_GLOBAL_ORDER); query.set_subarray(sub); - query.set_data_buffer("a", a_read); + + auto read_field_sizes = + templates::query::make_field_sizes(read); + templates::query::set_fields( + ctx.ptr().get(), + query.ptr().get(), + read_field_sizes, + read, + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }); auto st = query.submit(); ASSERTER(st == Query::Status::COMPLETE); + + ASSERTER(read_field_sizes == cursor); } const std::vector>> fragment_domains = @@ -723,11 +761,47 @@ instance_dense_global_order( max_fragment_size); // this is last because a fragment domain mismatch is more informative - ASSERTER(a_read == a_write); + ASSERTER(read == attributes); return fragment_domains; } +template +std::vector>> +instance_dense_global_order( + const Context& ctx, + const std::string& array_name, + tiledb_layout_t tile_order, + tiledb_layout_t cell_order, + uint64_t max_fragment_size, + const std::vector>& dimensions, + const std::vector>& subarray, + std::optional write_unit_num_cells = std::nullopt) { + const std::optional num_cells = subarray_num_cells(subarray); + ASSERTER(num_cells.has_value()); + + const int a_offset = 77; + std::vector a_write; + a_write.reserve(num_cells.value()); + for (int i = 0; i < 
static_cast(num_cells.value()); i++) { + a_write.push_back(a_offset + i); + } + + templates::Fragment, std::tuple> attributes; + std::get<0>(attributes.attributes()) = a_write; + + return instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + attributes, + write_unit_num_cells); +} + /** * Tests that the max fragment size parameter is properly respected * for global order writes to dense arrays. diff --git a/test/src/unit-sparse-global-order-reader.cc b/test/src/unit-sparse-global-order-reader.cc index 0a7a519f076..d89ccd4da3e 100644 --- a/test/src/unit-sparse-global-order-reader.cc +++ b/test/src/unit-sparse-global-order-reader.cc @@ -3713,19 +3713,7 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { ASSERTER(cursor_cells + num_cells <= expect.size()); // accumulate - std::apply( - [&](auto&... field) { - std::apply( - [&](auto&... field_cursor) { - std::apply( - [&](const auto&... field_size) { - (field.accumulate_cursor(field_cursor, field_size), ...); - }, - field_sizes); - }, - outcursor); - }, - std::tuple_cat(outdims, outatts)); + templates::query::accumulate_cursor(out, outcursor, field_sizes); if (status == TILEDB_COMPLETED) { break; @@ -3735,15 +3723,7 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { // Clean up. tiledb_query_free(&query); - std::apply( - [outcursor](auto&... outfield) { - std::apply( - [&](const auto&... 
field_cursor) { - (outfield.finish_multipart_read(field_cursor), ...); - }, - outcursor); - }, - std::tuple_cat(outdims, outatts)); + templates::query::resize(out, outcursor); ASSERTER(expect.dimensions() == outdims); diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 3940635f0d0..4acf3f45485 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -393,8 +393,13 @@ struct query_buffers { return *this; } + query_field_size_type make_field_size( + uint64_t offset, uint64_t cell_limit) const { + return sizeof(T) * std::min(cell_limit, values_.size() - offset); + } + query_field_size_type make_field_size(uint64_t cell_limit) const { - return sizeof(T) * std::min(cell_limit, values_.size()); + return make_field_size(0, cell_limit); } int32_t attach_to_query( @@ -426,11 +431,12 @@ struct query_buffers { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { cursor += field_sizes; } - void finish_multipart_read(const query_field_size_type& cursor) { + void resize_to_cursor(const query_field_size_type& cursor) { resize(cursor / sizeof(T)); } @@ -632,14 +638,20 @@ struct query_buffers> { return *this; } - query_field_size_type make_field_size(uint64_t cell_limit) const { + query_field_size_type make_field_size( + uint64_t offset, uint64_t cell_limit) const { const uint64_t values_size = - sizeof(T) * std::min(cell_limit, values_.size()); + sizeof(T) * std::min(cell_limit, values_.size() - offset); const uint64_t validity_size = - sizeof(uint8_t) * std::min(cell_limit, validity_.size()); + sizeof(uint8_t) * + std::min(cell_limit, validity_.size() - offset); return std::make_pair(values_size, validity_size); } + query_field_size_type make_field_size(uint64_t cell_limit) const { + return make_field_size(0, cell_limit); + } + int32_t attach_to_query( tiledb_ctx_t* ctx, 
tiledb_query_t* query, @@ -681,12 +693,13 @@ struct query_buffers> { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { std::get<0>(cursor) += std::get<0>(field_sizes); std::get<1>(cursor) += std::get<1>(field_sizes); } - void finish_multipart_read(const query_field_size_type& cursor) { + void resize_to_cursor(const query_field_size_type& cursor) { resize(std::get<0>(cursor) / sizeof(T)); } }; @@ -785,13 +798,26 @@ struct query_buffers> { return *this; } - query_field_size_type make_field_size(uint64_t cell_limit) const { - const uint64_t values_size = sizeof(T) * values_.size(); + query_field_size_type make_field_size( + uint64_t cell_offset, uint64_t cell_limit) const { const uint64_t offsets_size = - sizeof(uint64_t) * std::min(cell_limit, offsets_.size()); + sizeof(uint64_t) * + std::min(cell_limit, offsets_.size() - cell_offset); + + uint64_t values_size; + if (offsets_.size() - cell_offset <= cell_limit) { + values_size = sizeof(T) * (values_.size() - cell_offset); + } else { + values_size = sizeof(T) * (offsets_[cell_offset + cell_limit] - + offsets_[cell_offset]); + } return std::make_pair(values_size, offsets_size); } + query_field_size_type make_field_size(uint64_t cell_limit) const { + return make_field_size(0, cell_limit); + } + int32_t attach_to_query( tiledb_ctx_t* ctx, tiledb_query_t* query, @@ -841,12 +867,13 @@ struct query_buffers> { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { std::get<0>(cursor) += std::get<0>(field_sizes); std::get<1>(cursor) += std::get<1>(field_sizes); } - void finish_multipart_read(const query_field_size_type& cursor) { + void resize_to_cursor(const query_field_size_type& cursor) { values_.resize(std::get<0>(cursor) / sizeof(T)); 
offsets_.resize(std::get<1>(cursor) / sizeof(uint64_t)); } @@ -953,15 +980,29 @@ struct query_buffers>> { return *this; } - query_field_size_type make_field_size(uint64_t cell_limit) const { - const uint64_t values_size = sizeof(T) * values_.size(); + query_field_size_type make_field_size( + uint64_t cell_offset, uint64_t cell_limit) const { const uint64_t offsets_size = sizeof(uint64_t) * std::min(cell_limit, offsets_.size()); const uint64_t validity_size = - sizeof(uint8_t) * std::min(cell_limit, validity_.size()); + sizeof(uint8_t) * + std::min(cell_limit, validity_.size() - cell_offset); + + uint64_t values_size; + if (offsets_.size() - cell_offset <= cell_limit) { + values_size = sizeof(T) * (values_.size() - cell_offset); + } else { + values_size = sizeof(T) * (offsets_[cell_offset + cell_limit] - + offsets_[cell_offset]); + } + return std::make_tuple(values_size, offsets_size, validity_size); } + query_field_size_type make_field_size(uint64_t cell_limit) const { + return make_field_size(0, cell_limit); + } + int32_t attach_to_query( tiledb_ctx_t* ctx, tiledb_query_t* query, @@ -1024,13 +1065,14 @@ struct query_buffers>> { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { std::get<0>(cursor) += std::get<0>(field_sizes); std::get<1>(cursor) += std::get<1>(field_sizes); std::get<2>(cursor) += std::get<2>(field_sizes); } - void finish_multipart_read(const query_field_size_type& cursor) { + void resize_to_cursor(const query_field_size_type& cursor) { values_.resize(std::get<0>(cursor) / sizeof(T)); offsets_.resize(std::get<1>(cursor) / sizeof(uint64_t)); validity_.resize(std::get<2>(cursor) / sizeof(uint8_t)); @@ -1263,6 +1305,28 @@ struct query_applicator { fields); } + /** + * @return a tuple containing the size of each input field to write for a + * range of input cells [cell_offset, cell_offset + cell_limit] + */ + static 
auto write_make_field_sizes( + const std::tuple&...> fields, + uint64_t cell_offset, + uint64_t cell_limit = std::numeric_limits::max()) { + std::optional num_cells; + auto write_make_field_size = [&]( + const query_buffers& field) { + const auto field_size = field.make_field_size(cell_offset, cell_limit); + return field_size; + }; + + return std::apply( + [&](const auto&... field) { + return std::make_tuple(write_make_field_size(field)...); + }, + fields); + } + /** * Sets buffers on `query` for the variadic `fields` and `fields_sizes` */ @@ -1357,6 +1421,19 @@ using fragment_field_sizes_t = decltype(make_field_sizes( std::declval(), std::declval())); +template +fragment_field_sizes_t write_make_field_sizes( + const F& fragment, + uint64_t cell_offset, + uint64_t cell_limit = std::numeric_limits::max()) { + typename F::DimensionBuffersConstRef dims = fragment.dimensions(); + typename F::AttributeBuffersConstRef atts = fragment.attributes(); + return [cell_offset, cell_limit](std::tuple fields) { + return query_applicator...>:: + write_make_field_sizes(fields, cell_offset, cell_limit); + }(std::tuple_cat(dims, atts)); +} + /** * Apply field cursor and sizes to each field of `fragment`. */ @@ -1382,6 +1459,46 @@ void apply_cursor( std::tuple_cat(dims, atts)); } +/** + * Advances field cursors `cursor` over `fragment` by the amount of data from + * `field_sizes` + */ +template +void accumulate_cursor( + const F& fragment, + fragment_field_sizes_t& cursor, + const fragment_field_sizes_t& field_sizes) { + std::apply( + [&](auto&... field) { + std::apply( + [&](auto&... field_cursor) { + std::apply( + [&](const auto&... field_size) { + (field.accumulate_cursor(field_cursor, field_size), ...); + }, + field_sizes); + }, + cursor); + }, + std::tuple_cat(fragment.dimensions(), fragment.attributes())); +} + +/** + * Resizes the fields of `fragment` to the sizes given by `cursor`. 
+ */ +template +void resize(F& fragment, const fragment_field_sizes_t& cursor) { + std::apply( + [cursor](auto&... field) { + std::apply( + [&](const auto&... field_cursor) { + (field.resize_to_cursor(field_cursor), ...); + }, + cursor); + }, + std::tuple_cat(fragment.dimensions(), fragment.attributes())); +} + /** * Set buffers on `query` for the tuple of field columns */ From 5dd97f41ca72d06a1d6c89c2a702d54c40b0bfef Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 11:36:28 -0500 Subject: [PATCH 095/109] Fix bad value_size with var length query_buffers --- test/support/src/array_templates.h | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 4acf3f45485..268a8e3f12f 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -804,13 +804,10 @@ struct query_buffers> { sizeof(uint64_t) * std::min(cell_limit, offsets_.size() - cell_offset); - uint64_t values_size; - if (offsets_.size() - cell_offset <= cell_limit) { - values_size = sizeof(T) * (values_.size() - cell_offset); - } else { - values_size = sizeof(T) * (offsets_[cell_offset + cell_limit] - - offsets_[cell_offset]); - } + // NB: unlike `offsets_size` this can just be the whole buffer + // since offsets is what determines the values + const uint64_t values_size = sizeof(T) * values_.size(); + return std::make_pair(values_size, offsets_size); } @@ -983,18 +980,15 @@ struct query_buffers>> { query_field_size_type make_field_size( uint64_t cell_offset, uint64_t cell_limit) const { const uint64_t offsets_size = - sizeof(uint64_t) * std::min(cell_limit, offsets_.size()); + sizeof(uint64_t) * + std::min(cell_limit, offsets_.size() - cell_offset); const uint64_t validity_size = sizeof(uint8_t) * std::min(cell_limit, validity_.size() - cell_offset); - uint64_t values_size; - if (offsets_.size() - cell_offset <= cell_limit) { - values_size = sizeof(T) * 
(values_.size() - cell_offset); - } else { - values_size = sizeof(T) * (offsets_[cell_offset + cell_limit] - - offsets_[cell_offset]); - } + // NB: unlike the above this can just be the whole buffer + // since offsets is what determines the values + const uint64_t values_size = sizeof(T) * values_.size(); return std::make_tuple(values_size, offsets_size, validity_size); } From 888099c3122b98ccc1a8c3dfd1bfd136b7e7e03e Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 11:37:10 -0500 Subject: [PATCH 096/109] Add simple var-length example --- test/src/unit-cppapi-max-fragment-size.cc | 65 +++++++++++++++++++++++ test/support/src/array_templates.h | 15 ++++++ 2 files changed, 80 insertions(+) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index f13218185e7..169536208d1 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1411,6 +1411,70 @@ TEST_CASE( }); } +/** + * Test some edge cases induced by variable-length tiles + */ +TEST_CASE( + "C++ API: Max fragment size dense array var size tiles", + "[cppapi][max-frag-size]") { + VFSTestSetup vfs; + Context ctx; + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_var"); + + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + + using F = templates::Fragment, std::tuple>>; + + const tiledb_layout_t tile_order = TILEDB_ROW_MAJOR; + const tiledb_layout_t cell_order = TILEDB_ROW_MAJOR; + + SECTION("Rectangle tiles") { + const uint64_t d1_extent = 8; + const uint64_t d2_span = 10000; + REQUIRE(d2_span % d1_extent == 0); + + const uint64_t d2_extent = d2_span / d1_extent; + + const Dim64 row(0, std::numeric_limits::max() - 1, d1_extent); + const Dim64 col(0, d2_span - 1, d2_extent); + + const Dom64 subrow(0, d1_extent - 1); + const Dom64 subcol = (col.domain); + + const std::optional num_cells = + subarray_num_cells(std::vector{subrow, subcol}); + 
REQUIRE(num_cells.has_value()); + + F attributes; + attributes.reserve(num_cells.value()); + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = std::to_string(c); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const uint64_t max_fragment_size = 4 * 64 * 1024; + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow, Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); + } +} + TEST_CASE( "C++ API: Max fragment size dense unsupported on REST", "[cppapi][rest]") { VFSTestSetup vfs; @@ -1447,3 +1511,4 @@ TEST_CASE( {s1, s2}), expect); } + diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 268a8e3f12f..a40c956c051 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -1641,6 +1641,13 @@ namespace ddl { template struct cell_type_traits; +template <> +struct cell_type_traits { + static constexpr sm::Datatype physical_type = sm::Datatype::CHAR; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool is_nullable = false; +}; + template <> struct cell_type_traits { static constexpr sm::Datatype physical_type = sm::Datatype::INT32; @@ -1655,6 +1662,14 @@ struct cell_type_traits { static constexpr bool is_nullable = false; }; +template +struct cell_type_traits> { + static constexpr sm::Datatype physical_type = + cell_type_traits::physical_type; + static constexpr uint32_t cell_val_num = std::numeric_limits::max(); + static constexpr bool is_nullable = false; +}; + template std::vector> physical_type_attributes() { std::vector> ret; From 3a3549462a18a634126026594d0048b682827dc2 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 
2025 11:58:51 -0500 Subject: [PATCH 097/109] Some Skew examples --- test/src/unit-cppapi-max-fragment-size.cc | 149 +++++++++++++++++++--- 1 file changed, 129 insertions(+), 20 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 169536208d1..4c8affc724b 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1447,31 +1447,140 @@ TEST_CASE( subarray_num_cells(std::vector{subrow, subcol}); REQUIRE(num_cells.has_value()); + const uint64_t max_fragment_size = 4 * 64 * 1024; + + SECTION("Even") { + F attributes; + attributes.reserve(num_cells.value()); + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = std::to_string(c); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); + } + + const uint64_t num_cells_per_tile = d1_extent * d2_extent; + F attributes; attributes.reserve(num_cells.value()); - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = std::to_string(c); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + + SECTION("Skew first tile") { + // inflate all the records of the first tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (c < num_cells_per_tile ? 
"foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); + expect.push_back( + {subrow, + Dom64( + d2_extent * d1_extent * 1 / 4, + (d2_extent * d1_extent * 3 / 4) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); } - const uint64_t max_fragment_size = 4 * 64 * 1024; + SECTION("Skew second tile") { + // inflate all the records of the second tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells_per_tile <= c && c < 2 * num_cells_per_tile ? + "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes); - - std::vector> expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); - expect.push_back( - {subrow, Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); + expect.push_back( + {subrow, + Dom64( + d2_extent * d1_extent * 1 / 4, + (d2_extent * d1_extent * 3 / 4) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); + } + 
+ SECTION("Skew last tile") { + // inflate all the records of the last tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells.value() - num_cells_per_tile <= c ? + "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow, + Dom64( + d2_extent * d1_extent / 2, + (d2_extent * d1_extent * 7 / 8) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent * 7 / 8, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); + } } } From a93b368b9383fd148a7ef75e27db6d209a839f0c Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 14:50:22 -0500 Subject: [PATCH 098/109] Really fix make_field_size with offset --- test/support/src/array_templates.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index a40c956c051..933fd8a7608 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -800,13 +800,20 @@ struct query_buffers> { query_field_size_type make_field_size( uint64_t cell_offset, uint64_t cell_limit) const { + const uint64_t num_cells = + std::min(cell_limit, offsets_.size() - cell_offset); + const uint64_t offsets_size = sizeof(uint64_t) * - std::min(cell_limit, offsets_.size() - cell_offset); + std::min(num_cells, offsets_.size() - cell_offset); - // NB: unlike `offsets_size` this can just be the whole buffer - // since offsets is what determines the values - const uint64_t values_size = sizeof(T) * values_.size(); + uint64_t values_size; + if (cell_offset + num_cells + 1 < 
offsets_.size()) { + values_size = sizeof(T) * + (offsets_[cell_offset + num_cells] - offsets_[cell_offset]); + } else { + values_size = sizeof(T) * (values_.size() - offsets_[cell_offset]); + } return std::make_pair(values_size, offsets_size); } From 6f063f331349bff53622d7d5230616f46fff8d0b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 14:50:41 -0500 Subject: [PATCH 099/109] make_field_sizes const ref --- test/support/src/array_templates.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 933fd8a7608..8ef5af85023 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -1283,7 +1283,7 @@ struct query_applicator { * @return a tuple containing the size of each input field */ static auto make_field_sizes( - const std::tuple&...> fields, + const std::tuple&...> fields, uint64_t cell_limit = std::numeric_limits::max()) { std::optional num_cells; auto make_field_size = [&](const query_buffers& field) { @@ -1408,9 +1408,10 @@ namespace query { */ template auto make_field_sizes( - F& fragment, uint64_t cell_limit = std::numeric_limits::max()) { - typename F::DimensionBuffersRef dims = fragment.dimensions(); - typename F::AttributeBuffersRef atts = fragment.attributes(); + const F& fragment, + uint64_t cell_limit = std::numeric_limits::max()) { + typename F::DimensionBuffersConstRef dims = fragment.dimensions(); + typename F::AttributeBuffersConstRef atts = fragment.attributes(); return [cell_limit](std::tuple fields) { return query_applicator::make_field_sizes( fields, cell_limit); @@ -1420,7 +1421,7 @@ auto make_field_sizes( template using fragment_field_sizes_t = decltype(make_field_sizes( - std::declval(), std::declval())); + std::declval(), std::declval())); template fragment_field_sizes_t write_make_field_sizes( From e29a58233098f4fb255822c3b2a511d3526991f5 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 
4 Nov 2025 14:51:01 -0500 Subject: [PATCH 100/109] Fragment::slice --- test/support/src/array_templates.h | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 8ef5af85023..89f5fdc185a 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -448,6 +448,12 @@ struct query_buffers { values_.insert(std::forward(args)...); } + self_type slice(uint64_t cell_start, uint64_t num_cells) const { + return self_type(std::vector( + values_.begin() + cell_start, + values_.begin() + cell_start + num_cells)); + } + auto begin() { return values_.begin(); } @@ -632,6 +638,15 @@ struct query_buffers> { validity_.end(), from.validity_.begin(), from.validity_.end()); } + self_type slice(uint64_t cell_start, uint64_t num_cells) const { + self_type ret; + ret.values_ = std::vector( + values_.begin() + cell_start, values_.begin() + num_cells); + ret.validity_ = std::vector( + validity_.begin() + cell_start, validity_.begin() + num_cells); + return ret; + } + self_type& operator=(const self_type& other) { values_ = other.values_; validity_ = other.validity_; @@ -792,6 +807,31 @@ struct query_buffers> { values_.insert(values_.end(), from.values_.begin(), from.values_.end()); } + self_type slice(uint64_t cell_start, uint64_t num_cells) const { + std::vector slice_offsets( + offsets_.begin() + cell_start, + offsets_.begin() + cell_start + num_cells); + std::vector slice_values; + for (uint64_t o = cell_start; o < cell_start + num_cells; o++) { + const uint64_t end = + (o + 1 == offsets_.size() ? 
values_.size() : offsets_[o + 1]); + slice_values.insert( + slice_values.end(), + values_.begin() + offsets_[o], + values_.begin() + end); + } + + const uint64_t offset_adjustment = slice_offsets[0]; + for (uint64_t& offset : slice_offsets) { + offset -= offset_adjustment; + } + + self_type ret; + ret.offsets_ = slice_offsets; + ret.values_ = slice_values; + return ret; + } + self_type& operator=(const self_type& other) { offsets_ = other.offsets_; values_ = other.values_; @@ -1199,6 +1239,25 @@ struct Fragment { std::tuple_cat(dimensions(), attributes())); } + /** + * @return a new fragment containing the cells in the range `[cell_start, + * cell_start + num_cells)` + */ + self_type slice(uint64_t cell_start, uint64_t num_cells) const { + const auto dims = std::apply( + [&](Ts&... dst) { + return std::make_tuple(dst.slice(cell_start, num_cells)...); + }, + dimensions()); + const auto atts = std::apply( + [&](Ts&... dst) { + return std::make_tuple(dst.slice(cell_start, num_cells)...); + }, + attributes()); + + return self_type{.dims_ = dims, .atts_ = atts}; + } + bool operator==(const self_type& other) const { return dimensions() == other.dimensions() && attributes() == other.attributes(); From b82617183e937c2826d9977b34dfd0feb185fc47 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 14:52:25 -0500 Subject: [PATCH 101/109] Use write_unit_num_cells in var examples --- test/src/unit-cppapi-max-fragment-size.cc | 270 +++++++++++----------- 1 file changed, 141 insertions(+), 129 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 4c8affc724b..be473ce317a 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -636,27 +636,28 @@ instance_dense_global_order( attributes.num_cells() - cells_written, write_unit_num_cells.value_or(attributes.num_cells())); - auto field_sizes = templates::query::write_make_field_sizes( - attributes, - 
cells_written, - write_unit_num_cells.value_or(attributes.num_cells())); + const F attributes_this_write = + attributes.slice(cells_written, cells_this_write); + + auto field_sizes = templates::query::make_field_sizes( + attributes_this_write, cells_this_write); + templates::query::accumulate_cursor( + attributes_this_write, cursor, field_sizes); templates::query::set_fields( ctx.ptr().get(), query.ptr().get(), field_sizes, - const_cast(attributes), + const_cast(attributes_this_write), [](unsigned d) { return "d" + std::to_string(d + 1); }, - [](unsigned a) { return "a" + std::to_string(a + 1); }, - cursor); + [](unsigned a) { return "a" + std::to_string(a + 1); }); const auto status = query.submit(); ASSERTER(status == Query::Status::COMPLETE); - templates::query::accumulate_cursor(attributes, cursor, field_sizes); - const uint64_t cells_written_this_write = - templates::query::num_cells(attributes, field_sizes); + templates::query::num_cells( + attributes_this_write, field_sizes); ASSERTER(cells_written_this_write == cells_this_write); cells_written += cells_written_this_write; @@ -1449,137 +1450,149 @@ TEST_CASE( const uint64_t max_fragment_size = 4 * 64 * 1024; - SECTION("Even") { - F attributes; - attributes.reserve(num_cells.value()); - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = std::to_string(c); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); - } + F attributes; + attributes.reserve(num_cells.value()); - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes); - - std::vector> expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); - expect.push_back( - {subrow, - Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); - } + const std::optional write_unit_num_cells = + GENERATE(std::optional{}, 64, 1024, 1024 * 1024); 
const uint64_t num_cells_per_tile = d1_extent * d2_extent; - F attributes; - attributes.reserve(num_cells.value()); + DYNAMIC_SECTION( + "write_unit_num_cells = " + << (write_unit_num_cells.has_value() ? + std::to_string(write_unit_num_cells.value()) : + "unlimited")) { + SECTION("Even") { + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = std::to_string(c); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } - SECTION("Skew first tile") { - // inflate all the records of the first tile - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = - (c < num_cells_per_tile ? "foobargubquux" + std::to_string(c) : - std::to_string(c)); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); } - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes); - - std::vector> expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); - expect.push_back( - {subrow, - Dom64( - d2_extent * d1_extent * 1 / 4, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow, - Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); - } + SECTION("Skew first tile") { + // inflate all the records of the first tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (c < num_cells_per_tile ? 
"foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } - SECTION("Skew second tile") { - // inflate all the records of the second tile - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = - (num_cells_per_tile <= c && c < 2 * num_cells_per_tile ? - "foobargubquux" + std::to_string(c) : - std::to_string(c)); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + expect.push_back( + {subrow, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); + expect.push_back( + {subrow, + Dom64( + d2_extent * d1_extent * 1 / 4, + (d2_extent * d1_extent * 3 / 4) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); } - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes); - - std::vector> expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); - expect.push_back( - {subrow, - Dom64( - d2_extent * d1_extent * 1 / 4, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow, - Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); - } + SECTION("Skew second tile") { + // inflate all the records of the second tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells_per_tile <= c && c < 2 * num_cells_per_tile ? 
+ "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } - SECTION("Skew last tile") { - // inflate all the records of the last tile - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = - (num_cells.value() - num_cells_per_tile <= c ? - "foobargubquux" + std::to_string(c) : - std::to_string(c)); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); + expect.push_back( + {subrow, + Dom64( + d2_extent * d1_extent * 1 / 4, + (d2_extent * d1_extent * 3 / 4) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); } - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes); - - std::vector> expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); - expect.push_back( - {subrow, - Dom64( - d2_extent * d1_extent / 2, - (d2_extent * d1_extent * 7 / 8) - 1)}); - expect.push_back( - {subrow, - Dom64(d2_extent * d1_extent * 7 / 8, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); + SECTION("Skew last tile") { + // inflate all the records of the last tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells.value() - num_cells_per_tile <= c ? 
+ "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow, + Dom64( + d2_extent * d1_extent / 2, + (d2_extent * d1_extent * 7 / 8) - 1)}); + expect.push_back( + {subrow, + Dom64(d2_extent * d1_extent * 7 / 8, d2_extent * d1_extent - 1)}); + CHECK(expect == actual); + } } } } @@ -1620,4 +1633,3 @@ TEST_CASE( {s1, s2}), expect); } - From e4f7479ae115ff9207ffe1a685d7c5c4519db6f7 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 15:27:00 -0500 Subject: [PATCH 102/109] Fix Fragment::num_cells to use dimension if present --- test/support/src/array_templates.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 89f5fdc185a..ed30d713be4 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -1183,7 +1183,7 @@ struct Fragment { if constexpr (std::tuple_size::value == 0) { return std::get<0>(atts_).num_cells(); } else { - return std::get<0>(atts_).num_cells(); + return std::get<0>(dims_).num_cells(); } } From 47648e64c1250cc02486961db75ecc2f3e07402b Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 15:27:22 -0500 Subject: [PATCH 103/109] Two rows of tiles in var length example --- test/src/unit-cppapi-max-fragment-size.cc | 74 ++++++++++++++++++----- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index be473ce317a..773175ef8dc 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ 
b/test/src/unit-cppapi-max-fragment-size.cc @@ -1441,8 +1441,11 @@ TEST_CASE( const Dim64 row(0, std::numeric_limits::max() - 1, d1_extent); const Dim64 col(0, d2_span - 1, d2_extent); - const Dom64 subrow(0, d1_extent - 1); - const Dom64 subcol = (col.domain); + const Dom64 subrow(0, 2 * d1_extent - 1); + const Dom64 subcol = col.domain; + + const Dom64 subrow_0(0, d1_extent - 1); + const Dom64 subrow_1(d1_extent, 2 * d1_extent - 1); const std::optional num_cells = subarray_num_cells(std::vector{subrow, subcol}); @@ -1482,10 +1485,20 @@ TEST_CASE( write_unit_num_cells); std::vector> expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back({subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); expect.push_back( - {subrow, + {subrow_0, Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + d2_extent * d1_extent * 3 / 4 - 1)}); + expect.push_back( + {subrow_1, + Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); CHECK(expect == actual); } @@ -1512,14 +1525,24 @@ TEST_CASE( std::vector> expect; expect.push_back( - {subrow, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); + {subrow_0, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); expect.push_back( - {subrow, + {subrow_0, Dom64( d2_extent * d1_extent * 1 / 4, (d2_extent * d1_extent * 3 / 4) - 1)}); expect.push_back( - {subrow, + {subrow_0, + Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + d2_extent * d1_extent * 3 / 4 - 1)}); + expect.push_back( + {subrow_1, Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); CHECK(expect == actual); } @@ -1547,14 +1570,25 @@ TEST_CASE( write_unit_num_cells); std::vector> 
expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); + expect.push_back({subrow_0, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); expect.push_back( - {subrow, + {subrow_0, Dom64( d2_extent * d1_extent * 1 / 4, (d2_extent * d1_extent * 3 / 4) - 1)}); expect.push_back( - {subrow, + {subrow_0, + Dom64( + d2_extent * d1_extent * 3 / 4, (d2_extent * d1_extent) - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + d2_extent * d1_extent * 3 / 4 - 1)}); + expect.push_back( + {subrow_1, Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); CHECK(expect == actual); } @@ -1582,14 +1616,24 @@ TEST_CASE( write_unit_num_cells); std::vector> expect; - expect.push_back({subrow, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back({subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow_0, + Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + (d2_extent * d1_extent * 3 / 4) - 1)}); expect.push_back( - {subrow, + {subrow_1, Dom64( - d2_extent * d1_extent / 2, - (d2_extent * d1_extent * 7 / 8) - 1)}); + d2_extent * d1_extent * 3 / 4, + d2_extent * d1_extent * 7 / 8 - 1)}); expect.push_back( - {subrow, + {subrow_1, Dom64(d2_extent * d1_extent * 7 / 8, d2_extent * d1_extent - 1)}); CHECK(expect == actual); } From 25bf18424fdbc4d30a33d81ceab0ca652f5e79c6 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 21:37:39 -0500 Subject: [PATCH 104/109] Remove unused --- test/support/src/array_templates.h | 1 - 1 file changed, 1 deletion(-) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index ed30d713be4..305b02aab1b 100644 --- a/test/support/src/array_templates.h +++ 
b/test/support/src/array_templates.h @@ -1373,7 +1373,6 @@ struct query_applicator { const std::tuple&...> fields, uint64_t cell_offset, uint64_t cell_limit = std::numeric_limits::max()) { - std::optional num_cells; auto write_make_field_size = [&]( const query_buffers& field) { const auto field_size = field.make_field_size(cell_offset, cell_limit); From b75bd59b66cd8b28ffb8a10829f609a0bf012945 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 21:38:02 -0500 Subject: [PATCH 105/109] Correctly use VFSTestSetup context --- test/src/unit-cppapi-max-fragment-size.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 773175ef8dc..2c89fe6e4d4 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -809,7 +809,7 @@ instance_dense_global_order( */ TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { VFSTestSetup vfs; - Context ctx; + Context ctx(vfs.ctx()); const std::string array_name = vfs.array_uri("max_fragment_size_dense_global_order"); @@ -1283,7 +1283,7 @@ TEST_CASE( using Dom64 = Dim64::domain_type; VFSTestSetup vfs; - Context ctx; + Context ctx(vfs.ctx()); const std::string array_name = vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_1d"); @@ -1314,7 +1314,7 @@ TEST_CASE( using Dom64 = Dim64::domain_type; VFSTestSetup vfs; - Context ctx; + Context ctx(vfs.ctx()); const std::string array_name = vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_2d"); @@ -1387,7 +1387,7 @@ TEST_CASE( using Dom64 = Dim64::domain_type; VFSTestSetup vfs; - Context ctx; + Context ctx(vfs.ctx()); const std::string array_name = vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_3d"); @@ -1419,7 +1419,7 @@ TEST_CASE( "C++ API: Max fragment size dense array var size tiles", "[cppapi][max-frag-size]") { VFSTestSetup vfs; - Context ctx; + 
Context ctx(vfs.ctx()); const std::string array_name = vfs.array_uri("max_fragment_size_dense_global_order_var"); @@ -1651,7 +1651,7 @@ TEST_CASE( const std::string array_name = vfs.array_uri("max_fragment_size_dense_global_order_rest_support"); - Context ctx; + Context ctx(vfs.ctx()); using Dim = templates::Dimension; using Dom = Dim::domain_type; From a1640a8c4d4ebc351a5ed92781b13e9b4e100038 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 22:20:28 -0500 Subject: [PATCH 106/109] Add test for large write num cells and update FIXME comment --- test/src/unit-cppapi-max-fragment-size.cc | 394 ++++++++++-------- .../sm/query/writers/global_order_writer.cc | 10 + 2 files changed, 237 insertions(+), 167 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index 2c89fe6e4d4..e49e6bde146 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1451,191 +1451,251 @@ TEST_CASE( subarray_num_cells(std::vector{subrow, subcol}); REQUIRE(num_cells.has_value()); - const uint64_t max_fragment_size = 4 * 64 * 1024; + const uint64_t approx_tiles_per_fragment = GENERATE(4, 9); + const uint64_t max_fragment_size = approx_tiles_per_fragment * 64 * 1024; F attributes; attributes.reserve(num_cells.value()); - const std::optional write_unit_num_cells = - GENERATE(std::optional{}, 64, 1024, 1024 * 1024); + const std::optional write_unit_num_cells = GENERATE_COPY( + std::optional{}, + 64, + 1024, + 1024 * 1024, + num_cells.value() - 1); const uint64_t num_cells_per_tile = d1_extent * d2_extent; DYNAMIC_SECTION( - "write_unit_num_cells = " - << (write_unit_num_cells.has_value() ? 
- std::to_string(write_unit_num_cells.value()) : - "unlimited")) { - SECTION("Even") { - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = std::to_string(c); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + "approx_tiles_per_fragment = " << approx_tiles_per_fragment) { + DYNAMIC_SECTION( + "write_unit_num_cells = " + << (write_unit_num_cells.has_value() ? + std::to_string(write_unit_num_cells.value()) : + "unlimited")) { + SECTION("Even") { + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = std::to_string(c); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back( + {subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow_0, + Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + d2_extent * d1_extent * 3 / 4 - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 4, + d2_extent * d1_extent - 1)}); + } else { + expect.push_back({subrow_0, subcol}); + expect.push_back({subrow_1, subcol}); + } + CHECK(expect == actual); } - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes, - write_unit_num_cells); - - std::vector> expect; - expect.push_back({subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); - expect.push_back( - {subrow_0, - Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); - expect.push_back( - {subrow_1, 
Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - d2_extent * d1_extent * 3 / 4 - 1)}); - expect.push_back( - {subrow_1, - Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); - } + SECTION("Skew first tile") { + // inflate all the records of the first tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (c < num_cells_per_tile ? "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } - SECTION("Skew first tile") { - // inflate all the records of the first tile - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = - (c < num_cells_per_tile ? "foobargubquux" + std::to_string(c) : - std::to_string(c)); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back( + {subrow_0, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); + expect.push_back( + {subrow_0, + Dom64( + d2_extent * d1_extent * 1 / 4, + (d2_extent * d1_extent * 3 / 4) - 1)}); + expect.push_back( + {subrow_0, + Dom64( + d2_extent * d1_extent * 3 / 4, + d2_extent * d1_extent - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + d2_extent * d1_extent * 3 / 4 - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 4, + d2_extent * d1_extent - 1)}); + } else { + expect.push_back( + {subrow_0, Dom64(0, d2_extent * d1_extent * 7 / 8 - 1)}); + expect.push_back( + {subrow_0, + Dom64( + d2_extent * d1_extent * 7 / 
8, + d2_extent * d1_extent - 1)}); + expect.push_back({subrow_1, subcol}); + } + CHECK(expect == actual); } - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes, - write_unit_num_cells); - - std::vector> expect; - expect.push_back( - {subrow_0, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 1 / 4, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); - expect.push_back( - {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - d2_extent * d1_extent * 3 / 4 - 1)}); - expect.push_back( - {subrow_1, - Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); - } + SECTION("Skew second tile") { + // inflate all the records of the second tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells_per_tile <= c && c < 2 * num_cells_per_tile ? + "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } - SECTION("Skew second tile") { - // inflate all the records of the second tile - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = - (num_cells_per_tile <= c && c < 2 * num_cells_per_tile ? 
- "foobargubquux" + std::to_string(c) : - std::to_string(c)); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back( + {subrow_0, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); + expect.push_back( + {subrow_0, + Dom64( + d2_extent * d1_extent * 1 / 4, + (d2_extent * d1_extent * 3 / 4) - 1)}); + expect.push_back( + {subrow_0, + Dom64( + d2_extent * d1_extent * 3 / 4, + (d2_extent * d1_extent) - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + d2_extent * d1_extent * 3 / 4 - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 4, + d2_extent * d1_extent - 1)}); + } else { + expect.push_back( + {subrow_0, Dom64(0, d2_extent * d1_extent * 7 / 8 - 1)}); + expect.push_back( + {subrow_0, + Dom64( + d2_extent * d1_extent * 7 / 8, + d2_extent * d1_extent - 1)}); + expect.push_back({subrow_1, subcol}); + } + CHECK(expect == actual); } - const auto actual = instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes, - write_unit_num_cells); - - std::vector> expect; - expect.push_back({subrow_0, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 1 / 4, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 3 / 4, (d2_extent * d1_extent) - 1)}); - expect.push_back( - {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - d2_extent * d1_extent * 3 / 4 - 
1)}); - expect.push_back( - {subrow_1, - Dom64(d2_extent * d1_extent * 3 / 4, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); - } + SECTION("Skew last tile") { + // inflate all the records of the last tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells.value() - num_cells_per_tile <= c ? + "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } - SECTION("Skew last tile") { - // inflate all the records of the last tile - for (uint64_t c = 0; c < num_cells.value(); c++) { - const std::string str = - (num_cells.value() - num_cells_per_tile <= c ? - "foobargubquux" + std::to_string(c) : - std::to_string(c)); - std::get<0>(attributes.attributes()) - .push_back(std::span(str.begin(), str.end())); + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back( + {subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); + expect.push_back( + {subrow_0, + Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); + expect.push_back( + {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 8, + (d2_extent * d1_extent * 3 / 4) - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 3 / 4, + d2_extent * d1_extent * 7 / 8 - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 7 / 8, + d2_extent * d1_extent - 1)}); + } else { + expect.push_back({subrow_0, subcol}); + expect.push_back( + {subrow_1, Dom64(0, d2_extent * d1_extent * 7 / 8 - 1)}); + expect.push_back( + {subrow_1, + Dom64( + d2_extent * d1_extent * 7 / 8, + d2_extent * d1_extent - 1)}); + } + CHECK(expect == actual); } - - const auto actual 
= instance_dense_global_order( - ctx, - array_name, - tile_order, - cell_order, - max_fragment_size, - {row, col}, - {subrow, subcol}, - attributes, - write_unit_num_cells); - - std::vector> expect; - expect.push_back({subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); - expect.push_back( - {subrow_0, - Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); - expect.push_back( - {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 4, - d2_extent * d1_extent * 7 / 8 - 1)}); - expect.push_back( - {subrow_1, - Dom64(d2_extent * d1_extent * 7 / 8, d2_extent * d1_extent - 1)}); - CHECK(expect == actual); } } } diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index fbd844e5fb5..03ab566172d 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -753,6 +753,16 @@ Status GlobalOrderWriter::finalize_global_write_state() { // tile runs over... in this case we need to do the rectangle thing all // over again so as to avoid writing a fragment which exceeds the max // fragment size. + // + // HOWEVER, this state might not be reachable, because dense global + // order writes must be fully tile-aligned, which means that the + // "last tile" which we would flush here should have zero cells. + // Note that the subarray is a rectangle, so + // `identify_fragment_tile_boundaries` should always indicate that all of + // the tiles can be written. + // + // As such we are not going to expend more effort on this unless + // we see evidence of it. 
} } else { iassert(global_write_state_->last_tiles_.begin()->second.size() <= 1); From 0dead5ba8af464c5382085967fd96e3b30d6b7f2 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 22:55:02 -0500 Subject: [PATCH 107/109] Attempt to fix false positive Wmaybe-uninitialized --- test/support/src/array_templates.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 305b02aab1b..9dc422bc48a 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -1402,7 +1402,11 @@ struct query_applicator { const auto& field_cursor) { const auto rc = field.attach_to_query(ctx, query, field_size, name, field_cursor); - ASSERTER(std::optional() == error_if_any(ctx, rc)); + + // some versions of gcc have a false positive here for + // -Wmaybe-uninitialized, so do this instead of comparing against + // `std::optional` + ASSERTER("" == error_if_any(ctx, rc).value_or("")); }; unsigned d = 0; From 5ac3752f65473cabf13a27f08fe4c53542e511e5 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Tue, 4 Nov 2025 22:56:31 -0500 Subject: [PATCH 108/109] make_subcol code cleanliness --- test/src/unit-cppapi-max-fragment-size.cc | 142 ++++++---------------- 1 file changed, 34 insertions(+), 108 deletions(-) diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index e49e6bde146..a5d5f883153 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -1444,6 +1444,11 @@ TEST_CASE( const Dom64 subrow(0, 2 * d1_extent - 1); const Dom64 subcol = col.domain; + auto make_subcol = [&](uint64_t start_tile, uint64_t end_tile) -> Dom64 { + const uint64_t tile_span = d2_extent * d1_extent / 8; + return Dom64(tile_span * start_tile, tile_span * end_tile - 1); + }; + const Dom64 subrow_0(0, d1_extent - 1); const Dom64 subrow_1(d1_extent, 2 * d1_extent - 1); @@ -1493,23 +1498,11 @@ 
TEST_CASE( std::vector> expect; if (approx_tiles_per_fragment == 4) { - expect.push_back( - {subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); - expect.push_back( - {subrow_0, - Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); - expect.push_back( - {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - d2_extent * d1_extent * 3 / 4 - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 4, - d2_extent * d1_extent - 1)}); + expect.push_back({subrow_0, make_subcol(0, 4)}); + expect.push_back({subrow_0, make_subcol(4, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 8)}); } else { expect.push_back({subrow_0, subcol}); expect.push_back({subrow_1, subcol}); @@ -1540,38 +1533,15 @@ TEST_CASE( std::vector> expect; if (approx_tiles_per_fragment == 4) { - expect.push_back( - {subrow_0, Dom64(0, (d2_extent * d1_extent * 1 / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 1 / 4, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 3 / 4, - d2_extent * d1_extent - 1)}); - expect.push_back( - {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - d2_extent * d1_extent * 3 / 4 - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 4, - d2_extent * d1_extent - 1)}); + expect.push_back({subrow_0, make_subcol(0, 2)}); + expect.push_back({subrow_0, make_subcol(2, 6)}); + expect.push_back({subrow_0, make_subcol(6, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 8)}); } else { - expect.push_back( - {subrow_0, Dom64(0, d2_extent * d1_extent * 7 / 8 - 1)}); - 
expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 7 / 8, - d2_extent * d1_extent - 1)}); + expect.push_back({subrow_0, make_subcol(0, 7)}); + expect.push_back({subrow_0, make_subcol(7, 8)}); expect.push_back({subrow_1, subcol}); } CHECK(expect == actual); @@ -1601,38 +1571,15 @@ TEST_CASE( std::vector> expect; if (approx_tiles_per_fragment == 4) { - expect.push_back( - {subrow_0, Dom64(0, (d2_extent * d1_extent / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 1 / 4, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 3 / 4, - (d2_extent * d1_extent) - 1)}); - expect.push_back( - {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - d2_extent * d1_extent * 3 / 4 - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 4, - d2_extent * d1_extent - 1)}); + expect.push_back({subrow_0, make_subcol(0, 2)}); + expect.push_back({subrow_0, make_subcol(2, 6)}); + expect.push_back({subrow_0, make_subcol(6, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 8)}); } else { - expect.push_back( - {subrow_0, Dom64(0, d2_extent * d1_extent * 7 / 8 - 1)}); - expect.push_back( - {subrow_0, - Dom64( - d2_extent * d1_extent * 7 / 8, - d2_extent * d1_extent - 1)}); + expect.push_back({subrow_0, make_subcol(0, 7)}); + expect.push_back({subrow_0, make_subcol(7, 8)}); expect.push_back({subrow_1, subcol}); } CHECK(expect == actual); @@ -1662,37 +1609,16 @@ TEST_CASE( std::vector> expect; if (approx_tiles_per_fragment == 4) { - expect.push_back( - {subrow_0, Dom64(0, (d2_extent * d1_extent / 2) - 1)}); - expect.push_back( - {subrow_0, - Dom64(d2_extent * d1_extent / 2, d2_extent * d1_extent - 1)}); - expect.push_back( - {subrow_1, Dom64(0, (d2_extent * d1_extent * 3 / 8) - 1)}); - 
expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 8, - (d2_extent * d1_extent * 3 / 4) - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 3 / 4, - d2_extent * d1_extent * 7 / 8 - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 7 / 8, - d2_extent * d1_extent - 1)}); + expect.push_back({subrow_0, make_subcol(0, 4)}); + expect.push_back({subrow_0, make_subcol(4, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 7)}); + expect.push_back({subrow_1, make_subcol(7, 8)}); } else { expect.push_back({subrow_0, subcol}); - expect.push_back( - {subrow_1, Dom64(0, d2_extent * d1_extent * 7 / 8 - 1)}); - expect.push_back( - {subrow_1, - Dom64( - d2_extent * d1_extent * 7 / 8, - d2_extent * d1_extent - 1)}); + expect.push_back({subrow_1, make_subcol(0, 7)}); + expect.push_back({subrow_1, make_subcol(7, 8)}); } CHECK(expect == actual); } From dabdc1d4f8e00361cee36deca6cd1faa6d814f46 Mon Sep 17 00:00:00 2001 From: Ryan Roelke Date: Wed, 5 Nov 2025 08:24:56 -0500 Subject: [PATCH 109/109] Review comments --- test/src/unit-cppapi-consolidation.cc | 3 --- tiledb/sm/fragment/fragment_metadata.cc | 4 ---- tiledb/sm/query/writers/global_order_writer.cc | 7 ++++--- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/test/src/unit-cppapi-consolidation.cc b/test/src/unit-cppapi-consolidation.cc index f47d283402b..31a3db46dc4 100644 --- a/test/src/unit-cppapi-consolidation.cc +++ b/test/src/unit-cppapi-consolidation.cc @@ -735,9 +735,6 @@ instance_dense_consolidation( sm::GlobalCellCmp globalcmp( forread.ptr()->array()->array_schema_latest().domain()); - const auto hyperrow_sizes = compute_hyperrow_sizes( - tile_order, tile_extents, non_empty_domain); - auto icmp = [&](uint64_t ia, uint64_t ib) -> bool { const auto sa = templates::global_cell_cmp_span(coords[ia]); const auto sb = 
templates::global_cell_cmp_span(coords[ib]); diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index b3844f0d288..6d2b1789caf 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -873,10 +873,6 @@ void FragmentMetadata::store(const EncryptionKey& encryption_key) { iassert(tile_null_counts.size() == dense_tile_num); } } - - // what about min, max, sum? - // requires iteration in stride with schema fields to get cell size - // probably a good idea, ask about it in code review } auto timer_se = resources_->stats().start_timer("write_store_frag_meta"); diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 03ab566172d..85abf228145 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -770,14 +770,15 @@ Status GlobalOrderWriter::finalize_global_write_state() { // Handle last tile Status st = global_write_handle_last_tile(); + auto meta = global_write_state_->frag_meta_; + if (!st.ok()) { - if (global_write_state_->frag_meta_) { - throw_if_not_ok(close_files(global_write_state_->frag_meta_)); + if (meta) { + throw_if_not_ok(close_files(meta)); } return st; } - auto meta = global_write_state_->frag_meta_; if (!meta) { return Status::Ok(); }