diff --git a/test/src/unit-cppapi-consolidation.cc b/test/src/unit-cppapi-consolidation.cc index 950553890c1..31a3db46dc4 100644 --- a/test/src/unit-cppapi-consolidation.cc +++ b/test/src/unit-cppapi-consolidation.cc @@ -32,10 +32,16 @@ #include "tiledb/sm/cpp_api/tiledb_experimental" #include +#include "test/support/src/array_helpers.h" +#include "test/support/src/array_templates.h" +#include "test/support/src/fragment_info_helpers.h" #include "test/support/src/helpers.h" +#include "tiledb/api/c_api/array/array_api_internal.h" #include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/misc/comparators.h" using namespace tiledb; +using namespace tiledb::test; void remove_array(const std::string& array_name) { Context ctx; @@ -538,3 +544,422 @@ TEST_CASE( remove_array(array_name); } + +template +void instance_dense_consolidation_create_array( + Context& ctx, + const std::string& array_name, + const std::vector>& domain) { + using Coord = templates::Dimension
::value_type; + + // create array + Domain arraydomain(ctx); + for (uint64_t d = 0; d < domain.size(); d++) { + const std::string dname = "d" + std::to_string(d + 1); + auto dd = Dimension::create( + ctx, + dname, + {domain[d].domain.lower_bound, domain[d].domain.upper_bound}, + domain[d].extent); + arraydomain.add_dimension(dd); + } + + ArraySchema schema(ctx, TILEDB_DENSE); + schema.set_domain(arraydomain); + + const std::vector> attributes = + templates::ddl::physical_type_attributes(); + for (uint64_t a = 0; a < attributes.size(); a++) { + const std::string aname = "a" + std::to_string(a + 1); + auto aa = Attribute::create( + ctx, + aname, + static_cast(std::get<0>(attributes[a]))) + .set_cell_val_num(std::get<1>(attributes[a])) + .set_nullable(std::get<2>(attributes[a])); + schema.add_attribute(aa); + } + + Array::create(array_name, schema); +} + +/** + * Runs an instance of a dense consolidation test. + * The `fragments` are written in ascending order from the beginning of the + * array domain. + * + * Asserts that after consolidation we get fragments which appropriately satisfy + * `max_fragment_size`: + * 1) no fragment is larger than that size + * 2) if the union of two adjacent fragments can form a rectangular domain, then + * the sum of their sizes must exceed the maximum fragment size (else they + * should be one fragment) + * + * @precondition the `fragments` each have a number of cells which is an + * integral number of tiles + */ +template < + sm::Datatype DT, + templates::FragmentType F, + typename Asserter = AsserterCatch> +std::vector::domain_type>> +instance_dense_consolidation( + Context& ctx, + const std::string& array_name, + const std::vector>& domain, + std::vector& fragments, + uint64_t max_fragment_size) { + using Coord = templates::Dimension
::value_type; + + static constexpr sm::Layout tile_order = sm::Layout::ROW_MAJOR; + + // create array + instance_dense_consolidation_create_array(ctx, array_name, domain); + + DeleteArrayGuard arrayguard(ctx.ptr().get(), array_name.c_str()); + + sm::NDRange array_domain; + for (const auto& dim : domain) { + array_domain.push_back( + Range(dim.domain.lower_bound, dim.domain.upper_bound)); + } + + uint64_t num_cells_per_tile = 1; + std::vector tile_extents; + for (const auto& dim : domain) { + tile_extents.push_back(dim.extent); + num_cells_per_tile *= static_cast(dim.extent); + } + + // populate array + uint64_t start_tile = 0; + { + Array forwrite(ctx, array_name, TILEDB_WRITE); + for (auto& f : fragments) { + const uint64_t f_num_tiles = f.num_cells() / num_cells_per_tile; + + const std::optional subarray = domain_tile_offset( + tile_order, tile_extents, array_domain, start_tile, f_num_tiles); + ASSERTER(subarray.has_value()); + + templates::query::write_fragment( + f, forwrite, subarray.value()); + + start_tile += f_num_tiles; + } + } + + sm::NDRange non_empty_domain; + { + std::optional maybe = domain_tile_offset( + tile_order, tile_extents, array_domain, 0, start_tile); + ASSERTER(maybe.has_value()); + non_empty_domain = maybe.value(); + } + + // consolidate + Config cconfig; + cconfig["sm.consolidation.max_fragment_size"] = + std::to_string(max_fragment_size); + Array::consolidate(ctx, array_name, &cconfig); + + Array forread(ctx, array_name, TILEDB_READ); + + // sanity check the non-empty domain + // NB: cannot use `==` for some reason, the array `non_empty_domain` method + // returns `range_start_size_` zero + { + const auto actual_domain = forread.ptr()->array()->non_empty_domain(); + for (uint64_t d = 0; d < domain.size(); d++) { + ASSERTER( + non_empty_domain[d].start_as() == + actual_domain[d].start_as()); + ASSERTER( + non_empty_domain[d].end_as() == + actual_domain[d].end_as()); + } + } + + // check fragment info + FragmentInfo finfo(ctx, array_name); + finfo.load(); + + const auto fragment_domains = + collect_and_validate_fragment_domains( + ctx, + tile_order, + array_name, + tile_extents, + non_empty_domain, + max_fragment_size); + + // read back fragments to check contents + std::vector api_subarray; + api_subarray.reserve(2 * domain.size()); + for (uint64_t d = 0; d < domain.size(); d++) { + api_subarray.push_back(non_empty_domain[d].start_as()); + api_subarray.push_back(non_empty_domain[d].end_as()); + } + + F input_concatenated, output; + for (const auto& f : fragments) { + input_concatenated.extend(f); + } + + // sort in global order + { + std::vector idxs(input_concatenated.size()); + std::iota(idxs.begin(), idxs.end(), 0); + + std::vector next_coord; + next_coord.reserve(domain.size()); + for (uint64_t d = 0; d < domain.size(); d++) { + next_coord.push_back(domain[d].domain.lower_bound); + } + + std::vector> coords; + coords.reserve(input_concatenated.size()); + for (uint64_t i = 0; i < input_concatenated.size(); i++) { + coords.push_back(next_coord); + for (uint64_t di = 0; di < domain.size(); di++) { + const uint64_t d = domain.size() - di - 1; + if (next_coord[d] < domain[d].domain.upper_bound) { + ++next_coord[d]; + break; + } else { + next_coord[d] = 0; + } + } + } + + sm::GlobalCellCmp globalcmp( + forread.ptr()->array()->array_schema_latest().domain()); + + auto icmp = [&](uint64_t ia, uint64_t ib) -> bool { + const auto sa = templates::global_cell_cmp_span(coords[ia]); + const auto sb = templates::global_cell_cmp_span(coords[ib]); + return globalcmp(sa, sb); + 
}; + + std::sort(idxs.begin(), idxs.end(), icmp); + + input_concatenated.attributes() = stdx::select( + stdx::reference_tuple(input_concatenated.attributes()), + std::span(idxs)); + } + + output = input_concatenated; + + Subarray sub(ctx, forread); + sub.set_subarray(api_subarray); + + Query query(forread.context(), forread); + query.set_layout(TILEDB_GLOBAL_ORDER); + query.set_subarray(sub); + + // make field size locations + templates::query::fragment_field_sizes_t field_sizes = + templates::query::make_field_sizes(output, output.num_cells()); + + // add fields to query + auto outcursor = templates::query::fragment_field_sizes_t(); + templates::query::set_fields( + ctx.ptr().get(), + query.ptr().get(), + field_sizes, + output, + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }, + outcursor); + + const auto status = query.submit(); + ASSERTER(status == Query::Status::COMPLETE); + + // resize according to what was found + templates::query::apply_cursor(output, outcursor, field_sizes); + + ASSERTER(output == input_concatenated); + + return fragment_domains; +} + +/** + * Test case inspired by CORE-290. + * + */ +TEST_CASE( + "C++ API: Test consolidation dense array with max fragment size", + "[cppapi][consolidation][rest]") { + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + using DenseFragmentFixed = templates::Fragment, std::tuple>; + + const std::string array_name = "cppapi_consolidation_dense"; + + Context ctx; + + SECTION("2D") { + SECTION("Row tiles") { + const Dim64 row(0, std::numeric_limits::max() - 1, 1); + const Dim64 col(0, 99999, 100000); + + const uint64_t num_fragments = 32; + + // each input fragment is a single row + std::vector input_fragments; + for (uint64_t f = 0; f < num_fragments; f++) { + DenseFragmentFixed fdata; + fdata.resize(row.extent * col.domain.num_cells()); + + auto& att = std::get<0>(fdata.attributes()); + std::iota( + att.begin(), att.end(), static_cast(f) * fdata.num_cells()); + + input_fragments.push_back(fdata); + } + + // unfiltered, each row takes `100000 * sizeof(int)` bytes, plus some + // padding + const uint64_t tile_size = (row.extent * col.extent * sizeof(int)) + 92; + const uint64_t max_fragment_size = GENERATE_COPY( + tile_size - 1, tile_size, (2 * tile_size) - 1, 2 * tile_size); + + const uint64_t rows_per_fragment = max_fragment_size / tile_size; + DYNAMIC_SECTION( + "max_fragment_size = " + std::to_string(max_fragment_size)) { + if (rows_per_fragment == 0) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, + array_name, + {row, col}, + input_fragments, + max_fragment_size), + expect); + } else { + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, max_fragment_size); + + std::vector> expect; + for (uint64_t r = 0; r < num_fragments; r += rows_per_fragment) { + expect.push_back({Dom64(r, r + rows_per_fragment - 1), col.domain}); + } + CHECK(output_fragments == expect); + } + } + } + + SECTION("Rectangle tiles") { + const Dim64 row(0, std::numeric_limits::max() - 1, 4); + const Dim64 col(0, 99999, 100000 / row.extent); + + const uint64_t num_fragments = 32; + + // each input fragment is 4 tiles, covering 4 rows of cells + std::vector input_fragments; + 
for (uint64_t f = 0; f < num_fragments; f++) { + DenseFragmentFixed fdata; + fdata.resize(row.extent * col.extent * row.extent); + + auto& att = std::get<0>(fdata.attributes()); + std::iota( + att.begin(), att.end(), static_cast(f) * fdata.num_cells()); + + input_fragments.push_back(fdata); + } + + // unfiltered, each row takes `100000 * sizeof(int)` bytes, plus some + // padding + const uint64_t tile_size = (row.extent * col.extent * sizeof(int)) + 92; + + SECTION("Too small") { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size - 1), + expect); + } + SECTION("One tile") { + std::vector> expect; + for (uint64_t r = 0; r < num_fragments; r++) { + for (uint64_t c = 0; c < 4; c++) { + expect.push_back( + {Dom64(r * 4, r * 4 + 3), + Dom64(col.extent * c, (col.extent * (c + 1)) - 1)}); + } + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, tile_size); + CHECK(output_fragments == expect); + } + SECTION("Two tiles") { + std::vector> expect; + for (uint64_t r = 0; r < num_fragments; r++) { + expect.push_back( + {Dom64(r * 4, r * 4 + 3), Dom64(0, (col.extent * 2) - 1)}); + expect.push_back( + {Dom64(r * 4, r * 4 + 3), + Dom64(col.extent * 2, (col.extent * 4) - 1)}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, 2 * tile_size); + } + SECTION("Three tiles") { + // now we have some trouble, each row is 4 tiles, 3 of them fit, + // so we will alternate fragments with 3 tiles and fragments with 1 + // tile to fill out the row, yikes + std::vector> expect; + for (uint64_t r = 0; r < num_fragments; r++) { + expect.push_back( + {Dom64(r * 4, r * 4 + 3), Dom64(0, (col.extent * 3) - 1)}); + expect.push_back( + {Dom64(r * 4, r * 4 + 3), + Dom64(col.extent * 3, (col.extent * 4) - 1)}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, 3 * tile_size); + CHECK(output_fragments == expect); + } + SECTION("Four tiles") { + std::vector> expect; + for (uint64_t f = 0; f < num_fragments; f++) { + expect.push_back({Dom64(f * 4, f * 4 + 3), col.domain}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, 4 * tile_size); + CHECK(output_fragments == expect); + } + SECTION("Five tiles") { + // since we need rectangle domains this is the same as four tiles + std::vector> expect; + for (uint64_t f = 0; f < num_fragments; f++) { + expect.push_back({Dom64(f * 4, f * 4 + 3), col.domain}); + } + const auto output_fragments = instance_dense_consolidation< + sm::Datatype::UINT64, + DenseFragmentFixed>( + ctx, array_name, {row, col}, input_fragments, 5 * tile_size); + CHECK(output_fragments == expect); + } + } + } +} diff --git a/test/src/unit-cppapi-max-fragment-size.cc b/test/src/unit-cppapi-max-fragment-size.cc index dd79e638fe4..a5d5f883153 100644 --- a/test/src/unit-cppapi-max-fragment-size.cc +++ b/test/src/unit-cppapi-max-fragment-size.cc @@ -30,17 +30,34 @@ * Tests the C++ API for maximum fragment size. 
*/ +#include #include +#include +#include "test/support/rapidcheck/array_templates.h" +#include "test/support/src/array_helpers.h" +#include "test/support/src/array_templates.h" +#include "test/support/src/fragment_info_helpers.h" #include "test/support/src/helpers.h" +#include "test/support/src/vfs_helpers.h" +#include "tiledb/api/c_api/array_schema/array_schema_api_internal.h" +#include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" +#include "tiledb/api/c_api/subarray/subarray_api_internal.h" +#include "tiledb/common/arithmetic.h" #include "tiledb/common/scoped_executor.h" #include "tiledb/common/stdx_string.h" #include "tiledb/sm/c_api/tiledb_struct_def.h" #include "tiledb/sm/cpp_api/tiledb" #include "tiledb/sm/misc/constants.h" +#include "tiledb/sm/query/writers/global_order_writer.h" +#include "tiledb/sm/tile/arithmetic.h" +#include "tiledb/sm/tile/test/arithmetic.h" +#include "tiledb/sm/tile/tile.h" #include +#include using namespace tiledb; +using namespace tiledb::test; struct CPPMaxFragmentSizeFx { const int max_domain = 1000000; @@ -503,3 +520,1146 @@ TEST_CASE( array.close(); } + +/** + * @return the number of cells contained within a subarray, or `std::nullopt` if + * overflow + */ +std::optional subarray_num_cells( + std::span> subarray) { + uint64_t num_cells = 1; + for (const auto& dim : subarray) { + auto maybe = checked_arithmetic::mul(num_cells, dim.num_cells()); + if (!maybe.has_value()) { + return std::nullopt; + } + num_cells = maybe.value(); + } + return num_cells; +} + +/** + * Creates an array with the provided `dimensions` and then + * runs a global order write into `subarray` using `max_fragment_size` to bound + * the fragment size. + * + * Asserts that all created fragments respect `max_fragment_size` and that the + * data read back out for `subarray` matches what we wrote into it. 
+ * + * @return a list of the domains written to each fragment in ascending order + */ +template +std::vector>> +instance_dense_global_order( + const Context& ctx, + const std::string& array_name, + tiledb_layout_t tile_order, + tiledb_layout_t cell_order, + uint64_t max_fragment_size, + const std::vector>& dimensions, + const std::vector>& subarray, + const F& attributes, + std::optional write_unit_num_cells = std::nullopt) { + Domain domain(ctx); + for (uint64_t d = 0; d < dimensions.size(); d++) { + const std::string dname = "d" + std::to_string(d); + auto dim = Dimension::create( + ctx, + dname, + {{dimensions[d].domain.lower_bound, dimensions[d].domain.upper_bound}}, + dimensions[d].extent); + domain.add_dimension(dim); + } + + ArraySchema schema(ctx, TILEDB_DENSE); + schema.set_domain(domain); + schema.set_tile_order(tile_order); + schema.set_cell_order(cell_order); + + const std::vector> ddl_attributes = + templates::ddl::physical_type_attributes(); + for (uint64_t a = 0; a < ddl_attributes.size(); a++) { + const std::string aname = "a" + std::to_string(a + 1); + auto aa = + Attribute::create( + ctx, + aname, + static_cast(std::get<0>(ddl_attributes[a]))) + .set_cell_val_num(std::get<1>(ddl_attributes[a])) + .set_nullable(std::get<2>(ddl_attributes[a])); + schema.add_attribute(aa); + } + + Array::create(array_name, schema); + test::DeleteArrayGuard del(ctx.ptr().get(), array_name.c_str()); + + std::vector api_subarray; + api_subarray.reserve(2 * subarray.size()); + for (const auto& sub_dim : subarray) { + api_subarray.push_back(sub_dim.lower_bound); + api_subarray.push_back(sub_dim.upper_bound); + } + + std::vector tile_extents; + for (const auto& dimension : dimensions) { + tile_extents.push_back(dimension.extent); + } + + sm::NDRange smsubarray; + + // write data, should be split into multiple fragments + templates::query::fragment_field_sizes_t cursor; + { + Array array(ctx, array_name, TILEDB_WRITE); + + Subarray sub(ctx, array); + sub.set_subarray(api_subarray); + + Query query(ctx, array, TILEDB_WRITE); + query.set_layout(TILEDB_GLOBAL_ORDER); + query.set_subarray(sub); + query.ptr().get()->query_->set_fragment_size(max_fragment_size); + + smsubarray = sub.ptr()->subarray()->ndrange(0); + + sm::NDRange smsubarray_aligned = smsubarray; + array.schema() + .ptr() + ->array_schema() + ->domain() + .expand_to_tiles_when_no_current_domain(smsubarray_aligned); + + uint64_t cells_written = 0; + while (templates::query::num_cells(attributes, cursor) < + attributes.num_cells()) { + const uint64_t cells_this_write = std::min( + attributes.num_cells() - cells_written, + write_unit_num_cells.value_or(attributes.num_cells())); + + const F attributes_this_write = + attributes.slice(cells_written, cells_this_write); + + auto field_sizes = templates::query::make_field_sizes( + attributes_this_write, cells_this_write); + templates::query::accumulate_cursor( + attributes_this_write, cursor, field_sizes); + + templates::query::set_fields( + ctx.ptr().get(), + query.ptr().get(), + field_sizes, + const_cast(attributes_this_write), + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }); + + const auto status = query.submit(); + ASSERTER(status == Query::Status::COMPLETE); + + const uint64_t cells_written_this_write = + templates::query::num_cells( + attributes_this_write, field_sizes); + ASSERTER(cells_written_this_write == cells_this_write); + + cells_written += cells_written_this_write; + ASSERTER( + cells_written == + 
templates::query::num_cells(attributes, cursor)); + + const auto w = dynamic_cast( + query.ptr()->query_->strategy()); + ASSERTER(w); + const auto g = w->get_global_state(); + ASSERTER(g); + + // Check assumptions about memory buffering. + // There may be a tail of tiles for which we cannot infer whether they + // would fit in the current fragment while also forming a rectangle. + // The writer keeps these in memory until it has enough information + // in the next `submit`. Check our assumptions about those tiles. + uint64_t in_memory_size = 0; + std::optional in_memory_num_tiles; + for (const auto& field : g->last_tiles_) { + // NB: there should always be at least one tile which contains the + // state of the current fragment + ASSERTER(!field.second.empty()); + + for (uint64_t t = 0; t < field.second.size() - 1; t++) { + const auto s = field.second[t].filtered_size(); + ASSERTER(s.has_value()); + in_memory_size += s.value(); + } + + if (in_memory_num_tiles.has_value()) { + ASSERTER(field.second.size() - 1 == in_memory_num_tiles.value()); + } else { + in_memory_num_tiles = field.second.size() - 1; + } + } + // it should be an error if they exceed the max fragment size + ASSERTER(in_memory_size <= max_fragment_size); + + // and if they form a rectangle then we could have written some out + ASSERTER(in_memory_num_tiles.has_value()); + for (uint64_t num_tiles = 0; num_tiles < in_memory_num_tiles.value(); + num_tiles++) { + const sm::IsRectangularDomain rectangle = + sm::is_rectangular_domain( + static_cast(tile_order), + tile_extents, + smsubarray_aligned, + g->dense_.domain_tile_offset_, + g->frag_meta_->tile_index_base() + num_tiles); + if (num_tiles == 0) { + ASSERTER(rectangle == sm::IsRectangularDomain::Yes); + } else { + // if `Never` then we should have started a new fragment + // to avoid buffering up until we hit the tile size + ASSERTER(rectangle == sm::IsRectangularDomain::No); + } + } + } + + query.finalize(); + } + + // then read back + F read; + { + templates::query::resize(read, cursor); + + Array array(ctx, array_name, TILEDB_READ); + + Subarray sub(ctx, array); + sub.set_subarray(api_subarray); + + Query query(ctx, array, TILEDB_READ); + query.set_layout(TILEDB_GLOBAL_ORDER); + query.set_subarray(sub); + + auto read_field_sizes = + templates::query::make_field_sizes(read); + templates::query::set_fields( + ctx.ptr().get(), + query.ptr().get(), + read_field_sizes, + read, + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }); + + auto st = query.submit(); + ASSERTER(st == Query::Status::COMPLETE); + + ASSERTER(read_field_sizes == cursor); + } + + const std::vector>> fragment_domains = + collect_and_validate_fragment_domains( + ctx, + static_cast(tile_order), + array_name, + tile_extents, + smsubarray, + max_fragment_size); + + // this is last because a fragment domain mismatch is more informative + ASSERTER(read == attributes); + + return fragment_domains; +} + +template +std::vector>> +instance_dense_global_order( + const Context& ctx, + const std::string& array_name, + tiledb_layout_t tile_order, + tiledb_layout_t cell_order, + uint64_t max_fragment_size, + const std::vector>& dimensions, + const std::vector>& subarray, + std::optional write_unit_num_cells = std::nullopt) { + const std::optional num_cells = subarray_num_cells(subarray); + ASSERTER(num_cells.has_value()); + + const int a_offset = 77; + std::vector a_write; + a_write.reserve(num_cells.value()); + for (int i = 0; i < static_cast(num_cells.value()); 
i++) { + a_write.push_back(a_offset + i); + } + + templates::Fragment, std::tuple> attributes; + std::get<0>(attributes.attributes()) = a_write; + + return instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + attributes, + write_unit_num_cells); +} + +/** + * Tests that the max fragment size parameter is properly respected + * for global order writes to dense arrays. + */ +TEST_CASE("C++ API: Max fragment size dense array", "[cppapi][max-frag-size]") { + VFSTestSetup vfs; + Context ctx(vfs.ctx()); + + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order"); + + const tiledb_layout_t tile_order = + GENERATE(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + const tiledb_layout_t cell_order = + GENERATE(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + + DYNAMIC_SECTION( + "tile_order = " << sm::layout_str(static_cast(tile_order)) + << ", cell_order = " + << sm::layout_str(static_cast(cell_order))) { + // each tile is a full row of a 2D array + // NB: since each tile is a whole row we observe the same results regardless + // of tile order + SECTION("Row tiles") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + constexpr uint64_t max_fragment_size = 64 * 1024; + + constexpr size_t span_d2 = 10000; + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, 1), + Dim(0, span_d2 - 1, span_d2)}; + + const uint64_t base_d1 = 12345; + const uint64_t num_rows = GENERATE(1, 2, 4, 8); + const std::vector subarray = { + Dom(base_d1 + 0, base_d1 + num_rows - 1), Dom(0, span_d2 - 1)}; + + const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); + + DYNAMIC_SECTION( + "num_rows = " << num_rows << ", write_unit_num_cells = " + << write_unit_num_cells) { + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional{write_unit_num_cells}); + + std::vector> expect; + for (uint64_t r = 0; r < num_rows; r++) { + expect.push_back( + {Dom(base_d1 + r, base_d1 + r), Dom(0, span_d2 - 1)}); + } + + CHECK(expect == actual); + } + } + + // each tile is some rectangle of a 2D array + SECTION("Rectangle tiles") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + const uint64_t d1_extent = GENERATE(8, 4); + constexpr size_t d2_span = 10000; + REQUIRE(d2_span % d1_extent == 0); // for row major + + const uint64_t d1_subarray = 16; + REQUIRE(d2_span % d1_subarray == 0); // for column major + + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, d1_extent), + Dim(0, d2_span - 1, d2_span / d1_extent)}; + + const uint64_t d1_start_offset = GENERATE(0, 1); + const uint64_t d1_end_offset = GENERATE(0, 1); + const uint64_t d1_start = 100 + d1_start_offset; + const uint64_t d1_end = d1_start + d1_subarray - 1 - d1_end_offset; + const std::vector subarray = { + Dom(d1_start, d1_end), Dom(0, d2_span - 1)}; + + const uint64_t max_fragment_size = 4 * 64 * 1024; + + const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); + + DYNAMIC_SECTION( + "start_offset = " + << d1_start_offset << ", end_offset = " << d1_end_offset + << ", extent = " << d1_extent + << ", write_unit_num_cells = " << write_unit_num_cells) { + if (d1_extent == 8) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray), + expect); + } else if (d1_start_offset + d1_end_offset > 0) { + // if this constraint is ever relaxed this test must be extended + // with new inputs which are offset within a tile + const auto expect = Catch::Matchers::ContainsSubstring( + "the subarray must coincide with the tile bounds"); + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional(write_unit_num_cells)), + expect); + } else { + std::vector> expect; + if (tile_order == TILEDB_ROW_MAJOR) { + expect = { + {Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), + Dom(0, d2_span - 1)}}; + } else { + expect = { + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(0 * (d2_span / 4), 1 * (d2_span / 4) - 1)}, + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(1 * (d2_span / 4), 2 * (d2_span / 4) - 1)}, + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(2 * (d2_span / 4), 3 * (d2_span / 4) - 1)}, + {Dom(d1_start, d1_start + d1_subarray - 1), + Dom(3 * (d2_span / 4), 4 * (d2_span / 4) - 1)}, + }; + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray); + + CHECK(expect == actual); + } + } + } + + // Each tile is a rectangular prism of height 1 + // Use the same inputs as above except there is a third outer dimension with + // extent 1 + SECTION("Flat rectangular prism tiles") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + const uint64_t d0_extent = 1; + const Dom d0_height(0, 0); + + const uint64_t d1_extent = GENERATE(8, 4); + constexpr size_t d2_span = 10000; + REQUIRE(d2_span % d1_extent == 0); // for row major + + const uint64_t d1_subarray = 16; + REQUIRE(d2_span % d1_subarray == 0); // for column major + + const std::vector dimensions = { + Dim(0, std::numeric_limits::max() - 1, d0_extent), + Dim(0, std::numeric_limits::max() - 1, d1_extent), + Dim(0, d2_span - 1, d2_span / d1_extent)}; + + const uint64_t d1_start_offset = GENERATE(0, 1); + const uint64_t d1_end_offset = GENERATE(0, 1); + const uint64_t d1_start = 100 + d1_start_offset; + const uint64_t d1_end = d1_start + d1_subarray - 1 - d1_end_offset; + const std::vector subarray = { + d0_height, Dom(d1_start, d1_end), Dom(0, d2_span - 1)}; + + const uint64_t max_fragment_size = 4 * 64 * 1024; + + const uint64_t write_unit_num_cells = GENERATE(0, 64, 1024, 1024 * 1024); + + DYNAMIC_SECTION( + "start_offset = " + << d1_start_offset << ", end_offset = " << d1_end_offset + << ", extent = " << d1_extent + << ", write_unit_num_cells = " << write_unit_num_cells) { + if (d1_extent == 8) { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray), + expect); + } else if (d1_start_offset + d1_end_offset > 0) { + // if this constraint is ever relaxed this test must be extended + // with new inputs which are offset within a tile + const auto expect = Catch::Matchers::ContainsSubstring( + "the subarray must coincide with the tile bounds"); + REQUIRE_THROWS(instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells == 0 ? 
+ std::nullopt : + std::optional(write_unit_num_cells))); + } else { + std::vector> expect; + if (tile_order == TILEDB_ROW_MAJOR) { + expect = { + {d0_height, + Dom(d1_start + 0 * d1_extent, d1_start + 1 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {d0_height, + Dom(d1_start + 1 * d1_extent, d1_start + 2 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {d0_height, + Dom(d1_start + 2 * d1_extent, d1_start + 3 * d1_extent - 1), + Dom(0, d2_span - 1)}, + {d0_height, + Dom(d1_start + 3 * d1_extent, d1_start + 4 * d1_extent - 1), + Dom(0, d2_span - 1)}}; + } else { + expect = { + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(0 * (d2_span / 4), 1 * (d2_span / 4) - 1)}, + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(1 * (d2_span / 4), 2 * (d2_span / 4) - 1)}, + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(2 * (d2_span / 4), 3 * (d2_span / 4) - 1)}, + {d0_height, + Dom(d1_start, d1_start + d1_subarray - 1), + Dom(3 * (d2_span / 4), 4 * (d2_span / 4) - 1)}, + }; + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray); + + CHECK(expect == actual); + } + } + } + } + + // examples found from the rapidcheck test + SECTION("Shrinking") { + using Dim = templates::Dimension; + using Dom = templates::Domain; + + SECTION("Example 1") { + Dim d1(0, 0, 1); + Dim d2(0, 0, 1); + Dom s1(0, 0); + Dom s2(0, 0); + const uint64_t max_fragment_size = 24; + + instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {d1, d2}, + {s1, s2}); + } + + SECTION("Example 2") { + Dim d1(1, 26, 2); + Dim d2(0, 0, 1); + Dom s1(1, 2); + Dom s2(0, 0); + const uint64_t max_fragment_size = 28; + + instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {d1, d2}, + {s1, s2}); + } + } +} + +/** + * @return a generator which prdocues subarrays whose bounds are aligned to the + * tiles of `arraydomain` + */ +namespace rc { +template +Gen::domain_type>> +make_tile_aligned_subarray( + const std::vector>& arraydomain) { + using Dom = typename templates::Dimension::domain_type; + + // dense subarrays have to be aligned to tile boundaries + // so choose the tiles in each dimension that the subarray will overlap + std::vector>> gen_subarray_tiles; + for (const auto& dimension : arraydomain) { + const uint64_t tile_ub = + (dimension.domain.upper_bound - dimension.domain.lower_bound) / + dimension.extent; + gen_subarray_tiles.push_back(make_range( + templates::Domain(0, std::min(64, tile_ub)))); + } + + return gen::exec([gen_subarray_tiles, arraydomain]() { + std::vector> subarray_tiles; + for (const auto& gen_dim : gen_subarray_tiles) { + subarray_tiles.push_back(*gen_dim); + } + + std::vector subarray; + auto to_subarray = [&]() -> std::vector& { + subarray.clear(); + for (uint64_t d = 0; d < arraydomain.size(); d++) { + subarray.push_back(Dom( + arraydomain[d].domain.lower_bound + + subarray_tiles[d].lower_bound * arraydomain[d].extent, + arraydomain[d].domain.lower_bound + + (subarray_tiles[d].upper_bound + 1) * arraydomain[d].extent - + 1)); + } + return subarray; + }; + + uint64_t num_cells_per_tile = 1; + for (const auto& dim : arraydomain) { + num_cells_per_tile *= dim.extent; + } + + // clamp to a hopefully reasonable limit (if the other attempts failed) + // avoid too many cells, and avoid too many tiles + std::optional num_cells; + while (!(num_cells = subarray_num_cells(to_subarray())).has_value() || + 
num_cells.value() >= 1024 * 1024 * 4 || + (num_cells.value() / num_cells_per_tile) >= 16 * 1024) { + for (uint64_t d = subarray.size(); d > 0; --d) { + auto& dtiles = subarray_tiles[d - 1]; + if (dtiles.num_cells() > 4) { + dtiles.upper_bound = (dtiles.lower_bound + dtiles.upper_bound) / 2; + break; + } + } + } + + return to_subarray(); + }); +} + +} // namespace rc + +/** + * Generates an arbitrary expected-to-not-error input to + * `instance_dense_global_order` of an appropriate size for the given + * `dimensions`. + * + * "Appropriate size" means tiles with at most `1024 * 128` cells, and a write + * domain with at most `1024 * 1024 * 4` cells (see + * `make_tile_aligned_subarray`). We expect that this should allow inputs which + * are large enough to be interesting but not so large that each instance takes + * a long time. + * + * Inputs generated by this test function are expected to successfully write + * fragments within the generated max fragment size. The maximum fragment size + * is a number of bytes which represents between 1 and 8 hyperrows. + */ +template +void rapidcheck_dense_array( + Context& ctx, + const std::string& array_name, + const std::vector>& dimensions) { + uint64_t num_cells_per_tile = 1; + for (const auto& dim : dimensions) { + num_cells_per_tile *= dim.extent; + } + RC_PRE(num_cells_per_tile <= 1024 * 128); + + const tiledb_layout_t tile_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + const tiledb_layout_t cell_order = + *rc::gen::element(TILEDB_ROW_MAJOR, TILEDB_COL_MAJOR); + + const uint64_t tile_size = num_cells_per_tile * sizeof(int); + const uint64_t filter_chunk_size = + sm::WriterTile::compute_chunk_size(tile_size, sizeof(int)); + const uint64_t num_filter_chunks_per_tile = + (tile_size + filter_chunk_size - 1) / filter_chunk_size; + + const uint64_t estimate_single_tile_fragment_size = + num_cells_per_tile * sizeof(int) // data + + sizeof(uint64_t) // prefix containing the number of chunks + + num_filter_chunks_per_tile * 3 * sizeof(uint32_t); // chunk sizes + + const auto subarray = + *rc::make_tile_aligned_subarray(dimensions); + + uint64_t num_tiles_per_hyperrow = 1; + for (uint64_t i = 0; i < dimensions.size() - 1; i++) { + const uint64_t dim = + (tile_order == TILEDB_ROW_MAJOR ? i + 1 : dimensions.size() - i - 2); + num_tiles_per_hyperrow *= dimensions[dim].num_tiles(subarray[dim]); + } + + const uint64_t num_tiles_total = + num_tiles_per_hyperrow * + (tile_order == TILEDB_ROW_MAJOR ? + (dimensions[0].num_tiles(subarray[0])) : + (dimensions.back().num_tiles(subarray.back()))); + + auto gen_fragment_size = rc::gen::inRange( + estimate_single_tile_fragment_size, + num_tiles_per_hyperrow * estimate_single_tile_fragment_size * 8); + const uint64_t max_fragment_size = *gen_fragment_size; + + auto gen_write_unit_num_cells = + rc::gen::inRange(1, num_tiles_total * num_cells_per_tile); + const uint64_t write_unit_num_cells = *gen_write_unit_num_cells; + + instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + dimensions, + subarray, + write_unit_num_cells); +} + +TEST_CASE( + "C++ API: Max fragment size dense array rapidcheck 1d", + "[cppapi][max-frag-size][rapidcheck]") { + static constexpr auto DT = sm::Datatype::UINT64; + using Dim64 = templates::Dimension
; + using Dom64 = Dim64::domain_type; + + VFSTestSetup vfs; + Context ctx(vfs.ctx()); + + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_1d"); + + SECTION("Shrinking") { + instance_dense_global_order( + ctx, + array_name, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 2396, + {Dim64(0, 8929, 594)}, + {Dom64(0, 2969)}); + } + + rc::prop("max fragment size dense 1d", [&]() { + Dim64 d1 = *rc::make_dimension
<DT>(8192); + + rapidcheck_dense_array<DT>
(ctx, array_name, {d1}); + }); +} + +TEST_CASE( + "C++ API: Max fragment size dense array rapidcheck 2d", + "[cppapi][max-frag-size][rapidcheck]") { + static constexpr auto DT = sm::Datatype::UINT64; + using Dim64 = templates::Dimension
; + using Dom64 = Dim64::domain_type; + + VFSTestSetup vfs; + Context ctx(vfs.ctx()); + + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_2d"); + + SECTION("Shrinking") { + SECTION("Example 1") { + instance_dense_global_order( + ctx, + array_name, + TILEDB_ROW_MAJOR, + TILEDB_COL_MAJOR, + 48, + {Dim64(0, 116, 1), Dim64(0, 0, 1)}, + {Dom64(2, 20), Dom64(0, 0)}); + } + + SECTION("Example 2") { + instance_dense_global_order( + ctx, + array_name, + TILEDB_COL_MAJOR, + TILEDB_ROW_MAJOR, + 24, + {Dim64(0, 60, 1), Dim64(0, 20, 1)}, + {Dom64(0, 1), Dom64(0, 1)}); + } + + SECTION("Example 3") { + instance_dense_global_order( + ctx, + array_name, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 48, + {Dim64(0, 35, 1), Dim64(0, 420, 1)}, + {Dom64(0, 1), Dom64(0, 4)}, + 1); + } + + SECTION("Example 4") { + /* + * In this example we end up with a fragment which fills all but one tile + * of a single row. The last tile in the row has to be its own fragment. + */ + auto fragments = instance_dense_global_order( + ctx, + array_name, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 924, + {Dim64(0, 304, 8), Dim64(0, 147, 2)}, + {Dom64(0, 31), Dom64(0, 23)}, + 41); + } + } + + rc::prop("max fragment size dense 2d", [&]() { + Dim64 d1 = *rc::make_dimension
<DT>(128); + Dim64 d2 = *rc::make_dimension<DT>
(128); + + rapidcheck_dense_array<DT>
(ctx, array_name, {d1, d2}); + }); +} + +TEST_CASE( + "C++ API: Max fragment size dense array rapidcheck 3d", + "[cppapi][max-frag-size][rapidcheck]") { + static constexpr auto DT = sm::Datatype::UINT64; + using Dim64 = templates::Dimension
; + using Dom64 = Dim64::domain_type; + + VFSTestSetup vfs; + Context ctx(vfs.ctx()); + + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rapidcheck_3d"); + + SECTION("Shrinking") { + instance_dense_global_order( + ctx, + array_name, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + 2160, + {Dim64(0, 85, 5), Dim64(0, 102, 2), Dim64(0, 37, 1)}, + {Dom64(5, 19), Dom64(4, 15), Dom64(1, 6)}); + } + + rc::prop("max fragment size dense 3d", [&]() { + Dim64 d1 = *rc::make_dimension
<DT>(32); + Dim64 d2 = *rc::make_dimension<DT>
(32); + Dim64 d3 = *rc::make_dimension<DT>
(32); + + rapidcheck_dense_array<DT>
(ctx, array_name, {d1, d2, d3}); + }); +} + +/** + * Test some edge cases induced by variable-length tiles + */ +TEST_CASE( + "C++ API: Max fragment size dense array var size tiles", + "[cppapi][max-frag-size]") { + VFSTestSetup vfs; + Context ctx(vfs.ctx()); + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_var"); + + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + + using F = templates::Fragment, std::tuple>>; + + const tiledb_layout_t tile_order = TILEDB_ROW_MAJOR; + const tiledb_layout_t cell_order = TILEDB_ROW_MAJOR; + + SECTION("Rectangle tiles") { + const uint64_t d1_extent = 8; + const uint64_t d2_span = 10000; + REQUIRE(d2_span % d1_extent == 0); + + const uint64_t d2_extent = d2_span / d1_extent; + + const Dim64 row(0, std::numeric_limits::max() - 1, d1_extent); + const Dim64 col(0, d2_span - 1, d2_extent); + + const Dom64 subrow(0, 2 * d1_extent - 1); + const Dom64 subcol = col.domain; + + auto make_subcol = [&](uint64_t start_tile, uint64_t end_tile) -> Dom64 { + const uint64_t tile_span = d2_extent * d1_extent / 8; + return Dom64(tile_span * start_tile, tile_span * end_tile - 1); + }; + + const Dom64 subrow_0(0, d1_extent - 1); + const Dom64 subrow_1(d1_extent, 2 * d1_extent - 1); + + const std::optional num_cells = + subarray_num_cells(std::vector{subrow, subcol}); + REQUIRE(num_cells.has_value()); + + const uint64_t approx_tiles_per_fragment = GENERATE(4, 9); + const uint64_t max_fragment_size = approx_tiles_per_fragment * 64 * 1024; + + F attributes; + attributes.reserve(num_cells.value()); + + const std::optional write_unit_num_cells = GENERATE_COPY( + std::optional{}, + 64, + 1024, + 1024 * 1024, + num_cells.value() - 1); + + const uint64_t num_cells_per_tile = d1_extent * d2_extent; + + DYNAMIC_SECTION( + "approx_tiles_per_fragment = " << approx_tiles_per_fragment) { + DYNAMIC_SECTION( + "write_unit_num_cells = " + << (write_unit_num_cells.has_value() ? + std::to_string(write_unit_num_cells.value()) : + "unlimited")) { + SECTION("Even") { + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = std::to_string(c); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back({subrow_0, make_subcol(0, 4)}); + expect.push_back({subrow_0, make_subcol(4, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 8)}); + } else { + expect.push_back({subrow_0, subcol}); + expect.push_back({subrow_1, subcol}); + } + CHECK(expect == actual); + } + + SECTION("Skew first tile") { + // inflate all the records of the first tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (c < num_cells_per_tile ? 
"foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back({subrow_0, make_subcol(0, 2)}); + expect.push_back({subrow_0, make_subcol(2, 6)}); + expect.push_back({subrow_0, make_subcol(6, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 8)}); + } else { + expect.push_back({subrow_0, make_subcol(0, 7)}); + expect.push_back({subrow_0, make_subcol(7, 8)}); + expect.push_back({subrow_1, subcol}); + } + CHECK(expect == actual); + } + + SECTION("Skew second tile") { + // inflate all the records of the second tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells_per_tile <= c && c < 2 * num_cells_per_tile ? + "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back({subrow_0, make_subcol(0, 2)}); + expect.push_back({subrow_0, make_subcol(2, 6)}); + expect.push_back({subrow_0, make_subcol(6, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 8)}); + } else { + expect.push_back({subrow_0, make_subcol(0, 7)}); + expect.push_back({subrow_0, make_subcol(7, 8)}); + expect.push_back({subrow_1, subcol}); + } + CHECK(expect == actual); + } + + SECTION("Skew last tile") { + // inflate all the records of the last tile + for (uint64_t c = 0; c < num_cells.value(); c++) { + const std::string str = + (num_cells.value() - num_cells_per_tile <= c ? 
+ "foobargubquux" + std::to_string(c) : + std::to_string(c)); + std::get<0>(attributes.attributes()) + .push_back(std::span(str.begin(), str.end())); + } + + const auto actual = instance_dense_global_order( + ctx, + array_name, + tile_order, + cell_order, + max_fragment_size, + {row, col}, + {subrow, subcol}, + attributes, + write_unit_num_cells); + + std::vector> expect; + if (approx_tiles_per_fragment == 4) { + expect.push_back({subrow_0, make_subcol(0, 4)}); + expect.push_back({subrow_0, make_subcol(4, 8)}); + expect.push_back({subrow_1, make_subcol(0, 3)}); + expect.push_back({subrow_1, make_subcol(3, 6)}); + expect.push_back({subrow_1, make_subcol(6, 7)}); + expect.push_back({subrow_1, make_subcol(7, 8)}); + } else { + expect.push_back({subrow_0, subcol}); + expect.push_back({subrow_1, make_subcol(0, 7)}); + expect.push_back({subrow_1, make_subcol(7, 8)}); + } + CHECK(expect == actual); + } + } + } + } +} + +TEST_CASE( + "C++ API: Max fragment size dense unsupported on REST", "[cppapi][rest]") { + VFSTestSetup vfs; + if (!vfs.is_rest()) { + SKIP("Test is only applicable to REST client"); + } + + const std::string array_name = + vfs.array_uri("max_fragment_size_dense_global_order_rest_support"); + + Context ctx(vfs.ctx()); + + using Dim = templates::Dimension; + using Dom = Dim::domain_type; + + Dim d1(0, 0, 1); + Dim d2(0, 0, 1); + Dom s1(0, 0); + Dom s2(0, 0); + const uint64_t max_fragment_size = 24; + + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is not supported for remote global order writes to dense " + "arrays."); + + REQUIRE_THROWS( + instance_dense_global_order( + ctx, + array_name, + TILEDB_ROW_MAJOR, + TILEDB_ROW_MAJOR, + max_fragment_size, + {d1, d2}, + {s1, s2}), + expect); +} diff --git a/test/src/unit-sparse-global-order-reader.cc b/test/src/unit-sparse-global-order-reader.cc index 0a7a519f076..d89ccd4da3e 100644 --- a/test/src/unit-sparse-global-order-reader.cc +++ b/test/src/unit-sparse-global-order-reader.cc @@ -3713,19 +3713,7 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { ASSERTER(cursor_cells + num_cells <= expect.size()); // accumulate - std::apply( - [&](auto&... field) { - std::apply( - [&](auto&... field_cursor) { - std::apply( - [&](const auto&... field_size) { - (field.accumulate_cursor(field_cursor, field_size), ...); - }, - field_sizes); - }, - outcursor); - }, - std::tuple_cat(outdims, outatts)); + templates::query::accumulate_cursor(out, outcursor, field_sizes); if (status == TILEDB_COMPLETED) { break; @@ -3735,15 +3723,7 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { // Clean up. tiledb_query_free(&query); - std::apply( - [outcursor](auto&... outfield) { - std::apply( - [&](const auto&... 
field_cursor) { - (outfield.finish_multipart_read(field_cursor), ...); - }, - outcursor); - }, - std::tuple_cat(outdims, outatts)); + templates::query::resize(out, outcursor); ASSERTER(expect.dimensions() == outdims); diff --git a/test/support/CMakeLists.txt b/test/support/CMakeLists.txt index 6eb891a6dba..caae00aa1a9 100644 --- a/test/support/CMakeLists.txt +++ b/test/support/CMakeLists.txt @@ -36,7 +36,8 @@ list(APPEND TILEDB_CORE_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/tiledb/sm/c_api") # Gather the test source files set(TILEDB_TEST_SUPPORT_SOURCES - rapidcheck/show.cc + rapidcheck/show/array_schema_templates.cc + rapidcheck/show/query_ast.cc src/array_helpers.cc src/array_schema_helpers.cc src/ast_helpers.h diff --git a/test/support/rapidcheck/array_schema_templates.h b/test/support/rapidcheck/array_schema_templates.h new file mode 100644 index 00000000000..642a25d5b6f --- /dev/null +++ b/test/support/rapidcheck/array_schema_templates.h @@ -0,0 +1,198 @@ +/** + * @file test/support/rapidcheck/array_schema_templates.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines rapidcheck generators for the structures + * defined in test/support/src/array_schema_templates.h. + */ + +#ifndef TILEDB_RAPIDCHECK_ARRAY_SCHEMA_H +#define TILEDB_RAPIDCHECK_ARRAY_SCHEMA_H + +#include +#include +#include + +#include "tiledb/common/arithmetic.h" + +namespace rc { + +using namespace tiledb::test; +using namespace tiledb::test::templates; + +template +Gen> make_domain(std::optional bound = std::nullopt) { + auto bounds = gen::mapcat(gen::arbitrary(), [bound](D lb) { + const D ub_limit = + (bound.has_value() ? + tiledb::common::checked_arithmetic::add(lb, bound.value()) + .value_or(std::numeric_limits::max()) : + std::numeric_limits::max()); + if constexpr (std::is_same_v || std::is_same_v) { + return gen::pair(gen::just(lb), gen::inRange(lb, ub_limit)); + } else { + // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is + // inclusive. So we have to use `int64_t` to avoid overflow. 
+ return gen::pair( + gen::just(lb), + gen::cast(gen::inRange(int64_t(lb), int64_t(ub_limit) + 1))); + } + }); + + return gen::map(bounds, [](std::pair bounds) { + return templates::Domain(bounds.first, bounds.second); + }); +} + +template +struct Arbitrary> { + static Gen> arbitrary() { + return make_domain(); + } +}; + +template +Gen make_extent( + const templates::Domain& domain, std::optional bound = std::nullopt) { + // upper bound on all possible extents to avoid unreasonably + // huge tile sizes + static constexpr D extent_limit = static_cast( + std::is_signed::value ? + std::min( + static_cast(std::numeric_limits::max()), + static_cast(1024 * 16)) : + std::min( + static_cast(std::numeric_limits::max()), + static_cast(1024 * 16))); + + const D extent_bound = + (bound.has_value() ? std::min(bound.value(), extent_limit) : + extent_limit); + + // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is + // inclusive. So we have to be careful to avoid overflow. + + D extent_lower_bound = 1; + D extent_upper_bound; + + const auto bound_distance = tiledb::common::checked_arithmetic::sub( + domain.upper_bound, domain.lower_bound); + if (bound_distance.has_value()) { + extent_upper_bound = + (bound_distance.value() < extent_bound ? bound_distance.value() + 1 : + extent_bound); + } else { + extent_upper_bound = extent_bound; + } + + return gen::inRange(extent_lower_bound, extent_upper_bound + 1); +} + +template +Gen> make_dimension( + std::optional::value_type> extent_bound = + std::nullopt, + std::optional::value_type> domain_bound = + std::nullopt) { + using CoordType = templates::Dimension::value_type; + auto tup = gen::mapcat( + make_domain(domain_bound), + [extent_bound](Domain domain) { + return gen::pair(gen::just(domain), make_extent(domain, extent_bound)); + }); + + return gen::map(tup, [](std::pair, CoordType> tup) { + return templates::Dimension(tup.first, tup.second); + }); +} + +template +struct Arbitrary> { + static Gen> arbitrary() { + return make_dimension(); + } +}; + +template +Gen make_coordinate(const templates::Domain& domain) { + // `gen::inRange` does an exclusive upper bound, + // whereas the domain upper bound is inclusive. + // As a result some contortion is required to deal + // with numeric_limits. 
+ if constexpr (std::is_same_v) { + // NB: poor performance with small domains for sure + return gen::suchThat( + gen::map( + gen::string(), + [](std::string s) { + StringDimensionCoordType v(s.begin(), s.end()); + return v; + }), + [domain](const StringDimensionCoordType& s) { + return domain.lower_bound <= s && s <= domain.upper_bound; + }); + } else if constexpr (std::is_signed::value) { + if (int64_t(domain.upper_bound) < std::numeric_limits::max()) { + return gen::cast(gen::inRange( + int64_t(domain.lower_bound), int64_t(domain.upper_bound + 1))); + } else { + return gen::inRange(domain.lower_bound, domain.upper_bound); + } + } else { + if (uint64_t(domain.upper_bound) < std::numeric_limits::max()) { + return gen::cast(gen::inRange( + uint64_t(domain.lower_bound), uint64_t(domain.upper_bound + 1))); + } else { + return gen::inRange(domain.lower_bound, domain.upper_bound); + } + } +} + +template +Gen> make_range(const templates::Domain& domain) { + return gen::apply( + [](D p1, D p2) { return templates::Domain(p1, p2); }, + make_coordinate(domain), + make_coordinate(domain)); +} + +template <> +void show>(const templates::Domain& domain, std::ostream& os); + +template <> +void show>( + const templates::Domain& domain, std::ostream& os); + +template <> +void show>( + const templates::Dimension& dimension, + std::ostream& os); + +} // namespace rc + +#endif diff --git a/test/support/rapidcheck/array_templates.h b/test/support/rapidcheck/array_templates.h index f2c1dacc232..37762a9ba6b 100644 --- a/test/support/rapidcheck/array_templates.h +++ b/test/support/rapidcheck/array_templates.h @@ -34,6 +34,7 @@ #ifndef TILEDB_RAPIDCHECK_ARRAY_H #define TILEDB_RAPIDCHECK_ARRAY_H +#include #include #include #include @@ -43,139 +44,6 @@ namespace rc { using namespace tiledb::test; using namespace tiledb::test::templates; -template -struct Arbitrary> { - static Gen> arbitrary() { - // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is - // inclusive. So we have to use `int64_t` to avoid overflow. - auto bounds = gen::mapcat(gen::arbitrary(), [](D lb) { - if constexpr (std::is_same::value) { - return gen::pair( - gen::just(lb), gen::inRange(lb, std::numeric_limits::max())); - } else if constexpr (std::is_same::value) { - return gen::pair( - gen::just(lb), gen::inRange(lb, std::numeric_limits::max())); - } else { - auto ub_limit = int64_t(std::numeric_limits::max()) + 1; - return gen::pair( - gen::just(lb), gen::cast(gen::inRange(int64_t(lb), ub_limit))); - } - }); - - return gen::map(bounds, [](std::pair bounds) { - return templates::Domain(bounds.first, bounds.second); - }); - } -}; - -/** - * @return `a - b` if it does not overflow, `std::nullopt` if it does - */ -template -std::optional checked_sub(T a, T b) { - if (!std::is_signed::value) { - return (b > a ? std::nullopt : std::optional(a - b)); - } else if (b < 0) { - return ( - std::numeric_limits::max() + b < a ? std::nullopt : - std::optional(a - b)); - } else { - return ( - std::numeric_limits::min() - b > a ? std::nullopt : - std::optional(a - b)); - } -} - -template -Gen make_extent(const templates::Domain& domain) { - // upper bound on all possible extents to avoid unreasonably - // huge tile sizes - static constexpr D extent_limit = static_cast( - std::is_signed::value ? 
- std::min( - static_cast(std::numeric_limits::max()), - static_cast(1024 * 16)) : - std::min( - static_cast(std::numeric_limits::max()), - static_cast(1024 * 16))); - - // NB: `gen::inRange` is exclusive at the upper end but tiledb domain is - // inclusive. So we have to be careful to avoid overflow. - - D extent_lower_bound = 1; - D extent_upper_bound; - - const auto bound_distance = - checked_sub(domain.upper_bound, domain.lower_bound); - if (bound_distance.has_value()) { - extent_upper_bound = - (bound_distance.value() < extent_limit ? bound_distance.value() + 1 : - extent_limit); - } else { - extent_upper_bound = extent_limit; - } - - return gen::inRange(extent_lower_bound, extent_upper_bound + 1); -} - -template -struct Arbitrary> { - static Gen> arbitrary() { - using CoordType = templates::Dimension::value_type; - auto tup = gen::mapcat( - gen::arbitrary>(), [](Domain domain) { - return gen::pair(gen::just(domain), make_extent(domain)); - }); - - return gen::map(tup, [](std::pair, CoordType> tup) { - return templates::Dimension(tup.first, tup.second); - }); - } -}; - -template -Gen make_coordinate(const templates::Domain& domain) { - // `gen::inRange` does an exclusive upper bound, - // whereas the domain upper bound is inclusive. - // As a result some contortion is required to deal - // with numeric_limits. - if constexpr (std::is_same_v) { - // NB: poor performance with small domains for sure - return gen::suchThat( - gen::map( - gen::string(), - [](std::string s) { - StringDimensionCoordType v(s.begin(), s.end()); - return v; - }), - [domain](const StringDimensionCoordType& s) { - return domain.lower_bound <= s && s <= domain.upper_bound; - }); - } else if constexpr (std::is_signed::value) { - if (int64_t(domain.upper_bound) < std::numeric_limits::max()) { - return gen::cast(gen::inRange( - int64_t(domain.lower_bound), int64_t(domain.upper_bound + 1))); - } else { - return gen::inRange(domain.lower_bound, domain.upper_bound); - } - } else { - if (uint64_t(domain.upper_bound) < std::numeric_limits::max()) { - return gen::cast(gen::inRange( - uint64_t(domain.lower_bound), uint64_t(domain.upper_bound + 1))); - } else { - return gen::inRange(domain.lower_bound, domain.upper_bound); - } - } -} - -template -Gen> make_range(const templates::Domain& domain) { - return gen::apply( - [](D p1, D p2) { return templates::Domain(p1, p2); }, - make_coordinate(domain), - make_coordinate(domain)); -} - template Gen> make_fragment_1d( bool allow_duplicates, const Domain& d) { @@ -307,10 +175,6 @@ Gen> make_fragment_3d( }); } -void showValue(const templates::Domain& domain, std::ostream& os); -void showValue(const templates::Domain& domain, std::ostream& os); -void showValue(const templates::Domain& domain, std::ostream& os); - namespace detail { /** diff --git a/test/support/rapidcheck/show/array_schema_templates.cc b/test/support/rapidcheck/show/array_schema_templates.cc new file mode 100644 index 00000000000..ca395c902f9 --- /dev/null +++ b/test/support/rapidcheck/show/array_schema_templates.cc @@ -0,0 +1,74 @@ +/** + * @file test/support/rapidcheck/show/array_schema_templates.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides forward declarations of `rc::detail::showValue` + * overloads, which seemingly must be included prior to the rapidcheck + * header files. + */ + +#include +#include +#include + +namespace rc { + +template +void showImpl( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + os << "[" << domain.lower_bound << ", " << domain.upper_bound << "]"; +} + +template <> +void show>( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + showImpl(domain, os); +} + +template <> +void show>( + const tiledb::test::templates::Domain& domain, std::ostream& os) { + showImpl(domain, os); +} + +template +void showImpl( + const tiledb::test::templates::Dimension
& dimension, std::ostream& os) { + os << "{\"domain\": "; + showImpl(dimension.domain, os); + os << ", \"extent\": " << dimension.extent << "}"; +} + +template <> +void show>( + const templates::Dimension& dimension, + std::ostream& os) { + showImpl(dimension, os); +} + +} // namespace rc diff --git a/test/support/rapidcheck/show.cc b/test/support/rapidcheck/show/query_ast.cc similarity index 93% rename from test/support/rapidcheck/show.cc rename to test/support/rapidcheck/show/query_ast.cc index f3aeb2426db..f895667de4c 100644 --- a/test/support/rapidcheck/show.cc +++ b/test/support/rapidcheck/show/query_ast.cc @@ -1,5 +1,5 @@ /** - * @file test/support/rapidcheck/show.cc + * @file test/support/rapidcheck/show/query_ast.cc * * @section LICENSE * @@ -32,8 +32,11 @@ * header files. */ +#include #include +#include +#include "test/support/src/array_templates.h" #include "tiledb/sm/enums/query_condition_op.h" #include "tiledb/sm/query/ast/query_ast.h" diff --git a/test/support/src/array_schema_templates.h b/test/support/src/array_schema_templates.h new file mode 100644 index 00000000000..bd2b77059a8 --- /dev/null +++ b/test/support/src/array_schema_templates.h @@ -0,0 +1,219 @@ +/** + * @file test/support/src/array_schema_templates.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides templates for generic programming with respect + * to array schema, data types, etc. + */ + +#ifndef TILEDB_ARRAY_SCHEMA_TEMPLATES_H +#define TILEDB_ARRAY_SCHEMA_TEMPLATES_H + +#include "tiledb/type/datatype_traits.h" +#include "tiledb/type/range/range.h" + +#include +#include + +namespace tiledb::test::templates { + +using StringDimensionCoordType = std::vector; +using StringDimensionCoordView = std::span; + +/** + * Constrains types which can be used as the physical type of a dimension. + */ +template +concept DimensionType = + std::is_same_v or requires(const D& coord) { + typename std::is_signed; + { coord < coord } -> std::same_as; + { D(int64_t(coord)) } -> std::same_as; + }; + +/** + * Constrains types which can be used as the physical type of an attribute. + * + * Right now this doesn't constrain anything, it is just a marker for + * readability, and someday we might want it do require something. 
+ * + * This used to have + * ``` + * typename query_buffers::cell_type; + * ``` + * but that was removed to simplify include whatnot and forward declaration etc + */ +template +concept AttributeType = true; + +/** + * A generic, statically-typed range which is inclusive on both ends. + */ +template +struct Domain { + D lower_bound; + D upper_bound; + + Domain() { + } + + Domain(D d1, D d2) + : lower_bound(std::min(d1, d2)) + , upper_bound(std::max(d1, d2)) { + } + + bool operator==(const Domain&) const = default; + + uint64_t num_cells() const { + // FIXME: this is incorrect for 64-bit domains which need to check overflow + if (std::is_signed::value) { + return static_cast(upper_bound) - + static_cast(lower_bound) + 1; + } else { + return static_cast(upper_bound) - + static_cast(lower_bound) + 1; + } + } + + bool contains(D point) const { + return lower_bound <= point && point <= upper_bound; + } + + bool intersects(const Domain& other) const { + return (other.lower_bound <= lower_bound && + lower_bound <= other.upper_bound) || + (other.lower_bound <= upper_bound && + upper_bound <= other.upper_bound) || + (lower_bound <= other.lower_bound && + other.lower_bound <= upper_bound) || + (lower_bound <= other.upper_bound && + other.upper_bound <= upper_bound); + } + + tiledb::type::Range range() const { + return tiledb::type::Range(lower_bound, upper_bound); + } +}; + +/** + * A description of a dimension as it pertains to its datatype. + */ +template +struct Dimension { + using value_type = tiledb::type::datatype_traits
::value_type; + using domain_type = Domain; + + static constexpr tiledb::sm::Datatype DATATYPE = DT; + + Dimension() = default; + Dimension(Domain domain, value_type extent) + : domain(domain) + , extent(extent) { + } + + Dimension(value_type lower_bound, value_type upper_bound, value_type extent) + : Dimension(Domain(lower_bound, upper_bound), extent) { + } + + Domain domain; + value_type extent; + + /** + * @return the number of tiles spanned by the whole domain of this dimension + */ + uint64_t num_tiles() const { + return num_tiles(domain); + } + + /** + * @return the number of tiles spanned by a range in this dimension + */ + uint64_t num_tiles(const domain_type& range) const { + return (range.num_cells() + extent - 1) / extent; + } +}; + +template +struct static_attribute {}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool nullable = false; + + using value_type = + typename tiledb::type::datatype_traits::value_type; + using cell_type = value_type; +}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool nullable = true; + + using value_type = std::optional< + typename tiledb::type::datatype_traits::value_type>; + using cell_type = value_type; +}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; + static constexpr bool nullable = false; + + using value_type = + typename tiledb::type::datatype_traits::value_type; + using cell_type = std::vector; +}; + +template +struct static_attribute { + static constexpr tiledb::sm::Datatype datatype = DATATYPE; + static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; + static constexpr bool nullable = true; + + using value_type = + typename tiledb::type::datatype_traits::value_type; + using cell_type = std::optional>; +}; + +template +constexpr std::tuple +attribute_properties() { + return { + static_attribute::datatype, + static_attribute::cell_val_num, + static_attribute::nullable}; +} + +} // namespace tiledb::test::templates + +#endif diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index bc75487e8ca..9dc422bc48a 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -42,6 +42,7 @@ #include "tiledb/type/range/range.h" #include +#include #include #include #include @@ -59,9 +60,6 @@ class Dimension; namespace tiledb::test::templates { -using StringDimensionCoordType = std::vector; -using StringDimensionCoordView = std::span; - /** * Adapts a `std::tuple` whose fields are all `GlobalCellCmp` * to itself be `GlobalCellCmp`. @@ -113,6 +111,27 @@ struct global_cell_cmp_std_tuple { StdTuple tup_; }; +/** + * Adapts a span of coordinates for comparison using `GlobalCellCmp`. + */ +template +struct global_cell_cmp_span { + global_cell_cmp_span(std::span values) + : values_(values) { + } + + tiledb::common::UntypedDatumView dimension_datum( + const tiledb::sm::Dimension&, unsigned dim_idx) const { + return UntypedDatumView(&values_[dim_idx], sizeof(Coord)); + } + + const void* coord(unsigned dim) const { + return &values_[dim]; + } + + std::span values_; +}; + /** * Forward declaration of query_buffers * which will be specialized. 
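// Illustrative sketch (not part of the patch) of how the schema templates
// above compose; `CHECK` is assumed to be the Catch2 macro:
//
//   using namespace tiledb::test::templates;
//   Dimension<tiledb::sm::Datatype::INT32> d1(1, 100, 10);  // domain [1, 100], extent 10
//   CHECK(d1.domain.num_cells() == 100);
//   CHECK(d1.num_tiles() == 10);         // whole domain
//   CHECK(d1.num_tiles({11, 40}) == 3);  // cell count of the range, rounded up to tiles
//   CHECK(d1.domain.contains(42));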
@@ -123,26 +142,6 @@ struct global_cell_cmp_std_tuple { template struct query_buffers {}; -/** - * Constrains types which can be used as the physical type of a dimension. - */ -template -concept DimensionType = - std::is_same_v or requires(const D& coord) { - typename std::is_signed; - { coord < coord } -> std::same_as; - { D(int64_t(coord)) } -> std::same_as; - }; - -/** - * Constrains types which can be used as the physical type of an attribute. - * - * Right now this doesn't constrain anything, it is just a marker for - * readability, and someday we might want it do require something. - */ -template -concept AttributeType = requires(T) { typename query_buffers::cell_type; }; - /** * Constrains types which can be used as columnar data fragment input. * @@ -165,139 +164,7 @@ concept FragmentType = requires(const T& fragment) { }; /** - * A generic, statically-typed range which is inclusive on both ends. - */ -template -struct Domain { - D lower_bound; - D upper_bound; - - Domain() { - } - - Domain(D d1, D d2) - : lower_bound(std::min(d1, d2)) - , upper_bound(std::max(d1, d2)) { - } - - uint64_t num_cells() const { - // FIXME: this is incorrect for 64-bit domains which need to check overflow - if (std::is_signed::value) { - return static_cast(upper_bound) - - static_cast(lower_bound) + 1; - } else { - return static_cast(upper_bound) - - static_cast(lower_bound) + 1; - } - } - - bool contains(D point) const { - return lower_bound <= point && point <= upper_bound; - } - - bool intersects(const Domain& other) const { - return (other.lower_bound <= lower_bound && - lower_bound <= other.upper_bound) || - (other.lower_bound <= upper_bound && - upper_bound <= other.upper_bound) || - (lower_bound <= other.lower_bound && - other.lower_bound <= upper_bound) || - (lower_bound <= other.upper_bound && - other.upper_bound <= upper_bound); - } - - tiledb::type::Range range() const { - return tiledb::type::Range(lower_bound, upper_bound); - } -}; - -/** - * A description of a dimension as it pertains to its datatype. 
- */ -template -struct Dimension { - using value_type = tiledb::type::datatype_traits::value_type; - - Dimension() = default; - Dimension(Domain domain, value_type extent) - : domain(domain) - , extent(extent) { - } - - Domain domain; - value_type extent; -}; - -template <> -struct Dimension { - using value_type = StringDimensionCoordType; - - Dimension() { - } - - Dimension(const Domain& domain) - : domain(domain) { - } - - std::optional> domain; -}; - -template -struct static_attribute {}; - -template -struct static_attribute { - static constexpr Datatype datatype = DATATYPE; - static constexpr uint32_t cell_val_num = 1; - static constexpr bool nullable = false; - - using value_type = - typename tiledb::type::datatype_traits::value_type; - using cell_type = value_type; -}; - -template -struct static_attribute { - static constexpr Datatype datatype = DATATYPE; - static constexpr uint32_t cell_val_num = 1; - static constexpr bool nullable = true; - - using value_type = std::optional< - typename tiledb::type::datatype_traits::value_type>; - using cell_type = value_type; -}; - -template -struct static_attribute { - static constexpr Datatype datatype = DATATYPE; - static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; - static constexpr bool nullable = false; - - using value_type = - typename tiledb::type::datatype_traits::value_type; - using cell_type = std::vector; -}; - -template -struct static_attribute { - static constexpr Datatype datatype = DATATYPE; - static constexpr uint32_t cell_val_num = tiledb::sm::cell_val_num_var; - static constexpr bool nullable = true; - - using value_type = - typename tiledb::type::datatype_traits::value_type; - using cell_type = std::optional>; -}; - -template -constexpr std::tuple attribute_properties() { - return { - static_attribute::datatype, - static_attribute::cell_val_num, - static_attribute::nullable}; -} - -/** +2D) * Schema of named fields for simple evaluation of a query condition */ template @@ -413,7 +280,7 @@ struct QueryConditionEvalSchema { */ bool test( const Fragment& fragment, - int record, + uint64_t record, const tiledb::sm::ASTNode& condition) const { using DimensionTuple = stdx::decay_tuple; using AttributeTuple = stdx::decay_tuple; @@ -526,8 +393,13 @@ struct query_buffers { return *this; } + query_field_size_type make_field_size( + uint64_t offset, uint64_t cell_limit) const { + return sizeof(T) * std::min(cell_limit, values_.size() - offset); + } + query_field_size_type make_field_size(uint64_t cell_limit) const { - return sizeof(T) * std::min(cell_limit, values_.size()); + return make_field_size(0, cell_limit); } int32_t attach_to_query( @@ -559,11 +431,12 @@ struct query_buffers { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { cursor += field_sizes; } - void finish_multipart_read(const query_field_size_type& cursor) { + void resize_to_cursor(const query_field_size_type& cursor) { resize(cursor / sizeof(T)); } @@ -575,6 +448,12 @@ struct query_buffers { values_.insert(std::forward(args)...); } + self_type slice(uint64_t cell_start, uint64_t num_cells) const { + return self_type(std::vector( + values_.begin() + cell_start, + values_.begin() + cell_start + num_cells)); + } + auto begin() { return values_.begin(); } @@ -759,20 +638,35 @@ struct query_buffers> { validity_.end(), from.validity_.begin(), from.validity_.end()); } + self_type slice(uint64_t cell_start, uint64_t 
num_cells) const { + self_type ret; + ret.values_ = std::vector( + values_.begin() + cell_start, values_.begin() + num_cells); + ret.validity_ = std::vector( + validity_.begin() + cell_start, validity_.begin() + num_cells); + return ret; + } + self_type& operator=(const self_type& other) { values_ = other.values_; validity_ = other.validity_; return *this; } - query_field_size_type make_field_size(uint64_t cell_limit) const { + query_field_size_type make_field_size( + uint64_t offset, uint64_t cell_limit) const { const uint64_t values_size = - sizeof(T) * std::min(cell_limit, values_.size()); + sizeof(T) * std::min(cell_limit, values_.size() - offset); const uint64_t validity_size = - sizeof(uint8_t) * std::min(cell_limit, validity_.size()); + sizeof(uint8_t) * + std::min(cell_limit, validity_.size() - offset); return std::make_pair(values_size, validity_size); } + query_field_size_type make_field_size(uint64_t cell_limit) const { + return make_field_size(0, cell_limit); + } + int32_t attach_to_query( tiledb_ctx_t* ctx, tiledb_query_t* query, @@ -814,12 +708,13 @@ struct query_buffers> { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { std::get<0>(cursor) += std::get<0>(field_sizes); std::get<1>(cursor) += std::get<1>(field_sizes); } - void finish_multipart_read(const query_field_size_type& cursor) { + void resize_to_cursor(const query_field_size_type& cursor) { resize(std::get<0>(cursor) / sizeof(T)); } }; @@ -912,19 +807,61 @@ struct query_buffers> { values_.insert(values_.end(), from.values_.begin(), from.values_.end()); } + self_type slice(uint64_t cell_start, uint64_t num_cells) const { + std::vector slice_offsets( + offsets_.begin() + cell_start, + offsets_.begin() + cell_start + num_cells); + std::vector slice_values; + for (uint64_t o = cell_start; o < cell_start + num_cells; o++) { + const uint64_t end = + (o + 1 == offsets_.size() ? 
values_.size() : offsets_[o + 1]); + slice_values.insert( + slice_values.end(), + values_.begin() + offsets_[o], + values_.begin() + end); + } + + const uint64_t offset_adjustment = slice_offsets[0]; + for (uint64_t& offset : slice_offsets) { + offset -= offset_adjustment; + } + + self_type ret; + ret.offsets_ = slice_offsets; + ret.values_ = slice_values; + return ret; + } + self_type& operator=(const self_type& other) { offsets_ = other.offsets_; values_ = other.values_; return *this; } - query_field_size_type make_field_size(uint64_t cell_limit) const { - const uint64_t values_size = sizeof(T) * values_.size(); + query_field_size_type make_field_size( + uint64_t cell_offset, uint64_t cell_limit) const { + const uint64_t num_cells = + std::min(cell_limit, offsets_.size() - cell_offset); + const uint64_t offsets_size = - sizeof(uint64_t) * std::min(cell_limit, offsets_.size()); + sizeof(uint64_t) * + std::min(num_cells, offsets_.size() - cell_offset); + + uint64_t values_size; + if (cell_offset + num_cells + 1 < offsets_.size()) { + values_size = sizeof(T) * + (offsets_[cell_offset + num_cells] - offsets_[cell_offset]); + } else { + values_size = sizeof(T) * (values_.size() - offsets_[cell_offset]); + } + return std::make_pair(values_size, offsets_size); } + query_field_size_type make_field_size(uint64_t cell_limit) const { + return make_field_size(0, cell_limit); + } + int32_t attach_to_query( tiledb_ctx_t* ctx, tiledb_query_t* query, @@ -974,12 +911,13 @@ struct query_buffers> { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { std::get<0>(cursor) += std::get<0>(field_sizes); std::get<1>(cursor) += std::get<1>(field_sizes); } - void finish_multipart_read(const query_field_size_type& cursor) { + void resize_to_cursor(const query_field_size_type& cursor) { values_.resize(std::get<0>(cursor) / sizeof(T)); offsets_.resize(std::get<1>(cursor) / sizeof(uint64_t)); } @@ -1086,15 +1024,26 @@ struct query_buffers>> { return *this; } - query_field_size_type make_field_size(uint64_t cell_limit) const { - const uint64_t values_size = sizeof(T) * values_.size(); + query_field_size_type make_field_size( + uint64_t cell_offset, uint64_t cell_limit) const { const uint64_t offsets_size = - sizeof(uint64_t) * std::min(cell_limit, offsets_.size()); + sizeof(uint64_t) * + std::min(cell_limit, offsets_.size() - cell_offset); const uint64_t validity_size = - sizeof(uint8_t) * std::min(cell_limit, validity_.size()); + sizeof(uint8_t) * + std::min(cell_limit, validity_.size() - cell_offset); + + // NB: unlike the above this can just be the whole buffer + // since offsets is what determines the values + const uint64_t values_size = sizeof(T) * values_.size(); + return std::make_tuple(values_size, offsets_size, validity_size); } + query_field_size_type make_field_size(uint64_t cell_limit) const { + return make_field_size(0, cell_limit); + } + int32_t attach_to_query( tiledb_ctx_t* ctx, tiledb_query_t* query, @@ -1157,13 +1106,14 @@ struct query_buffers>> { } void accumulate_cursor( - query_field_size_type& cursor, const query_field_size_type& field_sizes) { + query_field_size_type& cursor, + const query_field_size_type& field_sizes) const { std::get<0>(cursor) += std::get<0>(field_sizes); std::get<1>(cursor) += std::get<1>(field_sizes); std::get<2>(cursor) += std::get<2>(field_sizes); } - void finish_multipart_read(const query_field_size_type& cursor) { + void 
resize_to_cursor(const query_field_size_type& cursor) { values_.resize(std::get<0>(cursor) / sizeof(T)); offsets_.resize(std::get<1>(cursor) / sizeof(uint64_t)); validity_.resize(std::get<2>(cursor) / sizeof(uint8_t)); @@ -1233,7 +1183,7 @@ struct Fragment { if constexpr (std::tuple_size::value == 0) { return std::get<0>(atts_).num_cells(); } else { - return std::get<0>(atts_).num_cells(); + return std::get<0>(dims_).num_cells(); } } @@ -1288,6 +1238,30 @@ struct Fragment { }, std::tuple_cat(dimensions(), attributes())); } + + /** + * @return a new fragment containing the cells in the range `[cell_start, + * cell_start + num_cells)` + */ + self_type slice(uint64_t cell_start, uint64_t num_cells) const { + const auto dims = std::apply( + [&](Ts&... dst) { + return std::make_tuple(dst.slice(cell_start, num_cells)...); + }, + dimensions()); + const auto atts = std::apply( + [&](Ts&... dst) { + return std::make_tuple(dst.slice(cell_start, num_cells)...); + }, + attributes()); + + return self_type{.dims_ = dims, .atts_ = atts}; + } + + bool operator==(const self_type& other) const { + return dimensions() == other.dimensions() && + attributes() == other.attributes(); + } }; /** @@ -1368,7 +1342,7 @@ struct query_applicator { * @return a tuple containing the size of each input field */ static auto make_field_sizes( - const std::tuple&...> fields, + const std::tuple&...> fields, uint64_t cell_limit = std::numeric_limits::max()) { std::optional num_cells; auto make_field_size = [&](const query_buffers& field) { @@ -1391,6 +1365,27 @@ struct query_applicator { fields); } + /** + * @return a tuple containing the size of each input field to write for a + * range of input cells [cell_offset, cell_offset + cell_limit] + */ + static auto write_make_field_sizes( + const std::tuple&...> fields, + uint64_t cell_offset, + uint64_t cell_limit = std::numeric_limits::max()) { + auto write_make_field_size = [&]( + const query_buffers& field) { + const auto field_size = field.make_field_size(cell_offset, cell_limit); + return field_size; + }; + + return std::apply( + [&](const auto&... 
field) { + return std::make_tuple(write_make_field_size(field)...); + }, + fields); + } + /** * Sets buffers on `query` for the variadic `fields` and `fields_sizes` */ @@ -1407,7 +1402,11 @@ struct query_applicator { const auto& field_cursor) { const auto rc = field.attach_to_query(ctx, query, field_size, name, field_cursor); - ASSERTER(std::optional() == error_if_any(ctx, rc)); + + // some versions of gcc have a false positive here for + // -Wmaybe-uninitialized, so do this instead of comparing against + // `std::optional` + ASSERTER("" == error_if_any(ctx, rc).value_or("")); }; unsigned d = 0; @@ -1471,9 +1470,10 @@ namespace query { */ template auto make_field_sizes( - F& fragment, uint64_t cell_limit = std::numeric_limits::max()) { - typename F::DimensionBuffersRef dims = fragment.dimensions(); - typename F::AttributeBuffersRef atts = fragment.attributes(); + const F& fragment, + uint64_t cell_limit = std::numeric_limits::max()) { + typename F::DimensionBuffersConstRef dims = fragment.dimensions(); + typename F::AttributeBuffersConstRef atts = fragment.attributes(); return [cell_limit](std::tuple fields) { return query_applicator::make_field_sizes( fields, cell_limit); @@ -1483,7 +1483,20 @@ auto make_field_sizes( template using fragment_field_sizes_t = decltype(make_field_sizes( - std::declval(), std::declval())); + std::declval(), std::declval())); + +template +fragment_field_sizes_t write_make_field_sizes( + const F& fragment, + uint64_t cell_offset, + uint64_t cell_limit = std::numeric_limits::max()) { + typename F::DimensionBuffersConstRef dims = fragment.dimensions(); + typename F::AttributeBuffersConstRef atts = fragment.attributes(); + return [cell_offset, cell_limit](std::tuple fields) { + return query_applicator...>:: + write_make_field_sizes(fields, cell_offset, cell_limit); + }(std::tuple_cat(dims, atts)); +} /** * Apply field cursor and sizes to each field of `fragment`. @@ -1510,6 +1523,46 @@ void apply_cursor( std::tuple_cat(dims, atts)); } +/** + * Advances field cursors `cursor` over `fragment` by the amount of data from + * `field_sizes` + */ +template +void accumulate_cursor( + const F& fragment, + fragment_field_sizes_t& cursor, + const fragment_field_sizes_t& field_sizes) { + std::apply( + [&](auto&... field) { + std::apply( + [&](auto&... field_cursor) { + std::apply( + [&](const auto&... field_size) { + (field.accumulate_cursor(field_cursor, field_size), ...); + }, + field_sizes); + }, + cursor); + }, + std::tuple_cat(fragment.dimensions(), fragment.attributes())); +} + +/** + * Resizes the fields of `fragment` to the sizes given by `cursor`. + */ +template +void resize(F& fragment, const fragment_field_sizes_t& cursor) { + std::apply( + [cursor](auto&... field) { + std::apply( + [&](const auto&... field_cursor) { + (field.resize_to_cursor(field_cursor), ...); + }, + cursor); + }, + std::tuple_cat(fragment.dimensions(), fragment.attributes())); +} + /** * Set buffers on `query` for the tuple of field columns */ @@ -1569,7 +1622,7 @@ uint64_t num_cells(const F& fragment, const auto& field_sizes) { } /** - * Writes a fragment to an array. + * Writes a fragment to a sparse array. */ template void write_fragment( @@ -1604,10 +1657,105 @@ void write_fragment( ASSERTER(num_cells == expect_num_cells); } +/** + * Writes a fragment to a dense array. 
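+ *
+ * A minimal usage sketch (illustrative only; the variable names, explicit
+ * template arguments, and asserter type are assumptions, not part of this
+ * patch):
+ * ```
+ * Context ctx;
+ * Array forwrite(ctx, array_name, TILEDB_WRITE);
+ * // one Range per dimension; dense global order writes expect the
+ * // bounds to be tile-aligned
+ * sm::NDRange subarray = {templates::Domain<int32_t>(1, 20).range()};
+ * write_fragment<AsserterCatch>(fragment, forwrite, subarray);
+ * ```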
+ */ +template +void write_fragment( + const Fragment& fragment, + Array& forwrite, + const sm::NDRange& subarray, + tiledb_layout_t layout = TILEDB_ROW_MAJOR) { + Query query(forwrite.context(), forwrite, TILEDB_WRITE); + query.set_layout(layout); + + std::vector coords; + for (const auto& dim : subarray) { + coords.push_back(dim.start_as()); + coords.push_back(dim.end_as()); + } + + Subarray sub(query.ctx(), forwrite); + sub.set_subarray(coords); + query.set_subarray(sub); + + auto field_sizes = + make_field_sizes(const_cast(fragment)); + templates::query::set_fields( + query.ctx().ptr().get(), + query.ptr().get(), + field_sizes, + const_cast(fragment), + [](unsigned d) { return "d" + std::to_string(d + 1); }, + [](unsigned a) { return "a" + std::to_string(a + 1); }); + + const auto status = query.submit(); + ASSERTER(status == Query::Status::COMPLETE); + + if (layout == TILEDB_GLOBAL_ORDER) { + query.finalize(); + } + + // check that sizes match what we expect + const uint64_t expect_num_cells = fragment.size(); + const uint64_t num_cells = + templates::query::num_cells(fragment, field_sizes); + + ASSERTER(num_cells == expect_num_cells); +} + } // namespace query namespace ddl { +template +struct cell_type_traits; + +template <> +struct cell_type_traits { + static constexpr sm::Datatype physical_type = sm::Datatype::CHAR; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool is_nullable = false; +}; + +template <> +struct cell_type_traits { + static constexpr sm::Datatype physical_type = sm::Datatype::INT32; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool is_nullable = false; +}; + +template <> +struct cell_type_traits { + static constexpr sm::Datatype physical_type = sm::Datatype::UINT64; + static constexpr uint32_t cell_val_num = 1; + static constexpr bool is_nullable = false; +}; + +template +struct cell_type_traits> { + static constexpr sm::Datatype physical_type = + cell_type_traits::physical_type; + static constexpr uint32_t cell_val_num = std::numeric_limits::max(); + static constexpr bool is_nullable = false; +}; + +template +std::vector> physical_type_attributes() { + std::vector> ret; + auto attr = [&](const T&) { + ret.push_back(std::make_tuple( + cell_type_traits>::physical_type, + cell_type_traits>::cell_val_num, + cell_type_traits>::is_nullable)); + }; + std::apply( + [&](const auto&... value) { (attr(value), ...); }, + typename F::AttributeTuple()); + + return ret; +} + /** * Creates an array with a schema whose dimensions and attributes * come from the simplified arguments. diff --git a/test/support/src/fragment_info_helpers.h b/test/support/src/fragment_info_helpers.h new file mode 100644 index 00000000000..f7899161887 --- /dev/null +++ b/test/support/src/fragment_info_helpers.h @@ -0,0 +1,166 @@ +/** + * @file fragment_info_helpers.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides declarations and definitions of functionality which + * may be common to tests inspecting fragment info and fragment metadata. + */ + +#ifndef TILEDB_TEST_FRAGMENT_INFO_HELPERS_H +#define TILEDB_TEST_FRAGMENT_INFO_HELPERS_H + +#include +#include + +#include "tiledb/api/c_api/fragment_info/fragment_info_api_internal.h" +#include "tiledb/sm/cpp_api/context.h" +#include "tiledb/sm/cpp_api/fragment_info.h" +#include "tiledb/sm/enums/layout.h" +#include "tiledb/sm/fragment/single_fragment_info.h" +#include "tiledb/sm/misc/types.h" +#include "tiledb/sm/tile/test/arithmetic.h" + +#include +#include + +namespace tiledb::test { + +template +std::vector>> +collect_and_validate_fragment_domains( + const Context& ctx, + sm::Layout tile_order, + const std::string& array_name, + const std::span tile_extents, + const sm::NDRange& expect_domain, + uint64_t max_fragment_size) { + const uint64_t num_dimensions = expect_domain.size(); + + FragmentInfo finfo(ctx, array_name); + finfo.load(); + + // collect fragment domains + std::vector>> fragment_domains; + for (uint32_t f = 0; f < finfo.fragment_num(); f++) { + std::vector> this_fragment_domain; + for (uint64_t d = 0; d < num_dimensions; d++) { + D bounds[2]; + finfo.get_non_empty_domain(f, d, &bounds[0]); + this_fragment_domain.push_back( + templates::Domain(bounds[0], bounds[1])); + } + fragment_domains.push_back(this_fragment_domain); + } + + // the fragments are not always emitted in the same order, sort them + auto domain_cmp = [&](const auto& left, const auto& right) { + for (uint64_t di = 0; di < num_dimensions; di++) { + const uint64_t d = + (tile_order == sm::Layout::ROW_MAJOR ? 
di : num_dimensions - di - 1); + if (left[d].lower_bound < right[d].lower_bound) { + return true; + } else if (left[d].lower_bound > right[d].lower_bound) { + return false; + } else if (left[d].upper_bound < right[d].upper_bound) { + return true; + } else if (left[d].upper_bound > right[d].upper_bound) { + return false; + } + } + return false; + }; + std::vector fragments_in_order(finfo.fragment_num()); + std::iota(fragments_in_order.begin(), fragments_in_order.end(), 0); + std::sort( + fragments_in_order.begin(), + fragments_in_order.end(), + [&](const uint32_t f_left, const uint32_t f_right) -> bool { + const auto& left = fragment_domains[f_left]; + const auto& right = fragment_domains[f_right]; + return domain_cmp(left, right); + }); + std::sort(fragment_domains.begin(), fragment_domains.end(), domain_cmp); + + // validate fragment domains + ASSERTER(!fragment_domains.empty()); + + // fragment domains should be contiguous in global order and cover the whole + // subarray + uint64_t subarray_tile_offset = 0; + for (uint32_t f = 0; f < fragments_in_order.size(); f++) { + const sm::NDRange& internal_domain = + finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[fragments_in_order[f]] + .non_empty_domain(); + + const uint64_t f_num_tiles = + compute_num_tiles(tile_extents, internal_domain); + const std::optional f_start_tile = compute_start_tile( + tile_order, tile_extents, expect_domain, internal_domain); + + ASSERTER(f_start_tile == subarray_tile_offset); + subarray_tile_offset += f_num_tiles; + } + ASSERTER( + subarray_tile_offset == + compute_num_tiles(tile_extents, expect_domain)); + + auto meta_size = [&](uint32_t f) -> uint64_t { + return finfo.ptr() + ->fragment_info() + ->single_fragment_info_vec()[f] + .meta() + ->fragment_meta_size(); + }; + + // validate fragment size - no fragment should be larger than max requested + // size + for (uint32_t f : fragments_in_order) { + const uint64_t fsize = finfo.fragment_size(f); + const uint64_t fmetasize = meta_size(f); + ASSERTER(fsize <= max_fragment_size + fmetasize); + } + + // validate fragment size - we wrote the largest possible fragments (no two + // adjacent should be under max fragment size) + for (uint32_t fi = 1; fi < fragments_in_order.size(); fi++) { + const uint32_t fprev = fragments_in_order[fi - 1]; + const uint32_t fcur = fragments_in_order[fi]; + const uint64_t combined_size = + finfo.fragment_size(fprev) + finfo.fragment_size(fcur); + const uint64_t combined_meta_size = meta_size(fprev) + meta_size(fcur); + ASSERTER(combined_size > max_fragment_size + combined_meta_size); + } + + return fragment_domains; +} + +} // namespace tiledb::test + +#endif diff --git a/tiledb/common/arithmetic.h b/tiledb/common/arithmetic.h index 08ced92e703..3aa85a6ae58 100644 --- a/tiledb/common/arithmetic.h +++ b/tiledb/common/arithmetic.h @@ -196,6 +196,20 @@ struct checked_arithmetic { return -negated.value(); } } + + /** + * @return `a * b` if it can be represented as a `uint64_t` without undefined + * behavior, `std::nullopt` otherwise + */ + static std::optional mul(uint64_t a, uint64_t b) { + if (b == 0) { + return 0; + } else if (a > std::numeric_limits::max() / b) { + return std::nullopt; + } else { + return a * b; + } + } }; template <> diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index 75f5273b51e..6d2b1789caf 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -673,14 +673,7 @@ uint64_t FragmentMetadata::fragment_size() 
const { for (const auto& file_validity_size : file_validity_sizes_) size += file_validity_size; - // The fragment metadata file size can be empty when we've loaded consolidated - // metadata - uint64_t meta_file_size = meta_file_size_; - if (meta_file_size == 0) { - auto meta_uri = fragment_uri_.join_path( - std::string(constants::fragment_metadata_filename)); - meta_file_size = resources_->vfs().file_size(meta_uri); - } + const uint64_t meta_file_size = fragment_meta_size(); // Validate that the meta_file_size is not zero, either preloaded or fetched // above iassert(meta_file_size != 0); @@ -691,14 +684,29 @@ uint64_t FragmentMetadata::fragment_size() const { return size; } -void FragmentMetadata::init_domain(const NDRange& non_empty_domain) { - auto& domain{array_schema_->domain()}; +uint64_t FragmentMetadata::fragment_meta_size() const { + // The fragment metadata file size can be empty when we've loaded consolidated + // metadata + if (meta_file_size_ == 0) { + auto meta_uri = fragment_uri_.join_path( + std::string(constants::fragment_metadata_filename)); + meta_file_size_ = resources_->vfs().file_size(meta_uri); + } + return meta_file_size_; +} +void FragmentMetadata::init_domain(const NDRange& non_empty_domain) { // Sanity check iassert(!non_empty_domain.empty()); iassert(non_empty_domain_.empty()); iassert(domain_.empty()); + set_domain(non_empty_domain); +} + +void FragmentMetadata::set_domain(const NDRange& non_empty_domain) { + auto& domain{array_schema_->domain()}; + // Set non-empty domain for dense arrays (for sparse it will be calculated // via the MBRs) if (dense_) { @@ -841,6 +849,32 @@ void FragmentMetadata::load( } void FragmentMetadata::store(const EncryptionKey& encryption_key) { + // integrity checks + if (dense_) { + const uint64_t dense_tile_num = tile_num(); + + for (const auto& tile_offsets : loaded_metadata_ptr_->tile_offsets()) { + iassert(tile_offsets.size() == dense_tile_num); + } + for (const auto& tile_var_offsets : + loaded_metadata_ptr_->tile_var_offsets()) { + iassert(tile_var_offsets.size() == dense_tile_num); + } + for (const auto& tile_var_sizes : loaded_metadata_ptr_->tile_var_sizes()) { + iassert(tile_var_sizes.size() == dense_tile_num); + } + for (const auto& tile_validity_offsets : + loaded_metadata_ptr_->tile_validity_offsets()) { + iassert(tile_validity_offsets.size() == dense_tile_num); + } + for (const auto& tile_null_counts : + loaded_metadata_ptr_->tile_null_counts()) { + if (!tile_null_counts.empty()) { + iassert(tile_null_counts.size() == dense_tile_num); + } + } + } + auto timer_se = resources_->stats().start_timer("write_store_frag_meta"); // Make sure the data fits in the current domain before we commit to disk. @@ -1194,6 +1228,11 @@ void FragmentMetadata::store_v15_or_higher( } void FragmentMetadata::set_num_tiles(uint64_t num_tiles) { + if (dense_) { + const uint64_t dense_tile_num = tile_num(); + iassert(num_tiles <= dense_tile_num); + } + for (auto& it : idx_map_) { auto i = it.second; iassert(num_tiles >= loaded_metadata_ptr_->tile_offsets()[i].size()); diff --git a/tiledb/sm/fragment/fragment_metadata.h b/tiledb/sm/fragment/fragment_metadata.h index 931d6d0099f..fe2e6e69a35 100644 --- a/tiledb/sm/fragment/fragment_metadata.h +++ b/tiledb/sm/fragment/fragment_metadata.h @@ -253,6 +253,9 @@ class FragmentMetadata { /** Retrieves the fragment size. 
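 * The returned value includes the size of the fragment metadata file
 * (see fragment_meta_size()).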
*/ uint64_t fragment_size() const; + /** @return the size of the metadata file */ + uint64_t fragment_meta_size() const; + /** * Returns true if the corresponding fragment is dense, and false if it * is sparse. @@ -353,6 +356,12 @@ class FragmentMetadata { */ void init_domain(const NDRange& non_empty_domain); + /** + * Updates the fragment's internal domain and non-empty domain members. + * Validity of the argument is not checked so use with caution. + */ + void set_domain(const NDRange& non_empty_domain); + /** * Loads the basic metadata from storage or `f_buff` for later * versions if it is not `nullptr`. @@ -898,7 +907,7 @@ class FragmentMetadata { uint64_t sparse_tile_num_; /** The size of the fragment metadata file. */ - uint64_t meta_file_size_; + mutable uint64_t meta_file_size_; /** Local mutex for thread-safety. */ std::mutex mtx_; diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 0fbd84db127..751511714ab 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -83,6 +83,13 @@ static uint64_t get_effective_memory_budget( /* CONSTRUCTORS & DESTRUCTORS */ /* ****************************** */ +Query::CoordsInfo::CoordsInfo() + : has_coords_(false) + , coords_buffer_(nullptr) + , coords_buffer_size_(nullptr) + , coords_num_(0) { +} + Query::Query( ContextResources& resources, CancellationSource cancellation_source, @@ -125,7 +132,6 @@ Query::Query( , remote_query_(false) , is_dimension_label_ordered_read_(false) , dimension_label_increasing_(true) - , fragment_size_(std::numeric_limits::max()) , memory_budget_(memory_budget) , query_remote_buffer_storage_(std::nullopt) , default_channel_{make_shared(HERE(), *this, 0)} { @@ -141,11 +147,6 @@ Query::Query( fragment_metadata_ = array->fragment_metadata(); - coords_info_.coords_buffer_ = nullptr; - coords_info_.coords_buffer_size_ = nullptr; - coords_info_.coords_num_ = 0; - coords_info_.has_coords_ = false; - callback_ = nullptr; callback_data_ = nullptr; status_ = QueryStatus::UNINITIALIZED; @@ -1636,10 +1637,19 @@ Status Query::submit() { } // Make sure fragment size is only set for global order. - if (fragment_size_ != std::numeric_limits::max() && - (layout_ != Layout::GLOBAL_ORDER || type_ != QueryType::WRITE)) { - throw QueryException( - "[submit] Fragment size is only supported for global order writes."); + if (fragment_size_.has_value()) { + if (layout_ != Layout::GLOBAL_ORDER || type_ != QueryType::WRITE) { + throw QueryException( + "[submit] Fragment size is only supported for global order writes."); + } else if (array_schema_->dense() && array_->is_remote()) { + // For dense arrays, `max_fragment_size_` requires buffering of a trail of + // filtered tiles which may not fit in a target fragment. This trail of + // tiles is not serializable. As such `max_fragment_size` cannot be used + // with remote dense array writes. + throw QueryException( + "[submit] Fragment size is not supported for remote global order " + "writes to dense arrays."); + } } // Check attribute/dimensions buffers completeness before query submits diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index 5d39ed133cf..c33d32bd559 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -127,6 +127,8 @@ class Query { /** Keeps track of the number of coordinates across coordinate buffers. */ uint64_t coords_num_; + + CoordsInfo(); }; /* ********************************* */ @@ -1108,7 +1110,7 @@ class Query { * * Note: This is only used for global order writes. 
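 * When unset (std::nullopt), no maximum fragment size was requested.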
*/ - uint64_t fragment_size_; + std::optional fragment_size_; /** * Memory budget. If set to nullopt, the value will be obtained from the diff --git a/tiledb/sm/query/writers/global_order_writer.cc b/tiledb/sm/query/writers/global_order_writer.cc index 38b3a3523ba..85abf228145 100644 --- a/tiledb/sm/query/writers/global_order_writer.cc +++ b/tiledb/sm/query/writers/global_order_writer.cc @@ -45,13 +45,16 @@ #include "tiledb/sm/misc/parallel_functions.h" #include "tiledb/sm/misc/tdb_math.h" #include "tiledb/sm/misc/tdb_time.h" +#include "tiledb/sm/misc/types.h" #include "tiledb/sm/query/hilbert_order.h" #include "tiledb/sm/query/query_macros.h" #include "tiledb/sm/stats/global_stats.h" +#include "tiledb/sm/tile/arithmetic.h" #include "tiledb/sm/tile/generic_tile_io.h" #include "tiledb/sm/tile/tile_metadata_generator.h" #include "tiledb/sm/tile/writer_tile_tuple.h" #include "tiledb/storage_format/uri/generate_uri.h" +#include "tiledb/type/apply_with_type.h" using namespace tiledb; using namespace tiledb::common; @@ -59,6 +62,98 @@ using namespace tiledb::sm::stats; namespace tiledb::sm { +/** + * See `tiledb/sm/tile/arithmetic.h` function `is_rectangular_domain`. + * + * When writing multiple dense fragments the domain of each fragment + * must accurately reflect the coordinates contained in that fragment. + * This is called in `GlobalOrderWriter::identify_fragment_tile_boundaries` for + * each of the input tiles to determine whether a rectangle is formed and + * including a tile in a fragment is sound. + */ +static IsRectangularDomain is_rectangular_domain( + const ArraySchema& arrayschema, + const NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + const Domain& arraydomain = arrayschema.domain(); + + // NB: ordinary write subarray must be tile aligned but the consolidation + // subarray is not required to be + NDRange arraydomain_aligned = domain; + arraydomain.expand_to_tiles_when_no_current_domain(arraydomain_aligned); + + auto impl = [&](T) { + if constexpr (TileDBIntegral) { + std::vector tile_extents; + tile_extents.reserve(arraydomain.dim_num()); + for (uint64_t d = 0; d < arraydomain.dim_num(); d++) { + tile_extents.push_back(arraydomain.tile_extent(d).rvalue_as()); + } + + return is_rectangular_domain( + arrayschema.tile_order(), + tile_extents, + arraydomain_aligned, + start_tile, + num_tiles); + } else { + return IsRectangularDomain::Never; + } + }; + return apply_with_type(impl, arraydomain.dimension_ptr(0)->type()); +} + +/** + * See `tiledb/sm/tile/arithmetic.h` function `domain_tile_offset`. + * + * When writing multiple dense fragments the domain of each fragment + * must accurately reflect the coordinates contained in that fragment. + * This is called when starting a new fragment to update the domain of the + * previous fragment and set the correct starting domain of the new one. + */ +static std::optional domain_tile_offset( + const ArraySchema& arrayschema, + const NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + const Domain& arraydomain = arrayschema.domain(); + + // NB: ordinary write subarray must be tile aligned but the consolidation + // subarray is not required to be. Align for purposes of tile arithmetic. 
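+  // Illustrative example (hypothetical values): with a tile extent of 10 on a
+  // dimension whose domain starts at 1, a consolidation subarray of [15, 34]
+  // is expanded to the tile-aligned range [11, 40] for the tile arithmetic
+  // below; the computed fragment domain is then cropped back into [15, 34].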
+ NDRange arraydomain_aligned = domain; + arraydomain.expand_to_tiles_when_no_current_domain(arraydomain_aligned); + + auto impl = [&](T) { + if constexpr (TileDBIntegral) { + std::vector tile_extents; + tile_extents.reserve(arraydomain.dim_num()); + for (uint64_t d = 0; d < arraydomain.dim_num(); d++) { + tile_extents.push_back(arraydomain.tile_extent(d).rvalue_as()); + } + + std::optional r = domain_tile_offset( + arrayschema.tile_order(), + tile_extents, + arraydomain_aligned, + start_tile, + num_tiles); + if (r.has_value()) { + // aligning to the array domain may have extended beyond the subarray, + // clamp the result back within the subarray bounds + for (uint64_t d = 0; d < arraydomain.dim_num(); d++) { + tiledb::type::crop_range(domain[d], r.value()[d]); + } + } + return r; + } else { + return std::optional{}; + } + }; + + return apply_with_type(impl, arraydomain.dimension_ptr(0)->type()); +} + class GlobalOrderWriterException : public StatusException { public: explicit GlobalOrderWriterException(const std::string& message) @@ -74,7 +169,7 @@ GlobalOrderWriter::GlobalOrderWriter( stats::Stats* stats, shared_ptr logger, StrategyParams& params, - uint64_t fragment_size, + std::optional fragment_size, std::vector& written_fragment_info, bool disable_checks_consolidation, std::vector& processed_conditions, @@ -91,7 +186,7 @@ GlobalOrderWriter::GlobalOrderWriter( remote_query, fragment_name) , processed_conditions_(processed_conditions) - , fragment_size_(fragment_size) + , max_fragment_size_(fragment_size) , current_fragment_size_(0) { // Check the layout is global order. if (layout_ != Layout::GLOBAL_ORDER) { @@ -116,6 +211,7 @@ GlobalOrderWriter::GlobalWriteState::GlobalWriteState( : last_tiles_(memory_tracker->get_resource(MemoryType::WRITER_TILE_DATA)) , last_var_offsets_(memory_tracker->get_resource(MemoryType::WRITER_DATA)) , cells_written_(memory_tracker->get_resource(MemoryType::WRITER_DATA)) { + dense_.domain_tile_offset_ = 0; } /* ****************************** */ @@ -202,7 +298,7 @@ Status GlobalOrderWriter::init_global_write_state() { const auto& domain{array_schema_.domain()}; const auto capacity = array_schema_.capacity(); const auto cell_num_per_tile = - coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); + dense() ? domain.cell_num_per_tile() : capacity; auto last_tiles_it = global_write_state_->last_tiles_.emplace( std::piecewise_construct, std::forward_as_tuple(name), @@ -231,6 +327,11 @@ GlobalOrderWriter::GlobalWriteState* GlobalOrderWriter::get_global_state() { return global_write_state_.get(); } +const GlobalOrderWriter::GlobalWriteState* GlobalOrderWriter::get_global_state() + const { + return global_write_state_.get(); +} + std::pair> GlobalOrderWriter::multipart_upload_state(bool client) { if (client) { @@ -388,7 +489,7 @@ Status GlobalOrderWriter::check_global_order() const { } // Applicable only to sparse writes - exit if coordinates do not exist - if (!coords_info_.has_coords_ || coords_info_.coords_num_ == 0) { + if (dense() || coords_info_.coords_num_ == 0) { return Status::Ok(); } @@ -498,12 +599,14 @@ Status GlobalOrderWriter::check_global_order_hilbert() const { void GlobalOrderWriter::clean_up() { if (global_write_state_ != nullptr) { - const auto& uri = global_write_state_->frag_meta_->fragment_uri(); + if (global_write_state_->frag_meta_) { + const auto& uri = global_write_state_->frag_meta_->fragment_uri(); - // Cleanup the fragment we are currently writing. 
There is a chance that the - // URI is empty if creating the first fragment had failed. - if (!uri.empty()) { - resources_.vfs().remove_dir(uri); + // Cleanup the fragment we are currently writing. There is a chance that + // the URI is empty if creating the first fragment had failed. + if (!uri.empty()) { + resources_.vfs().remove_dir(uri); + } } global_write_state_.reset(nullptr); @@ -516,27 +619,37 @@ void GlobalOrderWriter::clean_up() { } Status GlobalOrderWriter::filter_last_tiles(uint64_t cell_num) { + const uint64_t last_tile_offset = + global_write_state_->last_tiles_.begin()->second.size() - 1; + // Adjust cell num for (auto& last_tiles : global_write_state_->last_tiles_) { - last_tiles.second[0].set_final_size(cell_num); + last_tiles.second.back()->set_final_size(cell_num); } // Compute coordinates metadata auto meta = global_write_state_->frag_meta_; - auto mbrs = compute_mbrs(global_write_state_->last_tiles_); + auto mbrs = compute_mbrs( + last_tile_offset, last_tile_offset + 1, global_write_state_->last_tiles_); set_coords_metadata(0, 1, global_write_state_->last_tiles_, mbrs, meta); // Compute tile metadata. - RETURN_NOT_OK(compute_tiles_metadata(1, global_write_state_->last_tiles_)); + RETURN_NOT_OK(compute_tiles_metadata( + last_tile_offset, + last_tile_offset + 1, + global_write_state_->last_tiles_)); // Gather stats stats_->add_counter( "cell_num", - global_write_state_->last_tiles_.begin()->second[0].cell_num()); + global_write_state_->last_tiles_.begin()->second.back()->cell_num()); stats_->add_counter("tile_num", 1); // Filter tiles - RETURN_NOT_OK(filter_tiles(&global_write_state_->last_tiles_)); + RETURN_NOT_OK(filter_tiles( + last_tile_offset, + last_tile_offset + 1, + &global_write_state_->last_tiles_)); return Status::Ok(); } @@ -625,19 +738,69 @@ Status GlobalOrderWriter::compute_coord_dups( Status GlobalOrderWriter::finalize_global_write_state() { iassert(layout_ == Layout::GLOBAL_ORDER, "layout = {}", layout_str(layout_)); - auto meta = global_write_state_->frag_meta_; - const auto& uri = meta->fragment_uri(); + + // For dense, there may be prepared tiles which have not been flushed yet + if (dense()) { + const uint64_t num_remaining = + global_write_state_->last_tiles_.begin()->second.size() - 1; + if (num_remaining > 0) { + iassert(global_write_state_->frag_meta_); + throw_if_not_ok(populate_fragment( + global_write_state_->last_tiles_, 0, num_remaining)); + + // FIXME: there is a possibility here that we write a tile bigger than the + // max fragment size if these remaining tiles fill it up and then the last + // tile runs over... in this case we need to do the rectangle thing all + // over again so as to avoid writing a fragment which exceeds the max + // fragment size. + // + // HOWEVER, this state might not be reachable, because dense global + // order writes must be fully tile-aligned, which means that the + // "last tile" which we would flush here should have zero cells. + // Note that the subarray is a rectangle, so + // `identify_fragment_tile_boundaries` should always indicate that all of + // the tiles can be written. + // + // As such we are not going to expend more effort on this unless + // we see evidence of it. 
+ } + } else { + iassert(global_write_state_->last_tiles_.begin()->second.size() <= 1); + } // Handle last tile Status st = global_write_handle_last_tile(); + auto meta = global_write_state_->frag_meta_; + if (!st.ok()) { - throw_if_not_ok(close_files(meta)); + if (meta) { + throw_if_not_ok(close_files(meta)); + } return st; } + if (!meta) { + return Status::Ok(); + } + + const auto& uri = meta->fragment_uri(); + // Close all files RETURN_NOT_OK(close_files(meta)); + // Update dense fragment domain + if (dense()) { + const uint64_t num_tiles_in_fragment = + meta->loaded_metadata()->tile_offsets()[0].size(); + std::optional fragment_domain = domain_tile_offset( + array_schema_, + subarray_.ndrange(0), + global_write_state_->dense_.domain_tile_offset_, + num_tiles_in_fragment); + iassert(fragment_domain.has_value()); + meta->set_domain(std::move(fragment_domain.value())); + } + // Check that the same number of cells was written across attributes // and dimensions auto cell_num = global_write_state_->cells_written_[buffers_.begin()->first]; @@ -656,7 +819,7 @@ Status GlobalOrderWriter::finalize_global_write_state() { } // Check if the total number of cells written is equal to the subarray size - if (!coords_info_.has_coords_) { // This implies a dense array + if (dense()) { auto& domain{array_schema_.domain()}; auto expected_cell_num = domain.cell_num(subarray_.ndrange(0)); @@ -720,6 +883,21 @@ Status GlobalOrderWriter::finalize_global_write_state() { return st; } +Status GlobalOrderWriter::populate_fragment( + tdb::pmr::unordered_map& tiles, + uint64_t tile_offset, + uint64_t num_tiles) { + auto frag_meta = global_write_state_->frag_meta_; + + // write tiles for all attributes + RETURN_CANCEL_OR_ERROR( + write_tiles(tile_offset, tile_offset + num_tiles, frag_meta, &tiles)); + + frag_meta->set_tile_index_base(frag_meta->tile_index_base() + num_tiles); + + return Status::Ok(); +} + Status GlobalOrderWriter::global_write() { // Applicable only to global write on dense/sparse arrays iassert(layout_ == Layout::GLOBAL_ORDER, "layout = {}", layout_str(layout_)); @@ -727,8 +905,7 @@ Status GlobalOrderWriter::global_write() { // Initialize the global write state if this is the first invocation if (!global_write_state_) { RETURN_CANCEL_OR_ERROR(alloc_global_write_state()); - RETURN_CANCEL_OR_ERROR(create_fragment( - !coords_info_.has_coords_, global_write_state_->frag_meta_)); + RETURN_NOT_OK(create_fragment(dense(), global_write_state_->frag_meta_)); RETURN_CANCEL_OR_ERROR(init_global_write_state()); } @@ -748,71 +925,93 @@ Status GlobalOrderWriter::global_write() { query_memory_tracker_->get_resource(MemoryType::WRITER_TILE_DATA)); RETURN_CANCEL_OR_ERROR(prepare_full_tiles(coord_dups, &tiles)); - // Find number of tiles and gather stats - uint64_t tile_num = 0; - if (!tiles.empty()) { - auto it = tiles.begin(); - tile_num = it->second.size(); - - uint64_t cell_num = 0; - for (size_t t = 0; t < tile_num; ++t) { - cell_num += it->second[t].cell_num(); - } - stats_->add_counter("cell_num", cell_num); - stats_->add_counter("tile_num", tile_num); - } - - // No cells to be written + uint64_t tile_num = (tiles.empty() ? 0 : tiles.begin()->second.size()); if (tile_num == 0) { return Status::Ok(); } + // Compute tile metadata. + RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tiles)); + // Compute coordinate metadata (if coordinates are present) auto mbrs = compute_mbrs(tiles); - // Compute tile metadata. 
- RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tile_num, tiles)); + RETURN_NOT_OK(filter_tiles(&tiles)); - // Filter all tiles - RETURN_CANCEL_OR_ERROR(filter_tiles(&tiles)); + // include any prepared tiles from previous `submit` which were not flushed + for (const auto& it : buffers_) { + auto& last = global_write_state_->last_tiles_.at(it.first); + if (!last.empty()) { + const uint64_t num_leftover = last.size() - 1; + tiles.at(it.first).splice( + tiles.at(it.first).begin(), + last, + last.begin(), + std::next(last.begin(), num_leftover)); + } + } + tile_num = (tiles.empty() ? 0 : tiles.begin()->second.size()); + + const auto fragments = identify_fragment_tile_boundaries(tiles); + + for (uint64_t f = 0; f < fragments.tile_offsets_.size(); f++) { + const uint64_t input_start_tile = fragments.tile_offsets_[f]; + const uint64_t input_num_tiles = (f + 1 < fragments.tile_offsets_.size() ? + fragments.tile_offsets_[f + 1] : + fragments.num_writeable_tiles_) - + input_start_tile; + + if (input_num_tiles == 0) { + // this should only happen if there is only one tile of input and we have + // to wait for finalize, or if continuing a fragment from a previous write + // and there is no more room + iassert(f == 0); + if (current_fragment_size_ == 0) { + iassert(fragments.tile_offsets_.size() == 1); + } + } else { + if (f > 0 || !global_write_state_->frag_meta_) { + RETURN_CANCEL_OR_ERROR(start_new_fragment()); + } - uint64_t idx = 0; - while (idx < tile_num) { - auto frag_meta = global_write_state_->frag_meta_; + global_write_state_->frag_meta_->set_num_tiles( + global_write_state_->frag_meta_->tile_index_base() + input_num_tiles); - // Compute the number of tiles that will fit in this fragment. - auto num = num_tiles_to_write(idx, tile_num, tiles); + set_coords_metadata( + input_start_tile, + input_start_tile + input_num_tiles, + tiles, + mbrs, + global_write_state_->frag_meta_); - // If we're resuming a fragment write and the first tile doesn't fit into - // the previous fragment, we need to start a new fragment and recalculate - // the number of tiles to write. - if (current_fragment_size_ > 0 && num == 0) { - RETURN_CANCEL_OR_ERROR(start_new_fragment()); - num = num_tiles_to_write(idx, tile_num, tiles); + RETURN_CANCEL_OR_ERROR( + populate_fragment(tiles, input_start_tile, input_num_tiles)); } + } - // Set new number of tiles in the fragment metadata - auto new_num_tiles = frag_meta->tile_index_base() + num; - frag_meta->set_num_tiles(new_num_tiles); + current_fragment_size_ = fragments.last_fragment_size_; - if (new_num_tiles == 0) { - throw GlobalOrderWriterException( - "Fragment size is too small to write a single tile"); - } + if (fragments.num_writeable_tiles_ < tile_num) { + // sparse array should be able to write everything + iassert(dense()); - set_coords_metadata(idx, idx + num, tiles, mbrs, frag_meta); + const uint64_t offset_not_written = fragments.num_writeable_tiles_; - // Write tiles for all attributes - RETURN_CANCEL_OR_ERROR(write_tiles(idx, idx + num, frag_meta, &tiles)); - idx += num; + // Dense array does not have bounding rectangles. + // If there were any other tile metadata which we needed to draw from the + // un-filtered tiles, we would have to store that in the global write state + // here. But there is no other such metadata. + iassert(mbrs.empty()); - // If we didn't write all tiles, close this fragment and start another. 
- if (idx != tile_num) { - RETURN_CANCEL_OR_ERROR(start_new_fragment()); + // buffer tiles which couldn't fit in memory + for (auto& attr : tiles) { + auto& last = global_write_state_->last_tiles_.at(attr.first); + last.splice( + last.begin(), + attr.second, + std::next(attr.second.begin(), offset_not_written), + attr.second.end()); } - - // Increment the tile index base for the next global order write. - frag_meta->set_tile_index_base(new_num_tiles); } return Status::Ok(); @@ -821,16 +1020,22 @@ Status GlobalOrderWriter::global_write() { Status GlobalOrderWriter::global_write_handle_last_tile() { auto capacity = array_schema_.capacity(); auto& domain = array_schema_.domain(); - auto cell_num_per_tile = - coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); + auto cell_num_per_tile = dense() ? domain.cell_num_per_tile() : capacity; auto cell_num_last_tiles = global_write_state_->cells_written_[buffers_.begin()->first] % cell_num_per_tile; if (cell_num_last_tiles == 0) return Status::Ok(); + // if we haven't started a fragment yet, now is the time + // (this can happen if the writes do not fill a full tile) + if (!global_write_state_->frag_meta_) { + RETURN_CANCEL_OR_ERROR(start_new_fragment()); + } + // Reserve space for the last tile in the fragment metadata auto meta = global_write_state_->frag_meta_; + iassert(meta); meta->set_num_tiles(meta->tile_index_base() + 1); // Filter last tiles @@ -906,8 +1111,7 @@ Status GlobalOrderWriter::prepare_full_tiles_fixed( auto capacity = array_schema_.capacity(); auto cell_num = *buffer_size / cell_size; auto& domain{array_schema_.domain()}; - auto cell_num_per_tile = - coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); + auto cell_num_per_tile = dense() ? domain.cell_num_per_tile() : capacity; // Do nothing if there are no cells to write if (cell_num == 0) { @@ -915,7 +1119,7 @@ Status GlobalOrderWriter::prepare_full_tiles_fixed( } // First fill the last tile - auto& last_tile = global_write_state_->last_tiles_.at(name)[0]; + auto& last_tile = *global_write_state_->last_tiles_.at(name).back(); uint64_t cell_idx = 0; uint64_t last_tile_cell_idx = global_write_state_->cells_written_[name] % cell_num_per_tile; @@ -1087,8 +1291,7 @@ Status GlobalOrderWriter::prepare_full_tiles_var( auto capacity = array_schema_.capacity(); auto cell_num = buffer_size / constants::cell_var_offset_size; auto& domain{array_schema_.domain()}; - auto cell_num_per_tile = - coords_info_.has_coords_ ? capacity : domain.cell_num_per_tile(); + auto cell_num_per_tile = dense() ? domain.cell_num_per_tile() : capacity; auto attr_datatype_size = datatype_size(array_schema_.type(name)); // Do nothing if there are no cells to write @@ -1096,7 +1299,7 @@ Status GlobalOrderWriter::prepare_full_tiles_var( return Status::Ok(); // First fill the last tile - auto& last_tile = global_write_state_->last_tiles_.at(name)[0]; + auto& last_tile = *global_write_state_->last_tiles_.at(name).back(); auto& last_var_offset = global_write_state_->last_var_offsets_[name]; uint64_t cell_idx = 0; uint64_t last_tile_cell_idx = @@ -1371,92 +1574,206 @@ Status GlobalOrderWriter::prepare_full_tiles_var( return Status::Ok(); } -uint64_t GlobalOrderWriter::num_tiles_to_write( - uint64_t start, - uint64_t tile_num, - tdb::pmr::unordered_map& tiles) { +/** + * Identifies the division of input cells into target fragments, + * using `max_fragment_size_` as a hard limit on the target fragment size. 
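+ *
+ * For example (illustrative): with a 10 MB `max_fragment_size_` and ten
+ * filtered tiles of 3 MB each, the input is divided into fragments of three
+ * tiles (9 MB) apiece; the tenth tile starts a final fragment whose 3 MB is
+ * carried in `current_fragment_size_` so a later `submit()` may continue it.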
+ * + * `current_fragment_size_` may be nonzero if continuing a fragment from + * a previous `submit()`, so this field is used to initialize the fragment size + * before the first tile is examined. + * + * @param tiles + * + * @return a list of (fragment size, tile offset) pairs identifying the division + * of input data into target fragments + */ +GlobalOrderWriter::FragmentTileBoundaries +GlobalOrderWriter::identify_fragment_tile_boundaries( + const tdb::pmr::unordered_map& tiles) + const { // Cache variables to prevent map lookups. const auto buf_names = buffer_names(); - std::vector var_size; - std::vector nullable; - std::vector writer_tile_vectors; - var_size.reserve(buf_names.size()); - nullable.reserve(buf_names.size()); + std::vector writer_tile_vectors; writer_tile_vectors.reserve(buf_names.size()); for (auto& name : buf_names) { - var_size.emplace_back(array_schema_.var_size(name)); - nullable.emplace_back(array_schema_.is_nullable(name)); writer_tile_vectors.emplace_back(&tiles.at(name)); } + // Find number of tiles and gather stats + uint64_t tile_num = 0; + if (!tiles.empty()) { + auto it = tiles.begin(); + tile_num = it->second.size(); + + uint64_t cell_num = 0; + for (size_t t = 0; t < tile_num; ++t) { + cell_num += it->second[t].cell_num(); + } + stats_->add_counter("cell_num", cell_num); + stats_->add_counter("tile_num", tile_num); + } + + uint64_t running_tiles_size = current_fragment_size_; + uint64_t fragment_size = current_fragment_size_; + + uint64_t write_state_start_tile = + global_write_state_->dense_.domain_tile_offset_; + uint64_t current_fragment_num_tiles_already_written = 0; + if (dense() && global_write_state_->frag_meta_) { + current_fragment_num_tiles_already_written = + global_write_state_->frag_meta_->tile_index_base(); + } + + uint64_t fragment_start = 0; + std::vector fragments; + + // NB: this really wants to be `std::option` but some versions of gcc have a + // false positive uninitialized use warning + int64_t fragment_end = -1; + // Make sure we don't write more than the desired fragment size. - for (uint64_t t = start; t < tile_num; t++) { + for (uint64_t t = 0; t < tile_num; t++) { uint64_t tile_size = 0; for (uint64_t a = 0; a < buf_names.size(); a++) { - if (var_size[a]) { - tile_size += writer_tile_vectors[a] - ->at(t) - .offset_tile() - .filtered_buffer() - .size(); - tile_size += - writer_tile_vectors[a]->at(t).var_tile().filtered_buffer().size(); - } else { - tile_size += - writer_tile_vectors[a]->at(t).fixed_tile().filtered_buffer().size(); - } + tile_size += writer_tile_vectors[a]->at(t).filtered_size().value(); + } + + if (tile_size > + max_fragment_size_.value_or(std::numeric_limits::max())) { + throw GlobalOrderWriterException( + "Fragment size is too small to write a single tile"); + } - if (nullable[a]) { - tile_size += writer_tile_vectors[a] - ->at(t) - .validity_tile() - .filtered_buffer() - .size(); + bool should_start_new_fragment = false; + + // NB: normally this should only hit once, but if there is a single + // tile larger than the max fragment size it could hit twice and error + if (running_tiles_size + tile_size > + max_fragment_size_.value_or(std::numeric_limits::max())) { + if (fragment_end < 0) { + if (fragment_size == 0) { + throw GlobalOrderWriterException( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + } } + + should_start_new_fragment = true; + } else if (dense() && max_fragment_size_.has_value()) { + // Dense fragments must have a rectangular domain. 
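+      // (Illustrative example: in a row-major grid of 4x8 tiles, the tile
+      // range [0, 12) covers one full row plus half of the next, so it is
+      // not a rectangle and cannot yet be flushed as a dense fragment.)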
+ // And all fragments must be smaller than `max_fragment_size_`. + // We must identify the highest tile number which satisfies both criteria. + const uint64_t fragment_start_tile = + write_state_start_tile + fragment_start; + const uint64_t maybe_num_tiles = + current_fragment_num_tiles_already_written + t - fragment_start + 1; + should_start_new_fragment = + (is_rectangular_domain( + array_schema_, + subarray_.ndrange(0), + fragment_start_tile, + maybe_num_tiles) == IsRectangularDomain::Never); } - if (current_fragment_size_ + tile_size > fragment_size_) { - return t - start; + if (should_start_new_fragment) { + fragments.push_back(fragment_start); + + iassert(running_tiles_size >= fragment_size); + running_tiles_size -= fragment_size; + + fragment_start = + static_cast(std::max(0, fragment_end)); + fragment_end = -1; + + write_state_start_tile += current_fragment_num_tiles_already_written; + current_fragment_num_tiles_already_written = 0; + } + + bool extends_fragment = true; + if (dense() && max_fragment_size_.has_value()) { + // Dense fragments must have a rectangular domain. + // And all fragments must be smaller than `max_fragment_size_`. + // We must identify the highest tile number which satisfies both criteria. + const uint64_t fragment_start_tile = + write_state_start_tile + fragment_start; + const uint64_t maybe_num_tiles = + current_fragment_num_tiles_already_written + t - fragment_start + 1; + extends_fragment = + (is_rectangular_domain( + array_schema_, + subarray_.ndrange(0), + fragment_start_tile, + maybe_num_tiles) == IsRectangularDomain::Yes); } + if (extends_fragment) { + fragment_size = running_tiles_size + tile_size; + fragment_end = static_cast(t + 1); + } + + running_tiles_size += tile_size; + } - current_fragment_size_ += tile_size; + if (fragment_end >= 0) { + fragments.push_back(fragment_start); } - return tile_num - start; + return GlobalOrderWriter::FragmentTileBoundaries{ + .tile_offsets_ = fragments, + .num_writeable_tiles_ = + (fragment_end < 0 ? 
fragment_start : + static_cast(fragment_end)), + .last_fragment_size_ = fragment_size}; } Status GlobalOrderWriter::start_new_fragment() { - auto frag_meta = global_write_state_->frag_meta_; - auto& uri = frag_meta->fragment_uri(); + // finish off current fragment if there is one + if (global_write_state_->frag_meta_) { + auto frag_meta = global_write_state_->frag_meta_; + auto& uri = frag_meta->fragment_uri(); - // Close all files - RETURN_NOT_OK(close_files(frag_meta)); + // Close all files + RETURN_NOT_OK(close_files(frag_meta)); - // Set the processed conditions - frag_meta->set_processed_conditions(processed_conditions_); + // Update dense fragment domain + if (dense()) { + const uint64_t num_tiles_in_fragment = + frag_meta->loaded_metadata()->tile_offsets()[0].size(); + std::optional fragment_domain = domain_tile_offset( + array_schema_, + subarray_.ndrange(0), + global_write_state_->dense_.domain_tile_offset_, + num_tiles_in_fragment); + iassert(fragment_domain.has_value()); + frag_meta->set_domain(std::move(fragment_domain.value())); - // Compute fragment min/max/sum/null count - frag_meta->compute_fragment_min_max_sum_null_count(); + global_write_state_->dense_.domain_tile_offset_ += num_tiles_in_fragment; + } + + // Set the processed conditions + frag_meta->set_processed_conditions(processed_conditions_); - // Flush fragment metadata to storage - frag_meta->store(array_->get_encryption_key()); + // Compute fragment min/max/sum/null count + frag_meta->compute_fragment_min_max_sum_null_count(); - frag_uris_to_commit_.emplace_back(uri); + // Flush fragment metadata to storage + frag_meta->store(array_->get_encryption_key()); - // Make a new fragment URI. - const auto write_version = array_->array_schema_latest().write_version(); - auto frag_dir_uri = - array_->array_directory().get_fragments_dir(write_version); - auto new_fragment_str = storage_format::generate_timestamped_name( - fragment_timestamp_range_.first, - fragment_timestamp_range_.second, - write_version); - fragment_uri_ = frag_dir_uri.join_path(new_fragment_str); + frag_uris_to_commit_.emplace_back(uri); + + // Make a new fragment URI. + const auto write_version = array_->array_schema_latest().write_version(); + auto frag_dir_uri = + array_->array_directory().get_fragments_dir(write_version); + auto new_fragment_str = storage_format::generate_timestamped_name( + fragment_timestamp_range_.first, + fragment_timestamp_range_.second, + write_version); + fragment_uri_ = frag_dir_uri.join_path(new_fragment_str); + } // Create a new fragment. current_fragment_size_ = 0; - RETURN_NOT_OK(create_fragment( - !coords_info_.has_coords_, global_write_state_->frag_meta_)); + RETURN_NOT_OK(create_fragment(dense(), global_write_state_->frag_meta_)); return Status::Ok(); } diff --git a/tiledb/sm/query/writers/global_order_writer.h b/tiledb/sm/query/writers/global_order_writer.h index c15b81f67c8..b3eff76d081 100644 --- a/tiledb/sm/query/writers/global_order_writer.h +++ b/tiledb/sm/query/writers/global_order_writer.h @@ -75,6 +75,16 @@ class GlobalOrderWriter : public WriterBase { * attributes/dimensions, the first tile is the offsets tile, whereas the * second tile is the values tile. In both cases, the third tile stores a * validity tile for nullable attributes. + * + * For sparse arrays, each `WriterTileTupleVector` contains up to one tile, + * which is the data from the previous `submit` which did not fill a tile. 
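+ *
+ * For example (illustrative): for a sparse array with a tile capacity of
+ * 100 cells, a `submit` of 250 cells writes two full tiles and buffers the
+ * remaining 50 cells here until the next `submit` or `finalize`.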
+ * + * For dense arrays, each `WriterTileTupleVector` contains any tiles which + * were not guaranteed to fit into `max_fragment_size_` while also forming + * a bounding rectangle. Written fragments always have a rectangular domain, + * and it is necessary to buffer tiles this way to avoid flushing data + * which might later require a fragment to exceed `max_fragment_size_` + * in order to represent a rectangular domain. */ tdb::pmr::unordered_map last_tiles_; @@ -108,6 +118,36 @@ class GlobalOrderWriter : public WriterBase { */ std::unordered_map multipart_upload_state_; + + /** + * State for writing dense fragments. + * + * Dense fragments use the bounding rectangle as a precise determination + * of where the contents of the fragment are in the domain, and as such + * it must be written correctly. This is usually not a problem, however + * global order writes can: + * 1) split up a single write into multiple fragments in order to satisfy + * the `max_fragment_size_` parameter + * 2) write into a single domain over the course of multiple `submit` + * calls which each write an arbitrary subset of the domain, + * re-using the buffers + * + * Both of these make it non-trivial to determine what the domain written + * into a fragment actually was, when the fragment fills up + * `max_fragment_size`. + * + * The fields of this struct, as well as `last_tiles_` of the outer struct, + * are used to track the amount of data which the writer has already + * processed so as to keep the correct position in the target subarray. + */ + struct DenseWriteState { + /** + * Tile offset in the subarray domain which the current fragment began + * writing to. + */ + uint64_t domain_tile_offset_; + }; + DenseWriteState dense_; }; /* ********************************* */ @@ -119,7 +159,7 @@ class GlobalOrderWriter : public WriterBase { stats::Stats* stats, shared_ptr logger, StrategyParams& params, - uint64_t fragment_size, + std::optional fragment_size, std::vector& written_fragment_info, bool disable_checks_consolidation, std::vector& processed_conditions, @@ -158,6 +198,9 @@ class GlobalOrderWriter : public WriterBase { /** Returns a bare pointer to the global state. */ GlobalWriteState* get_global_state(); + /** Returns a bare pointer to the global state. */ + const GlobalWriteState* get_global_state() const; + /** * Used in serialization to share the multipart upload state * among cloud executors @@ -208,7 +251,7 @@ class GlobalOrderWriter : public WriterBase { * The desired fragment size, in bytes. The writer will create a new fragment * once this size has been reached. */ - uint64_t fragment_size_; + std::optional max_fragment_size_; /** * Size currently written to the fragment. @@ -371,19 +414,59 @@ class GlobalOrderWriter : public WriterBase { WriterTileTupleVector* tiles) const; /** - * Return the number of tiles to write depending on the desired fragment - * size. The tiles passed in as an argument should have already been - * filtered. + * Contains the return values of + * `GlobalOrderWriter::identify_fragment_tile_boundaries`. + */ + struct FragmentTileBoundaries { + /** + * The offsets where each complete fragment starts. + */ + std::vector tile_offsets_; + + /** + * The number of writeable tiles. + * For sparse arrays this is the number of tiles of input. + * For dense arrays this may be less if there is a trail of tiles which + * cannot be guaranteed to fit within `max_fragment_size` while also forming + * a rectangular domain. 
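+     *
+     * For example (illustrative): if a dense write ends partway through a
+     * row of tiles, the trailing tiles are not writeable in this pass; they
+     * are buffered in `last_tiles_` so that a later `submit` can complete
+     * the rectangle or `finalize` can flush them.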
+ */ + uint64_t num_writeable_tiles_; + + /** + * The size in bytes of the filtered tiles which are written to the last + * fragment. The last fragment may be resumed by a subsequent `submit`. + */ + uint64_t last_fragment_size_; + }; + + /** + * Identify the manner in which the filtered input tiles map onto target + * fragments. If `max_fragment_size_` is much larger than the input, this may + * return just one result. + * + * Each element of the returned vector is a pair `(fragment_size, start_tile)` + * indicating the size of the fragment, and the first tile offset which + * corresponds to that fragment. * - * @param start Current tile index. - * @param tile_num Number of tiles in the tiles vectors. * @param tiles Map of vector of tiles, per attributes. - * @return Number of tiles to write. + * + * @return see `FragmentTileBoundaries` documentation */ - uint64_t num_tiles_to_write( - uint64_t start, - uint64_t tile_num, - tdb::pmr::unordered_map& tiles); + FragmentTileBoundaries identify_fragment_tile_boundaries( + const tdb::pmr::unordered_map& tiles) + const; + + /** + * Writes cells from the indicated slice of `tiles` into the current fragment. + * + * @param tiles the source of cells organized into filtered tiles + * @param tile_offset the tile from which to begin writing + * @param num_tiles the number of tiles to write + */ + Status populate_fragment( + tdb::pmr::unordered_map& tiles, + uint64_t tile_offset, + uint64_t num_tiles); /** * Close the current fragment and start a new one. The closed fragment will @@ -391,6 +474,13 @@ class GlobalOrderWriter : public WriterBase { * be written at once. */ Status start_new_fragment(); + + /** + * @return true if this write is to a dense fragment + */ + bool dense() const { + return !coords_info_.has_coords_; + } }; } // namespace sm diff --git a/tiledb/sm/query/writers/unordered_writer.cc b/tiledb/sm/query/writers/unordered_writer.cc index ed568d2fa87..4b8ea2d6b45 100644 --- a/tiledb/sm/query/writers/unordered_writer.cc +++ b/tiledb/sm/query/writers/unordered_writer.cc @@ -699,7 +699,7 @@ Status UnorderedWriter::unordered_write() { } // Compute tile metadata. - RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tile_num, tiles)); + RETURN_CANCEL_OR_ERROR(compute_tiles_metadata(tiles)); // Filter all tiles RETURN_CANCEL_OR_ERROR(filter_tiles(&tiles)); diff --git a/tiledb/sm/query/writers/writer_base.cc b/tiledb/sm/query/writers/writer_base.cc index 3e13b990dc9..e0fcd5683c0 100644 --- a/tiledb/sm/query/writers/writer_base.cc +++ b/tiledb/sm/query/writers/writer_base.cc @@ -614,6 +614,8 @@ Status WriterBase::close_files(shared_ptr meta) const { } std::vector WriterBase::compute_mbrs( + uint64_t start_tile_idx, + uint64_t end_tile_idx, const tdb::pmr::unordered_map& tiles) const { auto timer_se = stats_->start_timer("compute_coord_meta"); @@ -628,16 +630,13 @@ std::vector WriterBase::compute_mbrs( return std::vector(); } - // Compute number of tiles. 
Assumes all attributes and - // and dimensions have the same number of tiles - auto tile_num = tiles.begin()->second.size(); auto dim_num = array_schema_.dim_num(); // Compute MBRs - std::vector mbrs(tile_num); - auto status = - parallel_for(&resources_.compute_tp(), 0, tile_num, [&](uint64_t i) { - mbrs[i].resize(dim_num); + std::vector mbrs(end_tile_idx - start_tile_idx); + auto status = parallel_for( + &resources_.compute_tp(), start_tile_idx, end_tile_idx, [&](uint64_t i) { + mbrs[i - start_tile_idx].resize(dim_num); std::vector data(dim_num); for (unsigned d = 0; d < dim_num; ++d) { auto dim{array_schema_.dimension_ptr(d)}; @@ -689,12 +688,13 @@ void WriterBase::set_coords_metadata( } Status WriterBase::compute_tiles_metadata( - uint64_t tile_num, + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map& tiles) const { auto* compute_tp = &resources_.compute_tp(); // Parallelize over attributes? - if (tiles.size() > tile_num) { + if (tiles.size() > (end_tile_idx - start_tile_idx)) { auto st = parallel_for(compute_tp, 0, tiles.size(), [&](uint64_t i) { auto tiles_it = tiles.begin(); std::advance(tiles_it, i); @@ -724,14 +724,15 @@ Status WriterBase::compute_tiles_metadata( const auto var_size = array_schema_.var_size(attr); const auto cell_size = array_schema_.cell_size(attr); const auto cell_val_num = array_schema_.cell_val_num(attr); - auto st = parallel_for(compute_tp, 0, tile_num, [&](uint64_t t) { - TileMetadataGenerator md_generator( - type, is_dim, var_size, cell_size, cell_val_num); - md_generator.process_full_tile(attr_tiles[t]); - md_generator.set_tile_metadata(attr_tiles[t]); + auto st = parallel_for( + compute_tp, start_tile_idx, end_tile_idx, [&](uint64_t t) { + TileMetadataGenerator md_generator( + type, is_dim, var_size, cell_size, cell_val_num); + md_generator.process_full_tile(attr_tiles[t]); + md_generator.set_tile_metadata(attr_tiles[t]); - return Status::Ok(); - }); + return Status::Ok(); + }); RETURN_NOT_OK(st); } } @@ -757,7 +758,9 @@ std::string WriterBase::coords_to_str(uint64_t i) const { } Status WriterBase::create_fragment( - bool dense, shared_ptr& frag_meta) { + bool dense, + shared_ptr& frag_meta, + const NDRange* domain) { // Get write version, timestamp array was opened, and a reference to the // array directory. auto write_version = array_->array_schema_latest().write_version(); @@ -787,18 +790,21 @@ Status WriterBase::create_fragment( has_timestamps, has_delete_metadata); - frag_meta->init(subarray_.ndrange(0)); + frag_meta->init(domain ? 
*domain : subarray_.ndrange(0)); return Status::Ok(); } Status WriterBase::filter_tiles( + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map* tiles) { auto timer_se = stats_->start_timer("filter_tiles"); auto status = parallel_for(&resources_.compute_tp(), 0, tiles->size(), [&](uint64_t i) { auto tiles_it = tiles->begin(); std::advance(tiles_it, i); - throw_if_not_ok(filter_tiles(tiles_it->first, &tiles_it->second)); + throw_if_not_ok(filter_tiles( + start_tile_idx, end_tile_idx, tiles_it->first, &tiles_it->second)); throw_if_cancelled(); return Status::Ok(); }); @@ -808,7 +814,10 @@ Status WriterBase::filter_tiles( } Status WriterBase::filter_tiles( - const std::string& name, WriterTileTupleVector* tiles) { + uint64_t start_tile_idx, + uint64_t end_tile_idx, + const std::string& name, + WriterTileTupleVector* tiles) { const bool var_size = array_schema_.var_size(name); const bool nullable = array_schema_.is_nullable(name); @@ -818,7 +827,8 @@ Status WriterBase::filter_tiles( // Process all tiles, minus offsets, they get processed separately. std::vector> args; args.reserve(tile_num * (1 + nullable)); - for (auto& tile : *tiles) { + for (uint64_t t = start_tile_idx; t < end_tile_idx; t++) { + auto& tile = (*tiles)[t]; if (var_size) { args.emplace_back(&tile.var_tile(), &tile.offset_tile(), false, false); } else { diff --git a/tiledb/sm/query/writers/writer_base.h b/tiledb/sm/query/writers/writer_base.h index 4f085c1b7c8..b7304903002 100644 --- a/tiledb/sm/query/writers/writer_base.h +++ b/tiledb/sm/query/writers/writer_base.h @@ -242,14 +242,27 @@ class WriterBase : public StrategyBase, public IQueryStrategy { /** * Computes the MBRs. * + * @param start_tile_idx The index of the first tile to compute MBR for + * @param end_tile_idx The index of the last tile to compute MBR for * @param tiles The tiles to calculate the MBRs from. It is a map of vectors, * one vector of tiles per dimension/coordinates. * @return MBRs. */ std::vector compute_mbrs( + uint64_t start_tile_idx, + uint64_t end_tile_idx, const tdb::pmr::unordered_map& tiles) const; + /** + * Computes the MBRs for all of the requested tiles. See above. + */ + std::vector compute_mbrs( + const tdb::pmr::unordered_map& tiles) + const { + return compute_mbrs(0, tiles.begin()->second.size(), tiles); + } + /** * Set the coordinates metadata (e.g., MBRs). * @@ -270,15 +283,26 @@ class WriterBase : public StrategyBase, public IQueryStrategy { /** * Computes the tiles metadata (min/max/sum/null count). * - * @param tile_num The number of tiles. + * @param start_tile_idx The index of the first tile to compute metadata for + * @param end_tile_idx The index of the last tile to compute metadata for * @param tiles The tiles to calculate the tile metadata from. It is * a map of vectors, one vector of tiles per dimension. * @return Status */ Status compute_tiles_metadata( - uint64_t tile_num, + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map& tiles) const; + /** + * Computes the tiles metadata for each tile in the provided list. See above. + */ + Status compute_tiles_metadata( + tdb::pmr::unordered_map& tiles) + const { + return compute_tiles_metadata(0, tiles.begin()->second.size(), tiles); + } + /** * Returns the i-th coordinates in the coordinate buffers in string * format. @@ -293,27 +317,51 @@ class WriterBase : public StrategyBase, public IQueryStrategy { * * @param dense Whether the fragment is dense or not. * @param frag_meta The fragment metadata to be generated. 
+ * @param domain Optional domain for the fragment, uses subarray 0th range if + * not provided * @return Status */ - Status create_fragment(bool dense, shared_ptr& frag_meta); + Status create_fragment( + bool dense, + shared_ptr& frag_meta, + const NDRange* domain = nullptr); /** * Runs the input coordinate and attribute tiles through their * filter pipelines. The tile buffers are modified to contain the output * of the pipeline. + * + * @param start_tile_idx The index of the first tile to filter + * @param end_tile_idx The index of the last tile to filter */ Status filter_tiles( + uint64_t start_tile_idx, + uint64_t end_tile_idx, tdb::pmr::unordered_map* tiles); + /** + * See above, filtering all of the provided tiles. + */ + Status filter_tiles( + tdb::pmr::unordered_map* tiles) { + return filter_tiles(0, tiles->begin()->second.size(), tiles); + } + /** * Runs the input tiles for the input attribute through the filter pipeline. * The tile buffers are modified to contain the output of the pipeline. * + * @param start_tile_idx The index of the first tile to filter + * @param end_tile_idx The index of the last tile to filter * @param name The attribute/dimension the tiles belong to. * @param tile The tiles to be filtered. * @return Status */ - Status filter_tiles(const std::string& name, WriterTileTupleVector* tiles); + Status filter_tiles( + uint64_t start_tile_idx, + uint64_t end_tile_idx, + const std::string& name, + WriterTileTupleVector* tiles); /** * Runs the input tile for the input attribute/dimension through the filter diff --git a/tiledb/sm/tile/arithmetic.h b/tiledb/sm/tile/arithmetic.h new file mode 100644 index 00000000000..58882132d2c --- /dev/null +++ b/tiledb/sm/tile/arithmetic.h @@ -0,0 +1,228 @@ +/** + * @file tiledb/sm/tile/arithmetic.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides template definitions for doing tile arithmetic, + * e.g. computing new domains based on offsets and such. + * + * Definitions: + * + * **Hyperrectangle**: + * The generalization of a rectangle to higher dimensions. + * This is a standard term from mathematical literature. + * + * **Hyperrow**: + * The generalization of a row to higher dimensions. + * This does not appear to be a standard term from mathematical literature. + * A row in a 2D domain is a rectangle of height 1, i.e. 
spanning a single + * coordinate of the outermost "row" dimension. So, in a higher-dimensional + * plane, a hyperrow is a hyperrectangle which spans a single coordinate of the + * outermost dimension. For example, in a 3D domain a hyperrow is a plane. + */ +#ifndef TILEDB_TILE_ARITHMETIC_H +#define TILEDB_TILE_ARITHMETIC_H + +#include "tiledb/common/arithmetic.h" +#include "tiledb/sm/array_schema/dimension.h" +#include "tiledb/sm/enums/layout.h" +#include "tiledb/sm/misc/types.h" +#include "tiledb/type/range/range.h" + +namespace tiledb::sm { + +/** + * Ternary value for the result of `is_rectangular_domain`. + * Describes whether a `[start_tile, start_tile + num_tiles)` range + * over a given domain forms a rectangle. + */ +enum class IsRectangularDomain { + /** The range is not a rectangle, but extending it could create one. */ + No, + /** The range is not a rectangle, and extending it can never create one. */ + Never, + /** The range is a rectangle. */ + Yes +}; + +/** + * @return true if the range `[start_tile, start_tile + num_tiles)` represents + * a hyper-rectangle inside `domain` with tile sizes given by `tile_extents` + */ +template +static IsRectangularDomain is_rectangular_domain( + Layout tile_order, + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + for (uint64_t d_outer = 0; d_outer < tile_extents.size(); d_outer++) { + uint64_t hyperrow_num_tiles = 1; + for (uint64_t d_inner = d_outer + 1; d_inner < tile_extents.size(); + d_inner++) { + const uint64_t d = + (tile_order == Layout::ROW_MAJOR ? d_inner : + tile_extents.size() - d_inner - 1); + const uint64_t d_inner_num_tiles = + sm::Dimension::tile_idx( + domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + + 1; + + const auto maybe = checked_arithmetic::mul( + hyperrow_num_tiles, d_inner_num_tiles); + if (maybe.has_value()) { + hyperrow_num_tiles = maybe.value(); + } else { + throw std::overflow_error( + "Cannot compute subrectangle of domain due to arithmetic overflow: " + "domain tile extents may be too large"); + } + } + + const uint64_t hyperrow_offset = start_tile % hyperrow_num_tiles; + if (hyperrow_offset + num_tiles > hyperrow_num_tiles) { + if (hyperrow_offset != 0) { + return IsRectangularDomain::Never; + } else if (num_tiles % hyperrow_num_tiles != 0) { + return IsRectangularDomain::No; + } + } + } + return IsRectangularDomain::Yes; +} + +/** + * Compute the number of tiles per hyperrow for the given `domain` with tiles + * given by `tile_extents`. + * + * For D dimensions, the returned vector contains `D+1` elements. + * Position 0 is the number of tiles in `domain`. + * For dimension `d`, position `d + 1` is the number of tiles in a hyperrow of + * dimension `d` (and is thus always 1 for the final dimension). + */ +template +std::vector> compute_hyperrow_sizes( + Layout tile_order, + std::span tile_extents, + const sm::NDRange& domain) { + std::vector> hyperrow_sizes( + tile_extents.size() + 1, 1); + for (uint64_t di = 0; di < tile_extents.size(); di++) { + const uint64_t d = + (tile_order == Layout::ROW_MAJOR ? 
di : tile_extents.size() - di - 1); + const uint64_t d_num_tiles = + sm::Dimension::tile_idx( + domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + + 1; + hyperrow_sizes[di] = d_num_tiles; + } + for (uint64_t d = tile_extents.size(); d > 0; d--) { + if (hyperrow_sizes[d - 1].has_value() && hyperrow_sizes[d].has_value()) { + hyperrow_sizes[d - 1] = checked_arithmetic::mul( + hyperrow_sizes[d - 1].value(), hyperrow_sizes[d].value()); + } else { + hyperrow_sizes[d - 1] = std::nullopt; + } + } + + return hyperrow_sizes; +} + +/** + * @return a new range which is contained the rectangle within `domain` defined + * by `[start_tile, start_tile + num_tiles)` for the tile sizes given by + * `tile_extents`. If this does not represent a valid rectangle then + * `std::nullopt` is returned instead. + */ +template +static std::optional domain_tile_offset( + Layout tile_order, + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles) { + sm::NDRange r; + r.resize(tile_extents.size()); + + const std::vector> dimension_sizes = + compute_hyperrow_sizes(tile_order, tile_extents, domain); + + for (uint64_t di = 0; di < tile_extents.size(); di++) { + const uint64_t d = + (tile_order == Layout::ROW_MAJOR ? di : tile_extents.size() - di - 1); + + if (!dimension_sizes[di + 1].has_value()) { + throw std::overflow_error( + "Cannot compute subrectangle of domain due to arithmetic overflow: " + "domain tile extents may be too large"); + } + const uint64_t hyperrow_num_tiles = dimension_sizes[di + 1].value(); + + T this_dimension_start_tile, this_dimension_end_tile; + if (dimension_sizes[di].has_value()) { + const uint64_t outer_num_tiles = dimension_sizes[di].value(); + this_dimension_start_tile = (start_tile / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); + this_dimension_end_tile = + ((start_tile + num_tiles - 1) / hyperrow_num_tiles) % + (outer_num_tiles / hyperrow_num_tiles); + } else { + this_dimension_start_tile = start_tile / hyperrow_num_tiles; + this_dimension_end_tile = + (start_tile + num_tiles - 1) / hyperrow_num_tiles; + } + + if (start_tile % hyperrow_num_tiles == 0) { + // aligned to the start of the hyperrow + if (num_tiles > hyperrow_num_tiles && + num_tiles % hyperrow_num_tiles != 0) { + return std::nullopt; + } + } else { + // begins in the middle of the hyperrow + const uint64_t offset = start_tile % hyperrow_num_tiles; + if (offset + num_tiles > hyperrow_num_tiles) { + return std::nullopt; + } + } + + const T start = + domain[d].start_as() + (this_dimension_start_tile * tile_extents[d]); + const T end = domain[d].start_as() + + (this_dimension_end_tile * tile_extents[d]) + + tile_extents[d] - 1; + r[d] = Range( + std::max(domain[d].start_as(), start), + std::min(domain[d].end_as(), end)); + } + + return r; +} + +} // namespace tiledb::sm + +#endif diff --git a/tiledb/sm/tile/test/CMakeLists.txt b/tiledb/sm/tile/test/CMakeLists.txt index 6feeb4eafb1..bb06dbdcb41 100644 --- a/tiledb/sm/tile/test/CMakeLists.txt +++ b/tiledb/sm/tile/test/CMakeLists.txt @@ -29,7 +29,10 @@ include(unit_test) commence(unit_test tile) this_target_sources( main.cc + unit_arithmetic.cc unit_tile.cc + ${CMAKE_SOURCE_DIR}/test/support/rapidcheck/show/array_schema_templates.cc ) this_target_object_libraries(tile mem_helpers) + this_target_link_libraries(rapidcheck) conclude(unit_test) diff --git a/tiledb/sm/tile/test/arithmetic.h b/tiledb/sm/tile/test/arithmetic.h new file mode 100644 index 00000000000..659fb5329ec --- /dev/null +++ 
b/tiledb/sm/tile/test/arithmetic.h @@ -0,0 +1,95 @@ +/** + * @file tiledb/sm/tile/arithmetic.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file provides template definitions for functions which are + * used to test tile arithmetic. + */ +#ifndef TILEDB_TILE_TEST_ARITHMETIC_H +#define TILEDB_TILE_TEST_ARITHMETIC_H + +#include "tiledb/sm/tile/arithmetic.h" + +namespace tiledb::test { + +/** + * @return the number of tiles in `subrectangle` based on the tile sizes in + * `tile_extents` + */ +template +uint64_t compute_num_tiles( + std::span tile_extents, const sm::NDRange& subrectangle) { + uint64_t num_tiles_result = 1; + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t num_tiles_this_dimension = sm::Dimension::tile_idx( + subrectangle[d].end_as(), + subrectangle[d].start_as(), + tile_extents[d]) + + 1; + num_tiles_result *= num_tiles_this_dimension; + } + + return num_tiles_result; +} + +/** + * @return the tile offset of `subrectangle` within `domain` based on the tile + * sizes in `tile_extents` + */ +template +std::optional compute_start_tile( + sm::Layout tile_order, + std::span tile_extents, + const sm::NDRange& domain, + const sm::NDRange& subrectangle) { + const std::vector> hyperrow_sizes = + sm::compute_hyperrow_sizes(tile_order, tile_extents, domain); + + uint64_t start_tile_result = 0; + for (uint64_t di = 0; di < tile_extents.size(); di++) { + const uint64_t d = + (tile_order == sm::Layout::ROW_MAJOR ? 
di : + tile_extents.size() - di - 1); + const uint64_t start_tile_this_dimension = sm::Dimension::tile_idx( + subrectangle[d].start_as(), + domain[d].start_as(), + tile_extents[d]); + if (hyperrow_sizes[di + 1].has_value()) { + start_tile_result += + start_tile_this_dimension * hyperrow_sizes[di + 1].value(); + } else { + return std::nullopt; + } + } + + return start_tile_result; +} + +} // namespace tiledb::test + +#endif diff --git a/tiledb/sm/tile/test/unit_arithmetic.cc b/tiledb/sm/tile/test/unit_arithmetic.cc new file mode 100644 index 00000000000..333762dd399 --- /dev/null +++ b/tiledb/sm/tile/test/unit_arithmetic.cc @@ -0,0 +1,756 @@ +#include +#include +#include "test/support/rapidcheck/array_schema_templates.h" +#include "test/support/src/array_schema_templates.h" +#include "tiledb/sm/array_schema/dimension.h" +#include "tiledb/sm/misc/types.h" +#include "tiledb/sm/tile/arithmetic.h" +#include "tiledb/sm/tile/test/arithmetic.h" +#include "tiledb/type/range/range.h" + +#include + +using namespace tiledb; +using namespace sm; +using namespace tiledb::test; + +template +static IsRectangularDomain is_rectangular_domain( + std::span tile_extents, + T lower_bound, + T upper_bound, + uint64_t start_tile, + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { + sm::NDRange r; + r.push_back(Range(lower_bound, upper_bound)); + return is_rectangular_domain( + tile_order, tile_extents, r, start_tile, num_tiles); +} + +template +static IsRectangularDomain is_rectangular_domain( + std::span tile_extents, + T d1_lower_bound, + T d1_upper_bound, + T d2_lower_bound, + T d2_upper_bound, + uint64_t start_tile, + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { + sm::NDRange r; + r.push_back(Range(d1_lower_bound, d1_upper_bound)); + r.push_back(Range(d2_lower_bound, d2_upper_bound)); + return is_rectangular_domain( + tile_order, tile_extents, r, start_tile, num_tiles); +} + +template +static IsRectangularDomain is_rectangular_domain( + const templates::Dimension
& d1, + const templates::Dimension
& d2, + uint64_t start_tile, + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { + using Coord = templates::Dimension
::value_type; + const std::vector extents = {d1.extent, d2.extent}; + return is_rectangular_domain( + extents, + d1.domain.lower_bound, + d1.domain.upper_bound, + d2.domain.lower_bound, + d2.domain.upper_bound, + start_tile, + num_tiles, + tile_order); +} + +template +static IsRectangularDomain is_rectangular_domain( + const templates::Dimension
& d1, + const templates::Dimension
& d2, + const templates::Dimension
& d3, + uint64_t start_tile, + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { + using Coord = templates::Dimension
::value_type; + const std::vector extents = {d1.extent, d2.extent, d3.extent}; + sm::NDRange r; + r.push_back(Range(d1.domain.lower_bound, d1.domain.upper_bound)); + r.push_back(Range(d2.domain.lower_bound, d2.domain.upper_bound)); + r.push_back(Range(d3.domain.lower_bound, d3.domain.upper_bound)); + return is_rectangular_domain( + tile_order, extents, r, start_tile, num_tiles); +} + +// in one dimension all domains are rectangles +TEST_CASE("is_rectangular_domain 1d", "[arithmetic]") { + rc::prop( + "is_rectangular_domain 1d", + [](templates::Dimension dimension) { + const uint64_t start_tile = + *rc::gen::inRange(0, dimension.num_tiles()); + const uint64_t num_tiles = + *rc::gen::inRange(1, dimension.num_tiles() - start_tile); + + const std::vector extents = {dimension.extent}; + RC_ASSERT( + is_rectangular_domain( + extents, + dimension.domain.lower_bound, + dimension.domain.upper_bound, + start_tile, + num_tiles) == IsRectangularDomain::Yes); + }); +} + +TEST_CASE("is_rectangular_domain 2d", "[arithmetic]") { + /* + * Domain is a 16x16 square + */ + SECTION("Square") { + const uint64_t d1_lower = GENERATE(0, 3); + const uint64_t d1_upper = d1_lower + 16 - 1; + const uint64_t d2_lower = GENERATE(0, 3); + const uint64_t d2_upper = d2_lower + 16 - 1; + + SECTION("Row tiles") { + const std::vector extents = {1, 16}; + for (uint64_t start_tile = 0; start_tile < 15; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= 16; + num_tiles++) { + CAPTURE(start_tile, num_tiles); + CHECK( + is_rectangular_domain( + extents, + d1_lower, + d1_upper, + d2_lower, + d2_upper, + start_tile, + num_tiles) == IsRectangularDomain::Yes); + } + } + } + + SECTION("Square tiles") { + // 7x7 tiles will subdivide the 16x16 square into 3x3 tiles + const std::vector extents = {7, 7}; + + auto tt = [&](uint64_t start_tile, + uint64_t num_tiles) -> IsRectangularDomain { + return is_rectangular_domain( + extents, + d1_lower, + d1_upper, + d2_lower, + d2_upper, + start_tile, + num_tiles); + }; + + // tiles aligned with the start: rectangle formed if less than one row, or + // integral number of rows + for (uint64_t start_tile : {0, 3, 6}) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= 9; num_tiles++) { + CAPTURE(start_tile, num_tiles); + if (num_tiles < 3 || num_tiles % 3 == 0) { + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else { + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::No); + } + } + } + + // otherwise a rectangle is only formed within the same row + for (uint64_t start_tile : {1, 2, 4, 5, 7, 8}) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= 9; num_tiles++) { + CAPTURE(start_tile, num_tiles); + if ((start_tile % 3) + num_tiles <= 3) { + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else { + CHECK(tt(start_tile, num_tiles) == IsRectangularDomain::Never); + } + } + } + } + } + + using Dim64 = templates::Dimension; + + auto instance_is_rectangular_domain_2d = + [](Dim64 d1, Dim64 d2) { + const std::vector extents = {d1.extent, d2.extent}; + auto tt = [&](uint64_t start_tile, + uint64_t num_tiles) -> IsRectangularDomain { + return is_rectangular_domain(d1, d2, start_tile, num_tiles); + }; + + const uint64_t total_tiles = d1.num_tiles() * d2.num_tiles(); + + for (uint64_t t = 0; t < d1.num_tiles(); t += d2.num_tiles()) { + // row-aligned tiles + for (uint64_t num_tiles = 1; t + num_tiles <= total_tiles; + num_tiles++) { + if (num_tiles <= d2.num_tiles() || + num_tiles % d2.num_tiles() == 0) { + ASSERTER(tt(t, num_tiles) 
== IsRectangularDomain::Yes); + } else { + ASSERTER(tt(t, num_tiles) == IsRectangularDomain::No); + } + } + // other tiles + for (uint64_t o = 1; t + o < d2.num_tiles(); o++) { + for (uint64_t num_tiles = 1; t + o + num_tiles <= total_tiles; + num_tiles++) { + if (((t + o) % d2.num_tiles()) + num_tiles <= d2.num_tiles()) { + ASSERTER(tt(t + o, num_tiles) == IsRectangularDomain::Yes); + } else { + ASSERTER(tt(t + o, num_tiles) == IsRectangularDomain::Never); + } + } + } + } + }; + + SECTION("Shrinking") { + instance_is_rectangular_domain_2d(Dim64(0, 2, 1), Dim64(0, 0, 1)); + instance_is_rectangular_domain_2d(Dim64(0, 2, 1), Dim64(0, 1, 1)); + } + + rc::prop("is_rectangular_domain 2d", [&]() { + Dim64 d1 = *rc::make_dimension(std::nullopt, {64}); + Dim64 d2 = *rc::make_dimension(std::nullopt, {64}); + instance_is_rectangular_domain_2d.operator()(d1, d2); + }); +} + +TEST_CASE("is_rectangular_domain 3d", "[arithmetic]") { + using Dim64 = templates::Dimension; + + /** + * 3D plane tiles (where the outermost dimension has extent 1) + * should produce the same results as rectangular tiles in the plane + */ + rc::prop("plane tiles", [&]() { + Dim64 d1 = *rc::make_dimension(std::nullopt, {1}); + Dim64 d2 = *rc::make_dimension(std::nullopt, {32}); + Dim64 d3 = *rc::make_dimension(std::nullopt, {32}); + + const uint64_t total_tiles = + d1.num_tiles() * d2.num_tiles() * d3.num_tiles(); + for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; + num_tiles++) { + const IsRectangularDomain rectangle = + is_rectangular_domain(d2, d3, start_tile, num_tiles); + const IsRectangularDomain plane = + is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); + + RC_ASSERT(rectangle == plane); + } + } + }); + + /** + * Runs over the possible `(start_tiles, num_tiles)` pairs for dimensions + * `{d1, d2, d3}` and asserts that `is_rectangular_domain` returns true if and + * only if the pair represents an expected rectangle. 
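+   *
+   * For example (illustrative): with 2x3x4 tiles per dimension, tiles
+   * [0, 12) form a full plane, tiles [4, 8) form one row within a plane, and
+   * tiles [3, 6) start mid-row and can never be extended into a rectangle.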
+ */ + auto instance_is_rectangular_domain_3d = []( + Dim64 d1, Dim64 d2, Dim64 d3) { + auto tt = [&](uint64_t start_tile, + uint64_t num_tiles) -> IsRectangularDomain { + return is_rectangular_domain(d1, d2, d3, start_tile, num_tiles); + }; + + const uint64_t total_tiles = + d1.num_tiles() * d2.num_tiles() * d3.num_tiles(); + const uint64_t plane_tiles = d2.num_tiles() * d3.num_tiles(); + + for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; + num_tiles++) { + if (start_tile % plane_tiles == 0) { + // aligned to a plane, several options to be a rectangle + if (num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if ( + num_tiles <= plane_tiles && num_tiles % d3.num_tiles() == 0) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if (num_tiles % plane_tiles == 0) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::No); + } + } else if (start_tile % d3.num_tiles() == 0) { + // aligned to a row within a plane, but not aligned to the plane + // this is a rectangle if it is an integral number of rows, or + // fits within a row + if (num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if ( + num_tiles % d3.num_tiles() == 0 && + (start_tile % plane_tiles) + num_tiles <= plane_tiles) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else if ((start_tile % plane_tiles) + num_tiles <= plane_tiles) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::No); + } else { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Never); + } + } else { + // unaligned, only a rectangle if it doesn't advance rows + if (start_tile % d3.num_tiles() + num_tiles <= d3.num_tiles()) { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Yes); + } else { + ASSERTER(tt(start_tile, num_tiles) == IsRectangularDomain::Never); + } + } + } + } + }; + + SECTION("Shrinking") { + instance_is_rectangular_domain_3d( + Dim64(0, 1, 1), Dim64(0, 0, 1), Dim64(0, 1, 1)); + instance_is_rectangular_domain_3d( + Dim64(0, 1, 1), Dim64(0, 2, 1), Dim64(0, 0, 1)); + } + + rc::prop("any tiles", [&]() { + const Dim64 d1 = + *rc::make_dimension(std::nullopt, {16}); + const Dim64 d2 = + *rc::make_dimension(std::nullopt, {16}); + const Dim64 d3 = + *rc::make_dimension(std::nullopt, {16}); + + instance_is_rectangular_domain_3d.operator()( + d1, d2, d3); + }); +} + +template +std::optional instance_domain_tile_offset( + std::span tile_extents, + const sm::NDRange& domain, + uint64_t start_tile, + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { + const IsRectangularDomain expect_rectangle = is_rectangular_domain( + tile_order, tile_extents, domain, start_tile, num_tiles); + const std::optional adjusted_domain = domain_tile_offset( + tile_order, tile_extents, domain, start_tile, num_tiles); + if (expect_rectangle != IsRectangularDomain::Yes) { + ASSERTER(!adjusted_domain.has_value()); + return std::nullopt; + } + + ASSERTER(adjusted_domain.has_value()); + + const uint64_t num_tiles_result = + compute_num_tiles(tile_extents, adjusted_domain.value()); + ASSERTER(num_tiles_result == num_tiles); + + const std::optional start_tile_result = compute_start_tile( + tile_order, tile_extents, domain, adjusted_domain.value()); + ASSERTER(start_tile_result == start_tile); + + return 
adjusted_domain; +} + +template +void instance_domain_tile_offset( + std::span tile_extents, + const sm::NDRange& domain, + Layout tile_order = Layout::ROW_MAJOR) { + uint64_t total_tiles = 1; + for (uint64_t d = 0; d < tile_extents.size(); d++) { + const uint64_t num_tiles_this_dimension = + sm::Dimension::tile_idx( + domain[d].end_as(), domain[d].start_as(), tile_extents[d]) + + 1; + total_tiles *= num_tiles_this_dimension; + } + for (uint64_t start_tile = 0; start_tile < total_tiles; start_tile++) { + for (uint64_t num_tiles = 1; start_tile + num_tiles <= total_tiles; + num_tiles++) { + instance_domain_tile_offset( + tile_extents, domain, start_tile, num_tiles, tile_order); + } + } +} + +template +std::optional::value_type>>> +instance_domain_tile_offset( + const std::vector>& dims, + uint64_t start_tile, + uint64_t num_tiles, + Layout tile_order = Layout::ROW_MAJOR) { + using Coord = typename templates::Dimension
::value_type; + + std::vector tile_extents; + for (const auto& dim : dims) { + tile_extents.push_back(dim.extent); + } + + sm::NDRange domain; + for (const auto& dim : dims) { + domain.push_back(Range(dim.domain.lower_bound, dim.domain.upper_bound)); + } + + const auto range = instance_domain_tile_offset( + tile_extents, domain, start_tile, num_tiles, tile_order); + if (!range.has_value()) { + return std::nullopt; + } + + std::vector> typed_range; + for (const auto& r : range.value()) { + typed_range.emplace_back( + r.template start_as(), r.template end_as()); + } + return typed_range; +} + +template +void instance_domain_tile_offset( + const std::vector>& dims, + Layout tile_order = Layout::ROW_MAJOR) { + using Coord = templates::Dimension
::value_type; + + std::vector tile_extents; + for (const auto& dim : dims) { + tile_extents.push_back(dim.extent); + } + + sm::NDRange domain; + for (const auto& dim : dims) { + domain.push_back(Range(dim.domain.lower_bound, dim.domain.upper_bound)); + } + + instance_domain_tile_offset( + tile_extents, domain, tile_order); +} + +TEST_CASE("domain_tile_offset 1d", "[arithmetic]") { + using Dim64 = templates::Dimension; + + SECTION("Shrinking") { + instance_domain_tile_offset({Dim64(0, 18, 5)}); + } + + rc::prop("any tiles", []() { + const Dim64 d1 = *rc::make_dimension(std::nullopt, {128}); + + instance_domain_tile_offset({d1}); + }); +} + +TEST_CASE("domain_tile_offset 2d", "[arithmetic]") { + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + + SECTION("Rectangle examples") { + const uint64_t d1_lower_bound = GENERATE(0, 3); + const uint64_t d1_extent = GENERATE(1, 4); + const uint64_t d2_lower_bound = GENERATE(0, 3); + const uint64_t d2_extent = GENERATE(1, 4); + + const Dim64 d1( + d1_lower_bound, d1_lower_bound + (5 * d1_extent) - 1, d1_extent); + const Dim64 d2( + d2_lower_bound, d2_lower_bound + (4 * d2_extent) - 1, d2_extent); + + auto make_d1 = [&](uint64_t r_start, uint64_t r_end) { + return Dom64( + d1_lower_bound + r_start * d1_extent, + d1_lower_bound + r_end * d1_extent + d1_extent - 1); + }; + auto make_d2 = [&](uint64_t c_start, uint64_t c_end) { + return Dom64( + d2_lower_bound + c_start * d2_extent, + d2_lower_bound + c_end * d2_extent + d2_extent - 1); + }; + + SECTION("Whole domain") { + const Layout tile_order = GENERATE(Layout::ROW_MAJOR, Layout::COL_MAJOR); + const auto r = instance_domain_tile_offset( + {d1, d2}, 0, 20, tile_order); + CHECK(r == std::vector{d1.domain, d2.domain}); + } + + SECTION("Sub-rectangle") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 4, 8); + CHECK(r1 == std::vector{make_d1(1, 2), d2.domain}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 8, 4); + CHECK(r2 == std::vector{make_d1(2, 2), d2.domain}); + + const auto r3 = + instance_domain_tile_offset({d1, d2}, 8, 12); + CHECK(r3 == std::vector{make_d1(2, 4), d2.domain}); + } + + SECTION("Line") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 0, 2); + CHECK(r1 == std::vector{make_d1(0, 0), make_d2(0, 1)}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 1, 2); + CHECK( + r2 == std::vector{ + make_d1(0, 0), + make_d2(1, 2), + }); + + const auto r3 = + instance_domain_tile_offset({d1, d2}, 9, 3); + CHECK(r3 == std::vector{make_d1(2, 2), make_d2(1, 3)}); + } + + SECTION("Align start but not end") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 0, 5); + CHECK(r1 == std::optional>{}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 4, 11); + CHECK(r2 == std::optional>{}); + } + + SECTION("Cross row") { + const auto r1 = + instance_domain_tile_offset({d1, d2}, 7, 2); + CHECK(r1 == std::optional>{}); + + const auto r2 = + instance_domain_tile_offset({d1, d2}, 5, 4); + CHECK(r2 == std::optional>{}); + + const auto r3 = + instance_domain_tile_offset({d1, d2}, 5, 8); + CHECK(r3 == std::optional>{}); + } + + SECTION("Column major") { + const auto r1 = instance_domain_tile_offset( + {d1, d2}, 0, 10, Layout::COL_MAJOR); + CHECK(r1 == std::vector{d1.domain, make_d2(0, 1)}); + + const auto r2 = instance_domain_tile_offset( + {d1, d2}, 11, 4, Layout::COL_MAJOR); + CHECK(r2 == std::vector{make_d1(1, 4), make_d2(2, 2)}); + + const auto r3 = instance_domain_tile_offset( + {d1, d2}, 11, 5, Layout::COL_MAJOR); + 
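+      // r3 starts at tile 11, which is offset 1 within its column of 5 tiles
+      // (column-major order), and five tiles from there would spill into the
+      // next column, so no rectangular sub-domain exists.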
CHECK(r3 == std::optional>{}); + } + } + + SECTION("CORE-290 Example") { + const Dim64 row(0, std::numeric_limits::max() - 1, 4); + const Dim64 col(0, 99999, 100000 / row.extent); + + auto make_row = [&](uint64_t r_start, uint64_t r_end) { + return Dom64( + row.domain.lower_bound + r_start * row.extent, + row.domain.lower_bound + r_end * row.extent + row.extent - 1); + }; + + const auto r1 = instance_domain_tile_offset( + {row, col}, 0, 4, Layout::ROW_MAJOR); + CHECK(r1 == std::vector{make_row(0, 0), col.domain}); + } + + SECTION("Hyperrow overflow") { + const uint64_t target_tiles_in_domain = 1 << 16; + const uint64_t lower_bound = 0; + const uint64_t upper_bound = std::numeric_limits::max() - 1; + const uint64_t extent = + (upper_bound - lower_bound + 1) / target_tiles_in_domain; + const Dim64 d(lower_bound, upper_bound, extent); + + SECTION("Not overflow") { + const auto r = instance_domain_tile_offset( + {d, d, d, d}, 0, 1, Layout::ROW_MAJOR); + CHECK( + r == std::vector{ + Dom64(0, extent - 1), + Dom64(0, extent - 1), + Dom64(0, extent - 1), + Dom64(0, extent - 1)}); + } + + SECTION("Overflow") { + const auto expect = Catch::Matchers::ContainsSubstring( + "Fragment size is too small to subdivide dense subarray into " + "multiple fragments"); + REQUIRE_THROWS( + instance_domain_tile_offset( + {d, d, d, d, d}, 0, 1, Layout::ROW_MAJOR), + expect); + } + } + + rc::prop("any tiles", []() { + const Dim64 d1 = *rc::make_dimension(std::nullopt, {64}); + const Dim64 d2 = *rc::make_dimension(std::nullopt, {64}); + const Layout tile_order = + *rc::gen::element(Layout::ROW_MAJOR, Layout::COL_MAJOR); + + instance_domain_tile_offset( + {d1, d2}, tile_order); + }); +} + +TEST_CASE("domain_tile_offset 3d", "[arithmetic]") { + using Dim64 = templates::Dimension; + using Dom64 = Dim64::domain_type; + + SECTION("Rectangular prism examples") { + const uint64_t d1_lower_bound = GENERATE(0, 3); + const uint64_t d1_extent = GENERATE(1, 4); + const uint64_t d2_lower_bound = GENERATE(0, 3); + const uint64_t d2_extent = GENERATE(1, 4); + const uint64_t d3_lower_bound = GENERATE(0, 3); + const uint64_t d3_extent = GENERATE(1, 4); + + const Dim64 d1( + d1_lower_bound, d1_lower_bound + (3 * d1_extent) - 1, d1_extent); + const Dim64 d2( + d2_lower_bound, d2_lower_bound + (6 * d2_extent) - 1, d2_extent); + const Dim64 d3( + d3_lower_bound, d3_lower_bound + (7 * d3_extent) - 1, d3_extent); + + auto make_d1 = [&](uint64_t h_start, uint64_t h_end) { + return Dom64( + d1_lower_bound + h_start * d1_extent, + d1_lower_bound + h_end * d1_extent + d1_extent - 1); + }; + auto make_d2 = [&](uint64_t w_start, uint64_t w_end) { + return Dom64( + d2_lower_bound + w_start * d2_extent, + d2_lower_bound + w_end * d2_extent + d2_extent - 1); + }; + auto make_d3 = [&](uint64_t l_start, uint64_t l_end) { + return Dom64( + d3_lower_bound + l_start * d3_extent, + d3_lower_bound + l_end * d3_extent + d3_extent - 1); + }; + + SECTION("Whole domain") { + const Layout tile_order = GENERATE(Layout::ROW_MAJOR, Layout::COL_MAJOR); + const auto r = instance_domain_tile_offset( + {d1, d2, d3}, + 0, + d1.num_tiles() * d2.num_tiles() * d3.num_tiles(), + tile_order); + CHECK(r == std::vector{d1.domain, d2.domain, d3.domain}); + } + + SECTION("Plane") { + const auto r1 = + instance_domain_tile_offset({d1, d2, d3}, 0, 42); + CHECK(r1 == std::vector{make_d1(0, 0), d2.domain, d3.domain}); + + const auto r2 = + instance_domain_tile_offset({d1, d2, d3}, 42, 42); + CHECK(r2 == std::vector{make_d1(1, 1), d2.domain, d3.domain}); + + const auto r3 = + 
+          instance_domain_tile_offset({d1, d2, d3}, 84, 42);
+      CHECK(r3 == std::vector{make_d1(2, 2), d2.domain, d3.domain});
+    }
+
+    SECTION("Rectangle") {
+      const auto r1 =
+          instance_domain_tile_offset({d1, d2, d3}, 0, 14);
+      CHECK(r1 == std::vector{make_d1(0, 0), make_d2(0, 1), d3.domain});
+
+      const auto r2 =
+          instance_domain_tile_offset({d1, d2, d3}, 70, 14);
+      CHECK(r2 == std::vector{make_d1(1, 1), make_d2(4, 5), d3.domain});
+    }
+
+    SECTION("Line") {
+      const auto r1 =
+          instance_domain_tile_offset({d1, d2, d3}, 0, 4);
+      CHECK(
+          r1 ==
+          std::vector{make_d1(0, 0), make_d2(0, 0), make_d3(0, 3)});
+
+      const auto r2 =
+          instance_domain_tile_offset({d1, d2, d3}, 8, 2);
+      CHECK(
+          r2 ==
+          std::vector{make_d1(0, 0), make_d2(1, 1), make_d3(1, 2)});
+
+      const auto r3 =
+          instance_domain_tile_offset({d1, d2, d3}, 109, 3);
+      CHECK(
+          r3 ==
+          std::vector{make_d1(2, 2), make_d2(3, 3), make_d3(4, 6)});
+    }
+
+    SECTION("Align start but not end") {
+      const auto r1 =
+          instance_domain_tile_offset({d1, d2, d3}, 0, 43);
+      CHECK(r1 == std::optional<std::vector<Dom64>>{});
+
+      const auto r2 =
+          instance_domain_tile_offset({d1, d2, d3}, 42, 125);
+      CHECK(r2 == std::optional<std::vector<Dom64>>{});
+    }
+
+    SECTION("Cross row") {
+      const auto r1 =
+          instance_domain_tile_offset({d1, d2, d3}, 0, 8);
+      CHECK(r1 == std::optional<std::vector<Dom64>>{});
+
+      const auto r2 =
+          instance_domain_tile_offset({d1, d2, d3}, 23, 6);
+      CHECK(r2 == std::optional<std::vector<Dom64>>{});
+    }
+
+    SECTION("Cross plane") {
+      const auto r1 =
+          instance_domain_tile_offset({d1, d2, d3}, 40, 3);
+      CHECK(r1 == std::optional<std::vector<Dom64>>{});
+
+      const auto r2 =
+          instance_domain_tile_offset({d1, d2, d3}, 77, 8);
+      CHECK(r2 == std::optional<std::vector<Dom64>>{});
+    }
+
+    SECTION("Column major") {
+      const auto r1 = instance_domain_tile_offset(
+          {d1, d2, d3}, 54, 36, Layout::COL_MAJOR);
+      CHECK(r1 == std::vector{d1.domain, d2.domain, make_d3(3, 4)});
+
+      const auto r2 = instance_domain_tile_offset(
+          {d1, d2, d3}, 78, 12, Layout::COL_MAJOR);
+      CHECK(r2 == std::vector{d1.domain, make_d2(2, 5), make_d3(4, 4)});
+    }
+  }
+
+  rc::prop("any tiles", []() {
+    const Dim64 d1 =
+        *rc::make_dimension(std::nullopt, {16});
+    const Dim64 d2 =
+        *rc::make_dimension(std::nullopt, {16});
+    const Dim64 d3 =
+        *rc::make_dimension(std::nullopt, {16});
+    const Layout tile_order =
+        *rc::gen::element(Layout::ROW_MAJOR, Layout::COL_MAJOR);
+
+    instance_domain_tile_offset(
+        {d1, d2, d3}, tile_order);
+  });
+}
diff --git a/tiledb/sm/tile/tile.h b/tiledb/sm/tile/tile.h
index 3d730f86a20..d82382d9da2 100644
--- a/tiledb/sm/tile/tile.h
+++ b/tiledb/sm/tile/tile.h
@@ -458,6 +458,13 @@ class WriterTile : public TileBase {
     return filtered_buffer_;
   }
 
+  /**
+   * Returns the buffer that contains the filtered, on-disk format.
+   */
+  inline const FilteredBuffer& filtered_buffer() const {
+    return filtered_buffer_;
+  }
+
   /**
    * Write method used for var data. Resizes the internal buffer if needed.
    *
diff --git a/tiledb/sm/tile/writer_tile_tuple.cc b/tiledb/sm/tile/writer_tile_tuple.cc
index 9ce07d20f95..e6e823ec153 100644
--- a/tiledb/sm/tile/writer_tile_tuple.cc
+++ b/tiledb/sm/tile/writer_tile_tuple.cc
@@ -114,5 +114,26 @@ void WriterTileTuple::set_metadata(
   }
 }
 
+std::optional<uint64_t> WriterTileTuple::filtered_size() const {
+  uint64_t tile_size = 0;
+  if (var_size()) {
+    tile_size += offset_tile().filtered_buffer().size();
+    tile_size += var_tile().filtered_buffer().size();
+  } else {
+    tile_size += fixed_tile().filtered_buffer().size();
+  }
+
+  if (nullable()) {
+    tile_size += validity_tile().filtered_buffer().size();
+  }
+
+  // The filtered buffers are only populated once the tiles have passed
+  // through the filter pipeline; report "not filtered" until then so the
+  // return value matches the documented contract.
+  if (tile_size == 0) {
+    return std::nullopt;
+  }
+  return tile_size;
+}
+
 }  // namespace sm
 }  // namespace tiledb
diff --git a/tiledb/sm/tile/writer_tile_tuple.h b/tiledb/sm/tile/writer_tile_tuple.h
index 8a2ca28938b..37339ecc836 100644
--- a/tiledb/sm/tile/writer_tile_tuple.h
+++ b/tiledb/sm/tile/writer_tile_tuple.h
@@ -212,6 +212,12 @@ class WriterTileTuple {
     return cell_num_;
   }
 
+  /**
+   * @return the total size of the filtered tiles, or `std::nullopt` if not
+   * filtered.
+   */
+  std::optional<uint64_t> filtered_size() const;
+
  private:
   /* ********************************* */
   /*        PRIVATE ATTRIBUTES         */
diff --git a/tiledb/type/range/range.h b/tiledb/type/range/range.h
index 3ea85383a2e..89a65d7ff0a 100644
--- a/tiledb/type/range/range.h
+++ b/tiledb/type/range/range.h
@@ -291,6 +291,13 @@ class Range {
     return range_.data();
   }
 
+  /** Returns a mutable pointer to the start of this fixed-size range. */
+  inline void* start_fixed() {
+    iassert(!var_size_);
+    iassert(range_.size() != 0);
+    return range_.data();
+  }
+
   /** Copies 'start' into this range's start bytes for fixed-size ranges. */
   void set_start_fixed(const void* const start) {
     if (var_size_) {
@@ -354,6 +361,14 @@ class Range {
     return &range_[end_pos];
   }
 
+  /** Returns a mutable pointer to the end of this fixed-size range. */
+  void* end_fixed() {
+    iassert(!var_size_);
+    iassert(range_.size() != 0);
+    auto end_pos = range_.size() / 2;
+    return &range_[end_pos];
+  }
+
   /** Copies 'end' into this range's end bytes for fixed-size ranges. */
   void set_end_fixed(const void* const end) {
     if (var_size_) {
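Reviewer note (illustrative only): the expectations in the domain_tile_offset tests above all follow from one piece of arithmetic. A contiguous run of tiles [start, start + count) in the global tile order maps to a hyperrectangular subdomain only if the run begins on a boundary of some dimension's "hyperrow" (the block of tiles covered by one step along that dimension), covers a whole number of those hyperrows, and does not spill into the next slower dimension. The sketch below is not the TileDB implementation of domain_tile_offset; TileGrid and tile_run_to_rectangle are hypothetical names, and it models only row-major tile order. It does reproduce the expected results above, e.g. for the 5x4 grid: offset 4 with 8 tiles yields rows 1..2 by all columns, while offset 7 with 2 tiles crosses a row and yields std::nullopt.

// Standalone model of the tile-run -> hyperrectangle arithmetic (assumed
// semantics, not TileDB source). Dimensions are given slowest-varying first.
#include <cstdint>
#include <limits>
#include <optional>
#include <stdexcept>
#include <utility>
#include <vector>

using TileGrid = std::vector<uint64_t>;  // number of tiles per dimension
using TileRect = std::vector<std::pair<uint64_t, uint64_t>>;  // inclusive tile index ranges

std::optional<TileRect> tile_run_to_rectangle(
    const TileGrid& grid, uint64_t start, uint64_t count) {
  const size_t ndims = grid.size();
  if (ndims == 0) {
    return TileRect{};
  }

  // hyperrow[k] = number of tiles covered by one step along dimension k,
  // i.e. the product of the tile counts of all faster-varying dimensions.
  std::vector<uint64_t> hyperrow(ndims, 1);
  for (size_t k = ndims - 1; k-- > 0;) {
    if (hyperrow[k + 1] > std::numeric_limits<uint64_t>::max() / grid[k + 1]) {
      // Analogue of the "Hyperrow overflow" section above.
      throw std::overflow_error("tile grid too large to address");
    }
    hyperrow[k] = hyperrow[k + 1] * grid[k + 1];
  }

  for (size_t k = 0; k < ndims; k++) {
    // The run must start and end on a dimension-k boundary...
    if (start % hyperrow[k] != 0 || count % hyperrow[k] != 0) {
      continue;
    }
    const uint64_t k_start = (start / hyperrow[k]) % grid[k];
    const uint64_t k_len = count / hyperrow[k];
    // ...and must not spill past dimension k into a slower dimension.
    if (k_len == 0 || k_len > grid[k] - k_start) {
      continue;
    }
    TileRect rect(ndims);
    for (size_t j = 0; j < ndims; j++) {
      if (j < k) {
        // Slower dimensions are pinned to a single tile index.
        const uint64_t fixed = (start / hyperrow[j]) % grid[j];
        rect[j] = {fixed, fixed};
      } else if (j == k) {
        // The split dimension carries the run.
        rect[j] = {k_start, k_start + k_len - 1};
      } else {
        // Faster dimensions are covered completely.
        rect[j] = {0, grid[j] - 1};
      }
    }
    return rect;
  }
  // No level works: the "Cross row" / "Cross plane" cases.
  return std::nullopt;
}

For column-major tile order the same routine applies with the dimensions passed in reverse order. A resulting tile-index range [a, b] along a dimension with lower bound lo and extent e then maps to coordinates [lo + a*e, min(lo + (b + 1)*e - 1, upper_bound)], where the min accounts for a shrunk last tile such as the one exercised by the 1d "Shrinking" section.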