Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions pyiceberg/table/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,9 @@ def set_when_positive(properties: Dict[str, str], num: int, property_name: str)

def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMetadata) -> Iterable[Snapshot]:
"""Get the ancestors of and including the given snapshot."""
if current_snapshot:
yield current_snapshot
if current_snapshot.parent_snapshot_id is not None:
if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id):
yield from ancestors_of(parent, table_metadata)
snapshot = current_snapshot
while snapshot is not None:
yield snapshot
if snapshot.parent_snapshot_id is None:
break
snapshot = table_metadata.snapshot_by_id(snapshot.parent_snapshot_id)
86 changes: 85 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@
import re
import socket
import string
import time
import uuid
from datetime import date, datetime, timezone
from pathlib import Path
from random import choice
from random import choice, randint
from tempfile import TemporaryDirectory
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -731,6 +732,77 @@ def example_table_metadata_no_snapshot_v1() -> Dict[str, Any]:
return EXAMPLE_TABLE_METADATA_NO_SNAPSHOT_V1


@pytest.fixture
def example_table_metadata_v2_with_extensive_snapshots() -> Dict[str, Any]:
def generate_snapshot(
snapshot_id: int,
parent_snapshot_id: Optional[int] = None,
timestamp_ms: Optional[int] = None,
sequence_number: int = 0,
) -> Dict[str, Any]:
return {
"snapshot-id": snapshot_id,
"parent-snapshot-id": parent_snapshot_id,
"timestamp-ms": timestamp_ms or int(time.time() * 1000),
"sequence-number": sequence_number,
"summary": {"operation": "append"},
"manifest-list": f"s3://a/b/{snapshot_id}.avro",
}

snapshots = []
snapshot_log = []
initial_snapshot_id = 3051729675574597004

for i in range(2000):
snapshot_id = initial_snapshot_id + i
parent_snapshot_id = snapshot_id - 1 if i > 0 else None
timestamp_ms = int(time.time() * 1000) - randint(0, 1000000)
snapshots.append(generate_snapshot(snapshot_id, parent_snapshot_id, timestamp_ms, i))
snapshot_log.append({"snapshot-id": snapshot_id, "timestamp-ms": timestamp_ms})

return {
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
"location": "s3://bucket/test/location",
"last-sequence-number": 34,
"last-updated-ms": 1602638573590,
"last-column-id": 3,
"current-schema-id": 1,
"schemas": [
{"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [1, 2],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
],
"default-spec-id": 0,
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
"last-partition-id": 1000,
"default-sort-order-id": 3,
"sort-orders": [
{
"order-id": 3,
"fields": [
{"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"},
{"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"},
],
}
],
"properties": {"read.split.target.size": "134217728"},
"current-snapshot-id": initial_snapshot_id + 1999,
"snapshots": snapshots,
"snapshot-log": snapshot_log,
"metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}],
"refs": {"test": {"snapshot-id": initial_snapshot_id, "type": "tag", "max-ref-age-ms": 10000000}},
}


EXAMPLE_TABLE_METADATA_V2 = {
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
Expand Down Expand Up @@ -1992,6 +2064,18 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
)


@pytest.fixture
def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: Dict[str, Any]) -> Table:
table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
return Table(
identifier=("database", "table"),
metadata=table_metadata,
metadata_location=f"{table_metadata.location}/uuid.metadata.json",
io=load_file_io(),
catalog=NoopCatalog("NoopCatalog"),
)


@pytest.fixture
def bound_reference_str() -> BoundReference[str]:
return BoundReference(field=NestedField(1, "field", StringType(), required=False), accessor=Accessor(position=0, inner=None))
Expand Down
15 changes: 15 additions & 0 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,21 @@ def test_ancestors_of(table_v2: Table) -> None:
]


def test_ancestors_of_recursive_error(table_v2_with_extensive_snapshots: Table) -> None:
# Test RecursionError: maximum recursion depth exceeded
assert (
len(
list(
ancestors_of(
table_v2_with_extensive_snapshots.current_snapshot(),
table_v2_with_extensive_snapshots.metadata,
)
)
)
== 2000
)


def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None:
assert table_v2.snapshot_by_id(-1) is None

Expand Down