Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions gcloud/bigtable/happybase/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@

import six

from gcloud._helpers import _datetime_from_microseconds
from gcloud._helpers import _microseconds_from_datetime
from gcloud._helpers import _to_bytes
from gcloud._helpers import _total_seconds
from gcloud.bigtable.column_family import GCRuleIntersection
from gcloud.bigtable.column_family import MaxAgeGCRule
from gcloud.bigtable.column_family import MaxVersionsGCRule
from gcloud.bigtable.happybase.batch import _WAL_SENTINEL
from gcloud.bigtable.happybase.batch import Batch
from gcloud.bigtable.table import Table as _LowLevelTable
from gcloud.bigtable.row import TimestampRange


_UNPACK_I64 = struct.Struct('>q').unpack
Expand Down Expand Up @@ -563,3 +567,155 @@ def _gc_rule_to_dict(gc_rule):
if key1 != key2:
result = {key1: rule1[key1], key2: rule2[key2]}
return result


def _next_char(str_val, index):
"""Gets the next character based on a position in a string.

:type str_val: str
:param str_val: A string containing the character to update.

:type index: int
:param index: An integer index in ``str_val``.

:rtype: str
:returns: The next character after the character at ``index``
in ``str_val``.
"""
ord_val = six.indexbytes(str_val, index)
return _to_bytes(chr(ord_val + 1), encoding='latin-1')


def _string_successor(str_val):
"""Increment and truncate a byte string.

Determines shortest string that sorts after the given string when
compared using regular string comparison semantics.

Modeled after implementation in ``gcloud-golang``.

Increments the last byte that is smaller than ``0xFF``, and
drops everything after it. If the string only contains ``0xFF`` bytes,
``''`` is returned.

:type str_val: str
:param str_val: String to increment.

:rtype: str
:returns: The next string in lexical order after ``str_val``.
"""
str_val = _to_bytes(str_val, encoding='latin-1')

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

if str_val == b'':
return str_val

index = len(str_val) - 1
while index >= 0:
if six.indexbytes(str_val, index) != 0xff:
break
index -= 1

if index == -1:
return b''

return str_val[:index] + _next_char(str_val, index)


def _convert_to_time_range(timestamp=None):
"""Create a timestamp range from an HBase / HappyBase timestamp.

HBase uses timestamp as an argument to specify an exclusive end
deadline. Cloud Bigtable also uses exclusive end times, so
the behavior matches.

:type timestamp: int
:param timestamp: (Optional) Timestamp (in milliseconds since the
epoch). Intended to be used as the end of an HBase
time range, which is exclusive.

:rtype: :class:`.TimestampRange`, :data:`NoneType <types.NoneType>`
:returns: The timestamp range corresponding to the passed in
``timestamp``.
"""
if timestamp is None:
return None

next_timestamp = _datetime_from_microseconds(1000 * timestamp)
return TimestampRange(end=next_timestamp)


def _cells_to_pairs(cells, include_timestamp=False):
"""Converts list of cells to HappyBase format.

For example::

>>> import datetime
>>> from gcloud.bigtable.row_data import Cell
>>> cell1 = Cell(b'val1', datetime.datetime.utcnow())
>>> cell2 = Cell(b'val2', datetime.datetime.utcnow())
>>> _cells_to_pairs([cell1, cell2])
[b'val1', b'val2']
>>> _cells_to_pairs([cell1, cell2], include_timestamp=True)
[(b'val1', 1456361486255), (b'val2', 1456361491927)]

:type cells: list
:param cells: List of :class:`.Cell` returned from a read request.

:type include_timestamp: bool
:param include_timestamp: Flag to indicate if cell timestamps should be
included with the output.

:rtype: list
:returns: List of values in the cell. If ``include_timestamp=True``, each

This comment was marked as spam.

This comment was marked as spam.

value will be a pair, with the first part the bytes value in
the cell and the second part the number of milliseconds in the
timestamp on the cell.
"""
result = []
for cell in cells:
if include_timestamp:
ts_millis = _microseconds_from_datetime(cell.timestamp) // 1000
result.append((cell.value, ts_millis))
else:
result.append(cell.value)
return result


def _partial_row_to_dict(partial_row_data, include_timestamp=False):
"""Convert a low-level row data object to a dictionary.

Assumes only the latest value in each row is needed. This assumption
is due to the fact that this method is used by callers which use
a ``CellsColumnLimitFilter(1)`` filter.

For example::

>>> import datetime
>>> from gcloud.bigtable.row_data import Cell, PartialRowData
>>> cell1 = Cell(b'val1', datetime.datetime.utcnow())
>>> cell2 = Cell(b'val2', datetime.datetime.utcnow())
>>> row_data = PartialRowData(b'row-key')
>>> _partial_row_to_dict(row_data)
{}
>>> row_data._cells[u'fam1'] = {b'col1': [cell1], b'col2': [cell2]}
>>> _partial_row_to_dict(row_data)
{b'fam1:col2': b'val2', b'fam1:col1': b'val1'}
>>> _partial_row_to_dict(row_data, include_timestamp=True)
{b'fam1:col2': (b'val2', 1456361724480),
b'fam1:col1': (b'val1', 1456361721135)}

:type partial_row_data: :class:`.row_data.PartialRowData`
:param partial_row_data: Row data consumed from a stream.

:type include_timestamp: bool
:param include_timestamp: Flag to indicate if cell timestamps should be
included with the output.

:rtype: dict
:returns: The row data converted to a dictionary.
"""
result = {}
for column, cells in six.iteritems(partial_row_data.to_dict()):
cell_vals = _cells_to_pairs(cells,
include_timestamp=include_timestamp)
result[column] = cell_vals[0]
return result
136 changes: 136 additions & 0 deletions gcloud/bigtable/happybase/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,142 @@ def test_with_intersection_two_nested_rules(self):
self.assertTrue(result is gc_rule)


class Test__string_successor(unittest2.TestCase):

def _callFUT(self, *args, **kwargs):
from gcloud.bigtable.happybase.table import _string_successor
return _string_successor(*args, **kwargs)

def test_with_alphanumeric(self):
self.assertEqual(self._callFUT(b'boa'), b'bob')
self.assertEqual(self._callFUT(b'abc1'), b'abc2')

def test_with_last_byte(self):
self.assertEqual(self._callFUT(b'boa\xff'), b'bob')

def test_with_empty_string(self):
self.assertEqual(self._callFUT(b''), b'')

def test_with_all_last_bytes(self):
self.assertEqual(self._callFUT(b'\xff\xff\xff'), b'')

def test_with_unicode_input(self):
self.assertEqual(self._callFUT(u'boa'), b'bob')


class Test__convert_to_time_range(unittest2.TestCase):

def _callFUT(self, timestamp=None):
from gcloud.bigtable.happybase.table import _convert_to_time_range
return _convert_to_time_range(timestamp=timestamp)

def test_null(self):
timestamp = None
result = self._callFUT(timestamp=timestamp)
self.assertEqual(result, None)

def test_invalid_type(self):
timestamp = object()
with self.assertRaises(TypeError):
self._callFUT(timestamp=timestamp)

def test_success(self):
from gcloud._helpers import _datetime_from_microseconds
from gcloud.bigtable.row import TimestampRange

timestamp = 1441928298571
ts_dt = _datetime_from_microseconds(1000 * timestamp)
result = self._callFUT(timestamp=timestamp)
self.assertTrue(isinstance(result, TimestampRange))
self.assertEqual(result.start, None)
self.assertEqual(result.end, ts_dt)


class Test__cells_to_pairs(unittest2.TestCase):

def _callFUT(self, *args, **kwargs):
from gcloud.bigtable.happybase.table import _cells_to_pairs
return _cells_to_pairs(*args, **kwargs)

def test_without_timestamp(self):
from gcloud.bigtable.row_data import Cell

value1 = 'foo'
cell1 = Cell(value=value1, timestamp=None)
value2 = 'bar'
cell2 = Cell(value=value2, timestamp=None)

result = self._callFUT([cell1, cell2])
self.assertEqual(result, [value1, value2])

def test_with_timestamp(self):
from gcloud._helpers import _datetime_from_microseconds
from gcloud.bigtable.row_data import Cell

value1 = 'foo'
ts1_millis = 1221934570148
ts1 = _datetime_from_microseconds(ts1_millis * 1000)
cell1 = Cell(value=value1, timestamp=ts1)

value2 = 'bar'
ts2_millis = 1221955575548
ts2 = _datetime_from_microseconds(ts2_millis * 1000)
cell2 = Cell(value=value2, timestamp=ts2)

result = self._callFUT([cell1, cell2], include_timestamp=True)
self.assertEqual(result,
[(value1, ts1_millis), (value2, ts2_millis)])


class Test__partial_row_to_dict(unittest2.TestCase):

def _callFUT(self, partial_row_data, include_timestamp=False):
from gcloud.bigtable.happybase.table import _partial_row_to_dict
return _partial_row_to_dict(partial_row_data,
include_timestamp=include_timestamp)

def test_without_timestamp(self):
from gcloud.bigtable.row_data import Cell
from gcloud.bigtable.row_data import PartialRowData

row_data = PartialRowData(b'row-key')
val1 = b'hi-im-bytes'
val2 = b'bi-im-hytes'
row_data._cells[u'fam1'] = {
b'col1': [Cell(val1, None)],
b'col2': [Cell(val2, None)],
}
result = self._callFUT(row_data)
expected_result = {
b'fam1:col1': val1,
b'fam1:col2': val2,
}
self.assertEqual(result, expected_result)

def test_with_timestamp(self):
from gcloud._helpers import _datetime_from_microseconds
from gcloud.bigtable.row_data import Cell
from gcloud.bigtable.row_data import PartialRowData

row_data = PartialRowData(b'row-key')
val1 = b'hi-im-bytes'
ts1_millis = 1221934570148
ts1 = _datetime_from_microseconds(ts1_millis * 1000)
val2 = b'bi-im-hytes'
ts2_millis = 1331934880000
ts2 = _datetime_from_microseconds(ts2_millis * 1000)
row_data._cells[u'fam1'] = {
b'col1': [Cell(val1, ts1)],
b'col2': [Cell(val2, ts2)],
}
result = self._callFUT(row_data, include_timestamp=True)
expected_result = {
b'fam1:col1': (val1, ts1_millis),
b'fam1:col2': (val2, ts2_millis),
}
self.assertEqual(result, expected_result)


class _Connection(object):

def __init__(self, cluster):
Expand Down