Skip to content

Commit ad3a39a

Browse files
committed
Adding HappyBase Connection.create_table().
1 parent 8749101 commit ad3a39a

File tree

2 files changed

+308
-0
lines changed

2 files changed

+308
-0
lines changed

gcloud/bigtable/happybase/connection.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,17 @@
1515
"""Google Cloud Bigtable HappyBase connection module."""
1616

1717

18+
import datetime
1819
import warnings
1920

2021
import six
2122

2223
from gcloud.bigtable.client import Client
24+
from gcloud.bigtable.column_family import GCRuleIntersection
25+
from gcloud.bigtable.column_family import MaxAgeGCRule
26+
from gcloud.bigtable.column_family import MaxVersionsGCRule
2327
from gcloud.bigtable.happybase.table import Table
28+
from gcloud.bigtable.table import Table as _LowLevelTable
2429

2530

2631
# Constants reproduced here for HappyBase compatibility, though values
@@ -234,3 +239,122 @@ def table(self, name, use_prefix=True):
234239
if use_prefix:
235240
name = self._table_name(name)
236241
return Table(name, self)
242+
243+
def create_table(self, name, families):
    """Create a table together with its column families.

    .. warning::

        Of the HappyBase column family options, Cloud Bigtable can only
        honor ``max_versions`` and ``time_to_live``.

    .. note::

        This method is **not** atomic. Table creation and column family
        creation are separate operations in the Cloud Bigtable API, so
        one request is sent for the table and one more for every column
        family. A failure in any request makes the method fail, and
        whatever was created before the failure is not rolled back.

    In HappyBase the values of ``families`` are dictionaries mirroring
    the ``ColumnDescriptor`` structure of the Thrift API, with these
    keys:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Maps column family names to column family options.
                     Names may be text or bytes (bytes are decoded as
                     UTF-8) and may carry a single trailing ``:``, which
                     is removed. Each option can be one of

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`

    :raises: :class:`TypeError <exceptions.TypeError>` if ``families`` is
             not a dictionary,
             :class:`ValueError <exceptions.ValueError>` if ``families``
             has no entries
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')

    if not families:
        raise ValueError('Cannot create table %r (no column '
                         'families specified)' % (name,))

    def _normalize(family_name):
        # Bytes become text, and one HappyBase-style trailing colon is
        # dropped.
        if isinstance(family_name, six.binary_type):
            family_name = family_name.decode('utf-8')
        if family_name.endswith(':'):
            return family_name[:-1]
        return family_name

    # Every option is validated and converted up front, so that a bad
    # option is reported before any API request has been sent.
    rules_by_family = {}
    for raw_name, option in families.items():
        family_id = _normalize(raw_name)
        rules_by_family[family_id] = _parse_family_option(option)

    # First request: the table itself.
    full_name = self._table_name(name)
    low_level_table = _LowLevelTable(full_name, self._cluster)
    low_level_table.create()

    # One further request per column family.
    for family_id, gc_rule in rules_by_family.items():
        low_level_table.column_family(family_id, gc_rule=gc_rule).create()
313+
314+
315+
def _parse_family_option(option):
316+
"""Parses a column family option into a garbage collection rule.
317+
318+
.. note::
319+
320+
If ``option`` is not a dictionary, the type is not checked.
321+
If ``option`` is :data:`None`, there is nothing to do, since this
322+
is the correct output.
323+
324+
:type option: :class:`dict`,
325+
:data:`NoneType <types.NoneType>`,
326+
:class:`.GarbageCollectionRule`
327+
:param option: A column family option passes as a dictionary value in
328+
:meth:`Connection.create_table`.
329+
330+
:rtype: :class:`.GarbageCollectionRule`
331+
:returns: A garbage collection rule parsed from the input.
332+
:raises: :class:`ValueError <exceptions.ValueError>` if ``option`` is a
333+
dictionary but keys other than ``max_versions`` and
334+
``time_to_live`` are used.
335+
"""
336+
result = option
337+
if isinstance(result, dict):
338+
if not set(result.keys()) <= set(['max_versions', 'time_to_live']):
339+
raise ValueError('Cloud Bigtable only supports max_versions and '
340+
'time_to_live column family settings',
341+
'Received', result.keys())
342+
343+
max_num_versions = result.get('max_versions')
344+
max_age = None
345+
if 'time_to_live' in result:
346+
max_age = datetime.timedelta(seconds=result['time_to_live'])
347+
348+
if len(result) == 0:
349+
result = None
350+
elif len(result) == 1:
351+
if max_num_versions is None:
352+
result = MaxAgeGCRule(max_age)
353+
else:
354+
result = MaxVersionsGCRule(max_num_versions)
355+
else: # By our check above we know this means len(result) == 2.
356+
rule1 = MaxAgeGCRule(max_age)
357+
rule2 = MaxVersionsGCRule(max_num_versions)
358+
result = GCRuleIntersection(rules=[rule1, rule2])
359+
360+
return result

gcloud/bigtable/happybase/test_connection.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,162 @@ def test_table_factory_with_prefix(self):
278278
def test_table_factory_with_ignored_prefix(self):
279279
self._table_factory_prefix_helper(use_prefix=False)
280280

281+
def test_create_table(self):
    # Exercises Connection.create_table() with the low-level table class
    # and the option parser both replaced by mocks.
    import operator
    from gcloud._testing import _Monkey
    from gcloud.bigtable.happybase import connection as MUT

    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    mock_gc_rule = object()
    called_options = []

    def mock_parse_family_option(option):
        called_options.append(option)
        return mock_gc_rule

    tables_created = []

    def make_table(*args, **kwargs):
        result = _MockLowLevelTable(*args, **kwargs)
        tables_created.append(result)
        return result

    name = 'table-name'
    col_fam1 = 'cf1'
    col_fam2 = u'cf2'
    col_fam3 = b'cf3'
    col_fam_option1 = object()
    col_fam_option2 = object()
    col_fam_option3 = object()
    families = {
        col_fam1: col_fam_option1,
        # A trailing colon is also allowed.
        col_fam2 + ':': col_fam_option2,
        col_fam3 + b':': col_fam_option3,
    }

    with _Monkey(MUT, _LowLevelTable=make_table,
                 _parse_family_option=mock_parse_family_option):
        connection.create_table(name, families)

    # Exactly one table must have been created.
    [table_instance] = tables_created
    self.assertEqual(table_instance.args, (name, cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # The parser mock is called once per family (three times); the
    # order of the calls is not known.
    expected_options = set(
        [col_fam_option1, col_fam_option2, col_fam_option3])
    self.assertEqual(set(called_options), expected_options)

    # Three column family instances are expected. dict.items() gives no
    # ordering guarantee, so order them by ID before comparing.
    self.assertEqual(len(table_instance.col_fam_created), 3)
    created = sorted(table_instance.col_fam_created,
                     key=operator.attrgetter('column_family_id'))
    expected_ids = [col_fam1, col_fam2, col_fam3.decode('utf-8')]
    for family, expected_id in zip(created, expected_ids):
        self.assertEqual(family.column_family_id, expected_id)
        self.assertEqual(family.gc_rule, mock_gc_rule)
        self.assertEqual(family.create_calls, 1)
346+
347+
def test_create_table_bad_type(self):
    # A ``families`` argument that is not a dictionary must be rejected
    # with TypeError.
    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    not_a_dict = None
    self.assertRaises(TypeError, connection.create_table,
                      'table-name', not_a_dict)
355+
356+
def test_create_table_bad_value(self):
    # An empty ``families`` dictionary must be rejected with ValueError.
    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    no_families = {}
    self.assertRaises(ValueError, connection.create_table,
                      'table-name', no_families)
364+
365+
366+
class Test__parse_family_option(unittest2.TestCase):
    # Unit tests for the module-level helper ``_parse_family_option``.

    def _callFUT(self, option):
        from gcloud.bigtable.happybase import connection
        return connection._parse_family_option(option)

    def test_dictionary_no_keys(self):
        # An empty dictionary carries no settings, so no rule is built.
        self.assertIsNone(self._callFUT({}))

    def test_null(self):
        # None is passed straight through.
        self.assertIsNone(self._callFUT(None))

    def test_dictionary_bad_key(self):
        # Any key other than the two supported ones is an error.
        self.assertRaises(ValueError, self._callFUT, {'badkey': None})

    def test_dictionary_versions_key(self):
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        expected = MaxVersionsGCRule(versions)

        actual = self._callFUT({'max_versions': versions})
        self.assertEqual(actual, expected)

    def test_dictionary_ttl_key(self):
        import datetime
        from gcloud.bigtable.column_family import MaxAgeGCRule

        # One day expressed in seconds, as HappyBase passes it.
        time_to_live = 24 * 60 * 60
        expected = MaxAgeGCRule(datetime.timedelta(days=1))

        actual = self._callFUT({'time_to_live': time_to_live})
        self.assertEqual(actual, expected)

    def test_dictionary_both_keys(self):
        import datetime
        from gcloud.bigtable.column_family import GCRuleIntersection
        from gcloud.bigtable.column_family import MaxAgeGCRule
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        time_to_live = 24 * 60 * 60

        actual = self._callFUT({
            'max_versions': versions,
            'time_to_live': time_to_live,
        })

        # NOTE: The expected rules are listed in the same order the
        #       function under test uses (age first, then versions).
        age_rule = MaxAgeGCRule(datetime.timedelta(days=1))
        versions_rule = MaxVersionsGCRule(versions)
        expected = GCRuleIntersection(rules=[age_rule, versions_rule])
        self.assertEqual(actual, expected)

    def test_non_dictionary(self):
        # Anything that is not a dictionary is returned untouched.
        option = object()
        self.assertNotIsInstance(option, dict)
        self.assertEqual(self._callFUT(option), option)
436+
281437

282438
class _Client(object):
283439

@@ -316,3 +472,31 @@ def copy(self):
316472
return result
317473
else:
318474
return self
475+
476+
477+
class _MockLowLevelColumnFamily(object):
478+
479+
def __init__(self, column_family_id, gc_rule=None):
480+
self.column_family_id = column_family_id
481+
self.gc_rule = gc_rule
482+
self.create_calls = 0
483+
484+
def create(self):
485+
self.create_calls += 1
486+
487+
488+
class _MockLowLevelTable(object):
489+
490+
def __init__(self, *args, **kwargs):
491+
self.args = args
492+
self.kwargs = kwargs
493+
self.create_calls = 0
494+
self.col_fam_created = []
495+
496+
def create(self):
497+
self.create_calls += 1
498+
499+
def column_family(self, column_family_id, gc_rule=None):
500+
result = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
501+
self.col_fam_created.append(result)
502+
return result

0 commit comments

Comments
 (0)