Skip to content

Commit 10d0ea4

Browse files
committed
Adding HappyBase Connection.create_table().
1 parent 9f5d9a4 commit 10d0ea4

File tree

2 files changed

+308
-0
lines changed

2 files changed

+308
-0
lines changed

gcloud/bigtable/happybase/connection.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,17 @@
1515
"""Google Cloud Bigtable HappyBase connection module."""
1616

1717

18+
import datetime
1819
import warnings
1920

2021
import six
2122

2223
from gcloud.bigtable.client import Client
24+
from gcloud.bigtable.column_family import GCRuleIntersection
25+
from gcloud.bigtable.column_family import MaxAgeGCRule
26+
from gcloud.bigtable.column_family import MaxVersionsGCRule
2327
from gcloud.bigtable.happybase.table import Table
28+
from gcloud.bigtable.table import Table as _LowLevelTable
2429

2530

2631
# Constants reproduced here for HappyBase compatibility, though values
@@ -264,6 +269,77 @@ def tables(self):
264269

265270
return table_names
266271

272+
def create_table(self, name, families):
    """Create a table.

    .. warning::

        The only column family options from HappyBase that are able to be
        used with Cloud Bigtable are ``max_versions`` and ``time_to_live``.

    .. note::

        This method is **not** atomic. The Cloud Bigtable API separates
        the creation of a table from the creation of column families. Thus
        this method needs to send 1 request for the table creation and 1
        request for each column family. If any of these fails, the method
        will fail, but the progress made towards completion cannot be
        rolled back.

    Values in ``families`` represent column family options. In HappyBase,
    these are dictionaries, corresponding to the ``ColumnDescriptor``
    structure in the Thrift API. The keys HappyBase accepts are:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    Of these, only ``max_versions`` and ``time_to_live`` may be used here;
    any other key is rejected with a :class:`ValueError` while the options
    are parsed.

    Column family names may be given as text or as UTF-8 encoded bytes and
    may carry a single trailing ``:``. Names are normalized (decoded and
    stripped of that colon) before they are used.

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Dictionary with column family names as keys and column
                     family options as the values. The options can be among

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`
                     * :data:`None`

    :raises: :class:`TypeError <exceptions.TypeError>` if ``families`` is
             not a dictionary,
             :class:`ValueError <exceptions.ValueError>` if ``families``
             has no entries, or if two keys refer to the same column
             family once normalized
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')

    if not families:
        raise ValueError('Cannot create table %r (no column '
                         'families specified)' % (name,))

    # Parse all keys before making any API requests, so that invalid input
    # is rejected before anything has been created remotely.
    gc_rule_dict = {}
    for column_family_name, option in families.items():
        if isinstance(column_family_name, six.binary_type):
            column_family_name = column_family_name.decode('utf-8')
        if column_family_name.endswith(':'):
            column_family_name = column_family_name[:-1]
        # Distinct keys such as 'cf' and 'cf:' collapse to the same family.
        # Silently keeping whichever one dict iteration yields last would
        # make the result depend on iteration order, so refuse instead.
        if column_family_name in gc_rule_dict:
            raise ValueError('Column family %r was specified more than '
                             'once' % (column_family_name,))
        gc_rule_dict[column_family_name] = _parse_family_option(option)

    # Create table instance and then make API calls.
    name = self._table_name(name)
    low_level_table = _LowLevelTable(name, self._cluster)
    low_level_table.create()

    # One request per column family; see the atomicity note above.
    for column_family_name, gc_rule in gc_rule_dict.items():
        column_family = low_level_table.column_family(
            column_family_name, gc_rule=gc_rule)
        column_family.create()
342+
267343
def enable_table(self, name):
268344
"""Enable the specified table.
269345
@@ -311,3 +387,51 @@ def compact_table(self, name, major=False):
311387
"""
312388
raise NotImplementedError('The Cloud Bigtable API does not support '
313389
'compacting a table.')
390+
391+
392+
def _parse_family_option(option):
    """Parses a column family option into a garbage collection rule.

    .. note::

        If ``option`` is not a dictionary, the type is not checked and the
        value is returned unchanged. In particular, if ``option`` is
        :data:`None`, there is nothing to do, since this is the correct
        output.

    A dictionary key whose value is :data:`None` is treated as if the key
    were absent, so ``{'max_versions': None}`` yields :data:`None` rather
    than a rule built around a missing value.

    :type option: :class:`dict`,
                  :data:`NoneType <types.NoneType>`,
                  :class:`.GarbageCollectionRule`
    :param option: A column family option passed as a dictionary value in
                   :meth:`Connection.create_table`.

    :rtype: :class:`.GarbageCollectionRule` or :data:`None`
    :returns: A garbage collection rule parsed from the input. A maximum
              age rule for ``time_to_live``, a maximum versions rule for
              ``max_versions``, the intersection of the two (age rule
              first) when both are set, or :data:`None` when neither is.
    :raises: :class:`ValueError <exceptions.ValueError>` if ``option`` is a
             dictionary but keys other than ``max_versions`` and
             ``time_to_live`` are used.
    """
    if not isinstance(option, dict):
        return option

    if set(option) - set(['max_versions', 'time_to_live']):
        # Build a single message so that str(exc) is readable; reprs are
        # sorted to keep the text stable regardless of dict ordering.
        received = ', '.join(sorted(repr(key) for key in option))
        raise ValueError('Cloud Bigtable only supports max_versions and '
                         'time_to_live column family settings. '
                         'Received: %s' % (received,))

    # Collect a rule for every setting that actually carries a value. The
    # age rule is appended first so the intersection order is stable.
    rules = []
    time_to_live = option.get('time_to_live')
    if time_to_live is not None:
        max_age = datetime.timedelta(seconds=time_to_live)
        rules.append(MaxAgeGCRule(max_age))

    max_num_versions = option.get('max_versions')
    if max_num_versions is not None:
        rules.append(MaxVersionsGCRule(max_num_versions))

    if not rules:
        return None
    if len(rules) == 1:
        return rules[0]
    return GCRuleIntersection(rules=rules)

gcloud/bigtable/happybase/test_connection.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,90 @@ def test_tables_with_prefix(self):
311311
result = connection.tables()
312312
self.assertEqual(result, [unprefixed_table_name1])
313313

314+
def test_create_table(self):
    """``create_table`` creates one table and one family per key."""
    import operator
    from gcloud._testing import _Monkey
    from gcloud.bigtable.happybase import connection as MUT

    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    name = 'table-name'
    # Family names as a native string, a unicode string and bytes.
    col_fam1, col_fam2, col_fam3 = 'cf1', u'cf2', b'cf3'
    col_fam_option1 = object()
    col_fam_option2 = object()
    col_fam_option3 = object()
    families = {
        col_fam1: col_fam_option1,
        # A trailing colon is also allowed.
        col_fam2 + ':': col_fam_option2,
        col_fam3 + b':': col_fam_option3,
    }

    mock_gc_rule = object()
    seen_options = []
    tables_created = []

    def fake_parse_family_option(option):
        # Record what was parsed and hand back one shared sentinel rule.
        seen_options.append(option)
        return mock_gc_rule

    def fake_table_factory(*args, **kwargs):
        # Stand-in for the low-level table class that remembers instances.
        table = _MockLowLevelTable(*args, **kwargs)
        tables_created.append(table)
        return table

    replacements = {
        '_LowLevelTable': fake_table_factory,
        '_parse_family_option': fake_parse_family_option,
    }
    with _Monkey(MUT, **replacements):
        connection.create_table(name, families)

    # Exactly one table must have been built; the unpacking enforces it.
    (table_instance,) = tables_created
    self.assertEqual(table_instance.args, (name, cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # The parser mock saw all three options; the order is unknown.
    expected_options = set(
        [col_fam_option1, col_fam_option2, col_fam_option3])
    self.assertEqual(set(seen_options), expected_options)

    # Three column families are expected. Sort them by ID because the
    # creation order follows the non-deterministic dict.items().
    self.assertEqual(len(table_instance.col_fam_created), 3)
    created = sorted(table_instance.col_fam_created,
                     key=operator.attrgetter('column_family_id'))
    expected_ids = [col_fam1, col_fam2, col_fam3.decode('utf-8')]
    for column_family, expected_id in zip(created, expected_ids):
        self.assertEqual(column_family.column_family_id, expected_id)
        self.assertEqual(column_family.gc_rule, mock_gc_rule)
        self.assertEqual(column_family.create_calls, 1)
379+
380+
def test_create_table_bad_type(self):
    """A non-dictionary ``families`` argument raises ``TypeError``."""
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    not_a_dict = None
    self.assertRaises(TypeError, connection.create_table,
                      'table-name', not_a_dict)
388+
389+
def test_create_table_bad_value(self):
    """An empty ``families`` dictionary raises ``ValueError``."""
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    no_families = {}
    self.assertRaises(ValueError, connection.create_table,
                      'table-name', no_families)
397+
314398
def test_enable_table(self):
315399
cluster = _Cluster() # Avoid implicit environ check.
316400
connection = self._makeOne(autoconnect=False, cluster=cluster)
@@ -345,6 +429,78 @@ def test_compact_table(self):
345429
connection.compact_table(name, major=major)
346430

347431

432+
class Test__parse_family_option(unittest2.TestCase):
    """Unit tests for the ``_parse_family_option`` helper."""

    def _callFUT(self, option):
        # The function under test is imported at call time.
        from gcloud.bigtable.happybase import connection
        return connection._parse_family_option(option)

    def test_dictionary_no_keys(self):
        """An empty dictionary parses to ``None``."""
        self.assertEqual(self._callFUT({}), None)

    def test_null(self):
        """``None`` is passed through as ``None``."""
        self.assertEqual(self._callFUT(None), None)

    def test_dictionary_bad_key(self):
        """An unsupported key raises ``ValueError``."""
        self.assertRaises(ValueError, self._callFUT, {'badkey': None})

    def test_dictionary_versions_key(self):
        """``max_versions`` alone yields a max-versions rule."""
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        max_versions = 42
        parsed = self._callFUT({'max_versions': max_versions})

        self.assertEqual(parsed, MaxVersionsGCRule(max_versions))

    def test_dictionary_ttl_key(self):
        """``time_to_live`` alone yields a max-age rule."""
        import datetime
        from gcloud.bigtable.column_family import MaxAgeGCRule

        seconds_in_a_day = 24 * 60 * 60
        parsed = self._callFUT({'time_to_live': seconds_in_a_day})

        expected = MaxAgeGCRule(datetime.timedelta(days=1))
        self.assertEqual(parsed, expected)

    def test_dictionary_both_keys(self):
        """Both keys together yield an intersection of the two rules."""
        import datetime
        from gcloud.bigtable.column_family import GCRuleIntersection
        from gcloud.bigtable.column_family import MaxAgeGCRule
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        max_versions = 42
        seconds_in_a_day = 24 * 60 * 60
        parsed = self._callFUT({
            'max_versions': max_versions,
            'time_to_live': seconds_in_a_day,
        })

        # NOTE: The expected rule lists the age rule before the versions
        #       rule; this must match the order used by the function
        #       under test.
        expected = GCRuleIntersection(rules=[
            MaxAgeGCRule(datetime.timedelta(days=1)),
            MaxVersionsGCRule(max_versions),
        ])
        self.assertEqual(parsed, expected)

    def test_non_dictionary(self):
        """A non-dictionary option is returned as-is."""
        sentinel = object()
        self.assertFalse(isinstance(sentinel, dict))
        self.assertEqual(self._callFUT(sentinel), sentinel)
502+
503+
348504
class _Client(object):
349505

350506
def __init__(self, *args, **kwargs):
@@ -385,3 +541,31 @@ def copy(self):
385541

386542
def list_tables(self):
    """Return the ``list_tables_result`` attribute set on this instance."""
    return self.list_tables_result
544+
545+
546+
class _MockLowLevelColumnFamily(object):
    """Test double that records its arguments and counts ``create()`` calls.

    :type column_family_id: str
    :param column_family_id: Stored as-is on ``column_family_id``.

    :param gc_rule: (Optional) Stored as-is on ``gc_rule``.
    """

    def __init__(self, column_family_id, gc_rule=None):
        # Number of times create() has been invoked so far.
        self.create_calls = 0
        self.gc_rule = gc_rule
        self.column_family_id = column_family_id

    def create(self):
        """Record one more ``create()`` invocation."""
        self.create_calls = self.create_calls + 1
555+
556+
557+
class _MockLowLevelTable(object):
    """Test double that records constructor arguments and later calls.

    Positional and keyword arguments are stored on ``args`` and ``kwargs``.
    ``create_calls`` counts ``create()`` invocations, and every column
    family produced by :meth:`column_family` is kept in
    ``col_fam_created``.
    """

    def __init__(self, *args, **kwargs):
        self.args, self.kwargs = args, kwargs
        # Number of times create() has been invoked so far.
        self.create_calls = 0
        # Column family mocks, in the order they were requested.
        self.col_fam_created = []

    def create(self):
        """Record one more ``create()`` invocation."""
        self.create_calls = self.create_calls + 1

    def column_family(self, column_family_id, gc_rule=None):
        """Build a mock column family, remember it, and return it."""
        family = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
        self.col_fam_created.append(family)
        return family

0 commit comments

Comments
 (0)