Skip to content

Commit cd6b3e0

Browse files
committed
Adding HappyBase Connection.create_table().
1 parent 33b3bfd commit cd6b3e0

File tree

2 files changed

+310
-0
lines changed

2 files changed

+310
-0
lines changed

gcloud/bigtable/happybase/connection.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,17 @@
1515
"""Google Cloud Bigtable HappyBase connection module."""
1616

1717

18+
import datetime
1819
import warnings
1920

2021
import six
2122

2223
from gcloud.bigtable.client import Client
24+
from gcloud.bigtable.column_family import GCRuleIntersection
25+
from gcloud.bigtable.column_family import MaxAgeGCRule
26+
from gcloud.bigtable.column_family import MaxVersionsGCRule
2327
from gcloud.bigtable.happybase.table import Table
28+
from gcloud.bigtable.table import Table as _LowLevelTable
2429

2530

2631
# Constants reproduced here for HappyBase compatibility, though values
@@ -263,3 +268,122 @@ def tables(self):
263268
if name.startswith(prefix)]
264269

265270
return table_names
271+
272+
def create_table(self, name, families):
    """Create a table along with its column families.

    .. warning::

        Of the column family options HappyBase defines, only
        ``max_versions`` and ``time_to_live`` can be used with
        Cloud Bigtable.

    .. note::

        This method is **not** atomic. Table creation and column family
        creation are separate Cloud Bigtable API calls, so one request is
        sent for the table and one more for each column family. If any
        request fails the method fails, and whatever was created before
        the failure is not rolled back.

    Each value in ``families`` describes the options of one column
    family. HappyBase models these as dictionaries mirroring the Thrift
    ``ColumnDescriptor`` structure, with the keys:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Maps column family names to column family options.
                     Names may be text or bytes and may carry a trailing
                     colon. Each option can be one of

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`

    :raises: :class:`TypeError <exceptions.TypeError>` if ``families`` is
             not a dictionary,
             :class:`ValueError <exceptions.ValueError>` if ``families``
             has no entries
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')

    if not families:
        raise ValueError(
            'Cannot create table %r (no column families specified)' % (name,))

    # Convert every option up front so that a bad option is reported
    # before any API request has been sent.
    rules_by_family = {}
    for raw_family_name, family_option in families.items():
        family_id = raw_family_name
        if isinstance(family_id, six.binary_type):
            family_id = family_id.decode('utf-8')
        # HappyBase allows "family:" as well as "family".
        if family_id.endswith(':'):
            family_id = family_id[:-1]
        rules_by_family[family_id] = _parse_family_option(family_option)

    # Only now talk to the API: the table first, then each family.
    table = _LowLevelTable(self._table_name(name), self._cluster)
    table.create()

    for family_id, gc_rule in rules_by_family.items():
        table.column_family(family_id, gc_rule=gc_rule).create()
342+
343+
344+
def _parse_family_option(option):
345+
"""Parses a column family option into a garbage collection rule.
346+
347+
.. note::
348+
349+
If ``option`` is not a dictionary, the type is not checked.
350+
If ``option`` is :data:`None`, there is nothing to do, since this
351+
is the correct output.
352+
353+
:type option: :class:`dict`,
354+
:data:`NoneType <types.NoneType>`,
355+
:class:`.GarbageCollectionRule`
356+
:param option: A column family option passes as a dictionary value in
357+
:meth:`Connection.create_table`.
358+
359+
:rtype: :class:`.GarbageCollectionRule`
360+
:returns: A garbage collection rule parsed from the input.
361+
:raises: :class:`ValueError <exceptions.ValueError>` if ``option`` is a
362+
dictionary but keys other than ``max_versions`` and
363+
``time_to_live`` are used.
364+
"""
365+
result = option
366+
if isinstance(result, dict):
367+
if not set(result.keys()) <= set(['max_versions', 'time_to_live']):
368+
raise ValueError('Cloud Bigtable only supports max_versions and '
369+
'time_to_live column family settings',
370+
'Received', result.keys())
371+
372+
max_num_versions = result.get('max_versions')
373+
max_age = None
374+
if 'time_to_live' in result:
375+
max_age = datetime.timedelta(seconds=result['time_to_live'])
376+
377+
if len(result) == 0:
378+
result = None
379+
elif len(result) == 1:
380+
if max_num_versions is None:
381+
result = MaxAgeGCRule(max_age)
382+
else:
383+
result = MaxVersionsGCRule(max_num_versions)
384+
else: # By our check above we know this means len(result) == 2.
385+
rule1 = MaxAgeGCRule(max_age)
386+
rule2 = MaxVersionsGCRule(max_num_versions)
387+
result = GCRuleIntersection(rules=[rule1, rule2])
388+
389+
return result

gcloud/bigtable/happybase/test_connection.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,162 @@ def test_tables_with_prefix(self):
311311
result = connection.tables()
312312
self.assertEqual(result, [unprefixed_table_name1])
313313

314+
def test_create_table(self):
    """create_table() makes one table and one column family per key."""
    from operator import attrgetter
    from gcloud._testing import _Monkey
    from gcloud.bigtable.happybase import connection as MUT

    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)
    mock_gc_rule = object()
    options_seen = []

    def mock_parse_family_option(option):
        # Record what create_table() asked to be parsed.
        options_seen.append(option)
        return mock_gc_rule

    name = 'table-name'
    col_fam1, col_fam_option1 = 'cf1', object()
    col_fam2, col_fam_option2 = u'cf2', object()
    col_fam3, col_fam_option3 = b'cf3', object()
    # Text and bytes names are both accepted, with or without a
    # trailing colon.
    families = {
        col_fam1: col_fam_option1,
        col_fam2 + ':': col_fam_option2,
        col_fam3 + b':': col_fam_option3,
    }

    tables_created = []

    def make_table(*args, **kwargs):
        table = _MockLowLevelTable(*args, **kwargs)
        tables_created.append(table)
        return table

    with _Monkey(MUT, _LowLevelTable=make_table,
                 _parse_family_option=mock_parse_family_option):
        connection.create_table(name, families)

    # Exactly one table must have been created.
    [table_instance] = tables_created
    self.assertEqual(table_instance.args, (name, cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # The parser mock is called once per family (three times); the call
    # order is unknown, so compare as sets.
    self.assertEqual(
        set(options_seen),
        set([col_fam_option1, col_fam_option2, col_fam_option3]))

    # Three column families are expected. dict.items() order is not
    # deterministic, so sort by family ID before comparing.
    self.assertEqual(len(table_instance.col_fam_created), 3)
    created_in_order = sorted(table_instance.col_fam_created,
                              key=attrgetter('column_family_id'))
    expected_ids = [col_fam1, col_fam2, col_fam3.decode('utf-8')]
    for created, expected_id in zip(created_in_order, expected_ids):
        self.assertEqual(created.column_family_id, expected_id)
        self.assertEqual(created.gc_rule, mock_gc_rule)
        self.assertEqual(created.create_calls, 1)
379+
380+
def test_create_table_bad_type(self):
    """A non-dictionary ``families`` argument raises TypeError."""
    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    table_name = 'table-name'
    not_a_dict = None
    self.assertRaises(TypeError, connection.create_table,
                      table_name, not_a_dict)
388+
389+
def test_create_table_bad_value(self):
    """An empty ``families`` dictionary raises ValueError."""
    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    table_name = 'table-name'
    no_families = {}
    self.assertRaises(ValueError, connection.create_table,
                      table_name, no_families)
397+
398+
399+
class Test__parse_family_option(unittest2.TestCase):
    """Unit tests for the ``_parse_family_option`` helper."""

    def _callFUT(self, option):
        from gcloud.bigtable.happybase import connection as MUT
        return MUT._parse_family_option(option)

    def test_dictionary_no_keys(self):
        # An empty dictionary carries no settings, so there is no rule.
        self.assertEqual(self._callFUT({}), None)

    def test_null(self):
        # None is already the "no rule" value.
        self.assertEqual(self._callFUT(None), None)

    def test_dictionary_bad_key(self):
        bad_option = {'badkey': None}
        self.assertRaises(ValueError, self._callFUT, bad_option)

    def test_dictionary_versions_key(self):
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        parsed = self._callFUT({'max_versions': versions})

        self.assertEqual(parsed, MaxVersionsGCRule(versions))

    def test_dictionary_ttl_key(self):
        import datetime
        from gcloud.bigtable.column_family import MaxAgeGCRule

        one_day_in_seconds = 24 * 60 * 60
        one_day = datetime.timedelta(days=1)
        parsed = self._callFUT({'time_to_live': one_day_in_seconds})

        self.assertEqual(parsed, MaxAgeGCRule(one_day))

    def test_dictionary_both_keys(self):
        import datetime
        from gcloud.bigtable.column_family import GCRuleIntersection
        from gcloud.bigtable.column_family import MaxAgeGCRule
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        one_day_in_seconds = 24 * 60 * 60
        parsed = self._callFUT({
            'max_versions': versions,
            'time_to_live': one_day_in_seconds,
        })

        # NOTE: The expected rule order (age first, then versions) has to
        #       match the order used by the function under test.
        age_rule = MaxAgeGCRule(datetime.timedelta(days=1))
        versions_rule = MaxVersionsGCRule(versions)
        expected = GCRuleIntersection(rules=[age_rule, versions_rule])
        self.assertEqual(parsed, expected)

    def test_non_dictionary(self):
        # Anything that is not a dictionary is passed through untouched.
        opaque_option = object()
        self.assertFalse(isinstance(opaque_option, dict))
        self.assertEqual(self._callFUT(opaque_option), opaque_option)
469+
314470

315471
class _Client(object):
316472

@@ -350,5 +506,35 @@ def copy(self):
350506
else:
351507
return self
352508

509+
<<<<<<< 33b3bfd533e2e1eb070afe49661483b4fb460c29
353510
def list_tables(self):
354511
return self.list_tables_result
512+
=======
513+
514+
class _MockLowLevelColumnFamily(object):
515+
516+
def __init__(self, column_family_id, gc_rule=None):
517+
self.column_family_id = column_family_id
518+
self.gc_rule = gc_rule
519+
self.create_calls = 0
520+
521+
def create(self):
522+
self.create_calls += 1
523+
524+
525+
class _MockLowLevelTable(object):
526+
527+
def __init__(self, *args, **kwargs):
528+
self.args = args
529+
self.kwargs = kwargs
530+
self.create_calls = 0
531+
self.col_fam_created = []
532+
533+
def create(self):
534+
self.create_calls += 1
535+
536+
def column_family(self, column_family_id, gc_rule=None):
537+
result = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
538+
self.col_fam_created.append(result)
539+
return result
540+
>>>>>>> Adding HappyBase Connection.create_table().

0 commit comments

Comments
 (0)