From 9304e7890d25de3068d3bdd40021f4b7950090c4 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Sat, 12 Mar 2016 10:46:48 -0800 Subject: [PATCH 1/2] Adding system tests for HappyBase Table.rows(). --- system_tests/bigtable_happybase.py | 176 +++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/system_tests/bigtable_happybase.py b/system_tests/bigtable_happybase.py index a19ea619379a..357804494dde 100644 --- a/system_tests/bigtable_happybase.py +++ b/system_tests/bigtable_happybase.py @@ -13,6 +13,7 @@ # limitations under the License. +import operator import struct import time @@ -25,6 +26,7 @@ _PACK_I64 = struct.Struct('>q').pack +_FIRST_ELT = operator.itemgetter(0) _helpers.PROJECT = TESTS_PROJECT ZONE = 'us-central1-c' NOW_MILLIS = int(1000 * time.time()) @@ -232,6 +234,180 @@ def test_row_with_timestamp(self): }) +class TestTable_rows(BaseTableTest): + + def test_rows(self): + table = Config.TABLE + value1 = 'value1' + value2 = 'value2' + value3 = 'value3' + row1_data = {COL1: value1, COL2: value2} + row2_data = {COL1: value3} + + # Need to clean-up row1 and row2 after. + self.rows_to_delete.append(ROW_KEY1) + self.rows_to_delete.append(ROW_KEY2) + table.put(ROW_KEY1, row1_data) + table.put(ROW_KEY2, row2_data) + + rows = table.rows([ROW_KEY1, ROW_KEY2]) + rows.sort(key=_FIRST_ELT) + + row1, row2 = rows + self.assertEqual(row1, (ROW_KEY1, row1_data)) + self.assertEqual(row2, (ROW_KEY2, row2_data)) + + def test_rows_with_returned_timestamps(self): + table = Config.TABLE + value1 = 'value1' + value2 = 'value2' + value3 = 'value3' + row1_data = {COL1: value1, COL2: value2} + row2_data = {COL1: value3} + + # Need to clean-up row1 and row2 after. 
+ self.rows_to_delete.append(ROW_KEY1) + self.rows_to_delete.append(ROW_KEY2) + with table.batch() as batch: + batch.put(ROW_KEY1, row1_data) + batch.put(ROW_KEY2, row2_data) + + rows = table.rows([ROW_KEY1, ROW_KEY2], include_timestamp=True) + rows.sort(key=_FIRST_ELT) + + row1, row2 = rows + self.assertEqual(row1[0], ROW_KEY1) + self.assertEqual(row2[0], ROW_KEY2) + + # Drop the keys now that we have checked. + _, row1 = row1 + _, row2 = row2 + + ts = row1[COL1][1] + # All cells in row1 will have the same timestamp since we used batch. + expected_row1_result = {COL1: (value1, ts), COL2: (value2, ts)} + self.assertEqual(row1, expected_row1_result) + # NOTE: This method was written before Cloud Bigtable had the concept + # of batching, so each mutation is sent individually. (This + # will be the case until support for the MutateRows() RPC method + # is implemented.) Thus, the server-side timestamps correspond + # to separate calls to row.commit(). We could circumvent this by + # manually using the local time and storing it on mutations + # before sending. + ts3 = row2[COL1][1] + expected_row2_result = {COL1: (value3, ts3)} + self.assertEqual(row2, expected_row2_result) + + def test_rows_with_columns(self): + table = Config.TABLE + value1 = 'value1' + value2 = 'value2' + value3 = 'value3' + row1_data = {COL1: value1, COL2: value2} + row2_data = {COL1: value3} + + # Need to clean-up row1 and row2 after. + self.rows_to_delete.append(ROW_KEY1) + self.rows_to_delete.append(ROW_KEY2) + table.put(ROW_KEY1, row1_data) + table.put(ROW_KEY2, row2_data) + + # Filter a single column present in both rows. + rows_col1 = table.rows([ROW_KEY1, ROW_KEY2], columns=[COL1]) + rows_col1.sort(key=_FIRST_ELT) + row1, row2 = rows_col1 + self.assertEqual(row1, (ROW_KEY1, {COL1: value1})) + self.assertEqual(row2, (ROW_KEY2, {COL1: value3})) + + # Filter a column not present in one row. 
+ rows_col2 = table.rows([ROW_KEY1, ROW_KEY2], columns=[COL2]) + self.assertEqual(rows_col2, [(ROW_KEY1, {COL2: value2})]) + + # Filter a column family. + rows_col_fam1 = table.rows([ROW_KEY1, ROW_KEY2], columns=[COL_FAM1]) + rows_col_fam1.sort(key=_FIRST_ELT) + row1, row2 = rows_col_fam1 + self.assertEqual(row1, (ROW_KEY1, row1_data)) + self.assertEqual(row2, (ROW_KEY2, row2_data)) + + # Filter a column family with no entries. + rows_col_fam2 = table.rows([ROW_KEY1, ROW_KEY2], columns=[COL_FAM2]) + self.assertEqual(rows_col_fam2, []) + + # Filter a column family that overlaps with a column. + rows_col_fam_overlap1 = table.rows([ROW_KEY1, ROW_KEY2], + columns=[COL1, COL_FAM1]) + rows_col_fam_overlap1.sort(key=_FIRST_ELT) + row1, row2 = rows_col_fam_overlap1 + self.assertEqual(row1, (ROW_KEY1, row1_data)) + self.assertEqual(row2, (ROW_KEY2, row2_data)) + + # Filter a column family that overlaps with a column (opposite order). + rows_col_fam_overlap2 = table.rows([ROW_KEY1, ROW_KEY2], + columns=[COL_FAM1, COL1]) + rows_col_fam_overlap2.sort(key=_FIRST_ELT) + row1, row2 = rows_col_fam_overlap2 + self.assertEqual(row1, (ROW_KEY1, row1_data)) + self.assertEqual(row2, (ROW_KEY2, row2_data)) + + def test_rows_with_timestamp(self): + table = Config.TABLE + value1 = 'value1' + value2 = 'value2' + value3 = 'value3' + value4 = 'value4' + + # Need to clean-up row1 and row2 after. 
+ self.rows_to_delete.append(ROW_KEY1) + self.rows_to_delete.append(ROW_KEY2) + table.put(ROW_KEY1, {COL1: value1}) + table.put(ROW_KEY2, {COL1: value2}) + table.put(ROW_KEY1, {COL2: value3}) + table.put(ROW_KEY1, {COL4: value4}) + + # Just grab the timestamps + rows = table.rows([ROW_KEY1, ROW_KEY2], include_timestamp=True) + rows.sort(key=_FIRST_ELT) + row1, row2 = rows + self.assertEqual(row1[0], ROW_KEY1) + self.assertEqual(row2[0], ROW_KEY2) + _, row1 = row1 + _, row2 = row2 + ts1 = row1[COL1][1] + ts2 = row2[COL1][1] + ts3 = row1[COL2][1] + ts4 = row1[COL4][1] + + # Make sure the timestamps are (strictly) ascending. + self.assertTrue(ts1 < ts2 < ts3 < ts4) + + # Rows before the third timestamp (assumes exclusive endpoint). + rows = table.rows([ROW_KEY1, ROW_KEY2], timestamp=ts3, + include_timestamp=True) + rows.sort(key=_FIRST_ELT) + row1, row2 = rows + self.assertEqual(row1, (ROW_KEY1, {COL1: (value1, ts1)})) + self.assertEqual(row2, (ROW_KEY2, {COL1: (value2, ts2)})) + + # All writes (bump the exclusive endpoint by 1 millisecond). + rows = table.rows([ROW_KEY1, ROW_KEY2], timestamp=ts4 + 1, + include_timestamp=True) + rows.sort(key=_FIRST_ELT) + row1, row2 = rows + row1_all_data = { + COL1: (value1, ts1), + COL2: (value3, ts3), + COL4: (value4, ts4), + } + self.assertEqual(row1, (ROW_KEY1, row1_all_data)) + self.assertEqual(row2, (ROW_KEY2, {COL1: (value2, ts2)})) + + # First three writes, restricted to column 2. + rows = table.rows([ROW_KEY1, ROW_KEY2], timestamp=ts4, + columns=[COL2], include_timestamp=True) + self.assertEqual(rows, [(ROW_KEY1, {COL2: (value3, ts3)})]) + + class TestTableCounterMethods(BaseTableTest): def test_counter_get(self): From 5beebad4a347788875420c0bc0501a6229a80fec Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Sun, 13 Mar 2016 13:10:47 -0700 Subject: [PATCH 2/2] Replacing uses of sort() with sorted in HappyBase system tests. 
--- system_tests/bigtable_happybase.py | 46 ++++++++++++++---------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/system_tests/bigtable_happybase.py b/system_tests/bigtable_happybase.py index 357804494dde..48cb44ce8a05 100644 --- a/system_tests/bigtable_happybase.py +++ b/system_tests/bigtable_happybase.py @@ -250,9 +250,7 @@ def test_rows(self): table.put(ROW_KEY1, row1_data) table.put(ROW_KEY2, row2_data) - rows = table.rows([ROW_KEY1, ROW_KEY2]) - rows.sort(key=_FIRST_ELT) - + rows = sorted(table.rows([ROW_KEY1, ROW_KEY2]), key=_FIRST_ELT) row1, row2 = rows self.assertEqual(row1, (ROW_KEY1, row1_data)) self.assertEqual(row2, (ROW_KEY2, row2_data)) @@ -272,9 +270,8 @@ def test_rows_with_returned_timestamps(self): batch.put(ROW_KEY1, row1_data) batch.put(ROW_KEY2, row2_data) - rows = table.rows([ROW_KEY1, ROW_KEY2], include_timestamp=True) - rows.sort(key=_FIRST_ELT) - + rows = sorted(table.rows([ROW_KEY1, ROW_KEY2], include_timestamp=True), + key=_FIRST_ELT) row1, row2 = rows self.assertEqual(row1[0], ROW_KEY1) self.assertEqual(row2[0], ROW_KEY2) @@ -313,8 +310,8 @@ def test_rows_with_columns(self): table.put(ROW_KEY2, row2_data) # Filter a single column present in both rows. - rows_col1 = table.rows([ROW_KEY1, ROW_KEY2], columns=[COL1]) - rows_col1.sort(key=_FIRST_ELT) + rows_col1 = sorted(table.rows([ROW_KEY1, ROW_KEY2], columns=[COL1]), + key=_FIRST_ELT) row1, row2 = rows_col1 self.assertEqual(row1, (ROW_KEY1, {COL1: value1})) self.assertEqual(row2, (ROW_KEY2, {COL1: value3})) @@ -324,8 +321,9 @@ def test_rows_with_columns(self): self.assertEqual(rows_col2, [(ROW_KEY1, {COL2: value2})]) # Filter a column family. 
- rows_col_fam1 = table.rows([ROW_KEY1, ROW_KEY2], columns=[COL_FAM1]) - rows_col_fam1.sort(key=_FIRST_ELT) + rows_col_fam1 = sorted( + table.rows([ROW_KEY1, ROW_KEY2], columns=[COL_FAM1]), + key=_FIRST_ELT) row1, row2 = rows_col_fam1 self.assertEqual(row1, (ROW_KEY1, row1_data)) self.assertEqual(row2, (ROW_KEY2, row2_data)) @@ -335,17 +333,17 @@ def test_rows_with_columns(self): self.assertEqual(rows_col_fam2, []) # Filter a column family that overlaps with a column. - rows_col_fam_overlap1 = table.rows([ROW_KEY1, ROW_KEY2], - columns=[COL1, COL_FAM1]) - rows_col_fam_overlap1.sort(key=_FIRST_ELT) + rows_col_fam_overlap1 = sorted(table.rows([ROW_KEY1, ROW_KEY2], + columns=[COL1, COL_FAM1]), + key=_FIRST_ELT) row1, row2 = rows_col_fam_overlap1 self.assertEqual(row1, (ROW_KEY1, row1_data)) self.assertEqual(row2, (ROW_KEY2, row2_data)) # Filter a column family that overlaps with a column (opposite order). - rows_col_fam_overlap2 = table.rows([ROW_KEY1, ROW_KEY2], - columns=[COL_FAM1, COL1]) - rows_col_fam_overlap2.sort(key=_FIRST_ELT) + rows_col_fam_overlap2 = sorted(table.rows([ROW_KEY1, ROW_KEY2], + columns=[COL_FAM1, COL1]), + key=_FIRST_ELT) row1, row2 = rows_col_fam_overlap2 self.assertEqual(row1, (ROW_KEY1, row1_data)) self.assertEqual(row2, (ROW_KEY2, row2_data)) @@ -366,8 +364,8 @@ def test_rows_with_timestamp(self): table.put(ROW_KEY1, {COL4: value4}) # Just grab the timestamps - rows = table.rows([ROW_KEY1, ROW_KEY2], include_timestamp=True) - rows.sort(key=_FIRST_ELT) + rows = sorted(table.rows([ROW_KEY1, ROW_KEY2], include_timestamp=True), + key=_FIRST_ELT) row1, row2 = rows self.assertEqual(row1[0], ROW_KEY1) self.assertEqual(row2[0], ROW_KEY2) @@ -382,17 +380,17 @@ def test_rows_with_timestamp(self): self.assertTrue(ts1 < ts2 < ts3 < ts4) # Rows before the third timestamp (assumes exclusive endpoint). 
- rows = table.rows([ROW_KEY1, ROW_KEY2], timestamp=ts3, - include_timestamp=True) - rows.sort(key=_FIRST_ELT) + rows = sorted(table.rows([ROW_KEY1, ROW_KEY2], timestamp=ts3, + include_timestamp=True), + key=_FIRST_ELT) row1, row2 = rows self.assertEqual(row1, (ROW_KEY1, {COL1: (value1, ts1)})) self.assertEqual(row2, (ROW_KEY2, {COL1: (value2, ts2)})) # All writes (bump the exclusive endpoint by 1 millisecond). - rows = table.rows([ROW_KEY1, ROW_KEY2], timestamp=ts4 + 1, - include_timestamp=True) - rows.sort(key=_FIRST_ELT) + rows = sorted(table.rows([ROW_KEY1, ROW_KEY2], timestamp=ts4 + 1, + include_timestamp=True), + key=_FIRST_ELT) row1, row2 = rows row1_all_data = { COL1: (value1, ts1),