From 16b04c935099b5bf45692223b18f03633df39a56 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Sat, 25 Jun 2016 10:56:22 -0400 Subject: [PATCH 1/2] Parse JSON acceptance tests. --- .../bigtable/read-rows-acceptance-test.json | 1178 +++++++++++++++++ gcloud/bigtable/test_row_data.py | 31 + 2 files changed, 1209 insertions(+) create mode 100644 gcloud/bigtable/read-rows-acceptance-test.json diff --git a/gcloud/bigtable/read-rows-acceptance-test.json b/gcloud/bigtable/read-rows-acceptance-test.json new file mode 100644 index 000000000000..4973831f4979 --- /dev/null +++ b/gcloud/bigtable/read-rows-acceptance-test.json @@ -0,0 +1,1178 @@ +{ + "tests": [ + { + "name": "invalid - no commit", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - no cell key before commit", + "chunks": [ + "commit_row: true\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - no cell key before value", + "chunks": [ + "timestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - new col family must specify qualifier", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "family_name: \u003c\n value: \"B\"\n\u003e\ntimestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "bare commit implies ts=0", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n", + "commit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "", + "label": "", + "error": false + } + ] + }, + { + "name": "simple row with timestamp", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + } + ] + }, + { + "name": "missing timestamp, implied ts=0", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "value-VAL", + "label": "", + "error": false + } + ] + }, + { + "name": "empty cell value", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "", + "label": "", + "error": false + } + ] + }, 
+ { + "name": "two unsplit cells", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "timestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "two qualifiers", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "qualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "D", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "two families", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "family_name: \u003c\n value: \"B\"\n\u003e\nqualifier: \u003c\n value: \"E\"\n\u003e\ntimestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "B", + "qual": "E", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "with labels", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nlabels: \"L_1\"\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "timestamp_micros: 102\nlabels: \"L_2\"\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "L_1", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 102, + "value": "value-VAL_2", + "label": "L_2", + "error": false + } + ] + }, + { + "name": "split cell, bare commit", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL\"\ncommit_row: false\n", + "commit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "", + "label": "", + "error": false + } + ] + }, + { + "name": "split cell", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + } + ] + }, + { + "name": "split four ways", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: 
\"C\"\n\u003e\ntimestamp_micros: 100\nlabels: \"L\"\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"a\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"l\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"ue-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "L", + "error": false + } + ] + }, + { + "name": "two split cells", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_1\"\ncommit_row: false\n", + "timestamp_micros: 102\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "multi-qualifier splits", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_1\"\ncommit_row: false\n", + "qualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 102\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "D", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "multi-qualifier multi-split", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"a\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"lue-VAL_1\"\ncommit_row: false\n", + "qualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 102\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"a\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"lue-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "D", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "multi-family split", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_1\"\ncommit_row: false\n", + "family_name: \u003c\n value: \"B\"\n\u003e\nqualifier: \u003c\n value: \"E\"\n\u003e\ntimestamp_micros: 102\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "B", + "qual": "E", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "invalid - no commit between rows", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: 
\"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - no commit after first row", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - last row missing commit", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - duplicate row key", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n", + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"B\"\n\u003e\nqualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - new row missing row key", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n", + "timestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "two rows", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + } + ] + }, + { + "name": "two rows implicit timestamp", + "chunks": [ + "row_key: 
\"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\nvalue: \"value-VAL\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + } + ] + }, + { + "name": "two rows empty value", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + } + ] + }, + { + "name": "two rows, one with multiple cells", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "timestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"B\"\n\u003e\nqualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 103\nvalue: \"value-VAL_3\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "B", + "qual": "D", + "ts": 103, + "value": "value-VAL_3", + "label": "", + "error": false + } + ] + }, + { + "name": "two rows, multiple cells", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "qualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"B\"\n\u003e\nqualifier: \u003c\n value: \"E\"\n\u003e\ntimestamp_micros: 103\nvalue: \"value-VAL_3\"\ncommit_row: false\n", + "qualifier: \u003c\n value: \"F\"\n\u003e\ntimestamp_micros: 104\nvalue: \"value-VAL_4\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK_1", + "fm": "A", + "qual": "D", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "B", + "qual": "E", + "ts": 103, + "value": "value-VAL_3", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "B", + "qual": "F", + "ts": 104, + "value": "value-VAL_4", + "label": "", + "error": false + } + ] + }, + { + "name": "two rows, multiple cells, multiple families", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "family_name: \u003c\n value: \"B\"\n\u003e\nqualifier: 
\u003c\n value: \"E\"\n\u003e\ntimestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"M\"\n\u003e\nqualifier: \u003c\n value: \"O\"\n\u003e\ntimestamp_micros: 103\nvalue: \"value-VAL_3\"\ncommit_row: false\n", + "family_name: \u003c\n value: \"N\"\n\u003e\nqualifier: \u003c\n value: \"P\"\n\u003e\ntimestamp_micros: 104\nvalue: \"value-VAL_4\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK_1", + "fm": "B", + "qual": "E", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "M", + "qual": "O", + "ts": 103, + "value": "value-VAL_3", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "N", + "qual": "P", + "ts": 104, + "value": "value-VAL_4", + "label": "", + "error": false + } + ] + }, + { + "name": "two rows, four cells, 2 labels", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 101\nlabels: \"L_1\"\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "timestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"B\"\n\u003e\nqualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 103\nlabels: \"L_3\"\nvalue: \"value-VAL_3\"\ncommit_row: false\n", + "timestamp_micros: 104\nvalue: \"value-VAL_4\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 101, + "value": "value-VAL_1", + "label": "L_1", + "error": false + }, + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 102, + "value": "value-VAL_2", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "B", + "qual": "D", + "ts": 103, + "value": "value-VAL_3", + "label": "L_3", + "error": false + }, + { + "rk": "RK_2", + "fm": "B", + "qual": "D", + "ts": 104, + "value": "value-VAL_4", + "label": "", + "error": false + } + ] + }, + { + "name": "two rows with splits, same timestamp", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_1\"\ncommit_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"alue-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL_1", + "label": "", + "error": false + }, + { + "rk": "RK_2", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "invalid - bare reset", + "chunks": [ + "reset_row: true\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - bad reset, no commit", + "chunks": [ + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - missing key after reset", + "chunks": [ + "row_key: 
\"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n", + "reset_row: true\n", + "timestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "no data after reset", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n", + "reset_row: true\n" + ], + "results": null + }, + { + "name": "simple reset", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + } + ] + }, + { + "name": "reset to new val", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "reset to new qual", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "D", + "ts": 100, + "value": "value-VAL_1", + "label": "", + "error": false + } + ] + }, + { + "name": "reset with splits", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "timestamp_micros: 102\nvalue: \"value-VAL_2\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "reset two cells", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_2\"\ncommit_row: false\n", + "timestamp_micros: 103\nvalue: \"value-VAL_3\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", 
+ "ts": 100, + "value": "value-VAL_2", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 103, + "value": "value-VAL_3", + "label": "", + "error": false + } + ] + }, + { + "name": "two resets", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_2\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_3\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL_3", + "label": "", + "error": false + } + ] + }, + { + "name": "reset then two cells", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"B\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_2\"\ncommit_row: false\n", + "qualifier: \u003c\n value: \"D\"\n\u003e\ntimestamp_micros: 103\nvalue: \"value-VAL_3\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "B", + "qual": "C", + "ts": 100, + "value": "value-VAL_2", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "B", + "qual": "D", + "ts": 103, + "value": "value-VAL_3", + "label": "", + "error": false + } + ] + }, + { + "name": "reset to new row", + "chunks": [ + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK_2\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_2\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_2", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL_2", + "label": "", + "error": false + } + ] + }, + { + "name": "reset in between chunks", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nlabels: \"L\"\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"a\"\nvalue_size: 10\ncommit_row: false\n", + "reset_row: true\n", + "row_key: \"RK_1\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL_1\"\ncommit_row: true\n" + ], + "results": [ + { + "rk": "RK_1", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL_1", + "label": "", + "error": false + } + ] + }, + { + "name": "invalid - reset with chunk", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nlabels: \"L\"\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"a\"\nvalue_size: 10\nreset_row: true\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "invalid - commit with chunk", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: 
\"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nlabels: \"L\"\nvalue: \"v\"\nvalue_size: 10\ncommit_row: false\n", + "value: \"a\"\nvalue_size: 10\ncommit_row: true\n" + ], + "results": [ + { + "rk": "", + "fm": "", + "qual": "", + "ts": 0, + "value": "", + "label": "", + "error": true + } + ] + }, + { + "name": "empty cell chunk", + "chunks": [ + "row_key: \"RK\"\nfamily_name: \u003c\n value: \"A\"\n\u003e\nqualifier: \u003c\n value: \"C\"\n\u003e\ntimestamp_micros: 100\nvalue: \"value-VAL\"\ncommit_row: false\n", + "commit_row: false\n", + "commit_row: true\n" + ], + "results": [ + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 100, + "value": "value-VAL", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "", + "label": "", + "error": false + }, + { + "rk": "RK", + "fm": "A", + "qual": "C", + "ts": 0, + "value": "", + "label": "", + "error": false + } + ] + } + ] +} \ No newline at end of file diff --git a/gcloud/bigtable/test_row_data.py b/gcloud/bigtable/test_row_data.py index 56b1c15f0655..4dd0a8cd1723 100644 --- a/gcloud/bigtable/test_row_data.py +++ b/gcloud/bigtable/test_row_data.py @@ -522,3 +522,34 @@ def cancel(self): def next(self): return next(self.iter_values) + + +def _generate_cell_chunks(chunk_text_pbs): + from google.protobuf.text_format import Merge + from gcloud.bigtable._generated_v2.bigtable_pb2 import ReadRowsResponse + + chunks = [] + + for chunk_text_pb in chunk_text_pbs: + chunk = ReadRowsResponse.CellChunk() + chunks.append(Merge(chunk_text_pb, chunk)) + + return chunks + + +def _parse_readrows_acceptance_tests(filename): + """Parse acceptance tests from JSON + + See: + https://github.com/GoogleCloudPlatform/cloud-bigtable-client/blob/master/bigtable-client-core/src/test/resources/com/google/cloud/bigtable/grpc/scanner/v2/read-rows-acceptance-test.json + """ + import json + + with open(filename) as json_file: + test_json = json.load(json_file) + + for test in test_json['tests']: + name = test['name'] + chunks = _generate_cell_chunks(test['chunks']) + results = test['results'] + yield name, chunks, results From ebf27b58ce063e5ad3b3c029e99a1799d3bf13d6 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Sat, 25 Jun 2016 20:40:23 -0400 Subject: [PATCH 2/2] Add 'ReadRowsResponseV2' wrapper. Processes the 'ReadRowsResponse.CellChunk' state machine. Add tests based on JSON acceptance tests: https://github.com/GoogleCloudPlatform/cloud-bigtable-client/blob/master/bigtable-client-core/src/test/resources/com/google/cloud/bigtable/grpc/scanner/v2/read-rows-acceptance-test.json. 
--- gcloud/bigtable/row_data.py | 252 ++++++++++++++++++++ gcloud/bigtable/test_row_data.py | 392 +++++++++++++++++++++++++++++++ 2 files changed, 644 insertions(+) diff --git a/gcloud/bigtable/row_data.py b/gcloud/bigtable/row_data.py index e64a242f8507..5191a1759b16 100644 --- a/gcloud/bigtable/row_data.py +++ b/gcloud/bigtable/row_data.py @@ -321,3 +321,255 @@ def consume_all(self, max_loops=None): self.consume_next() except StopIteration: break + + +class ReadRowsResponseError(RuntimeError): + """Exception raised to to invalid chunk / response data from back-end.""" + + +def _raise_if(predicate): + """Helper for validation methods.""" + if predicate: + raise ReadRowsResponseError() + + +class PartialCellV2(object): + """Data for a not-yet-complete cell.""" + + def __init__(self, row_key, family_name, qualifier, timestamp_micros, + labels=(), value=b''): + self.row_key = row_key + self.family_name = family_name + self.qualifier = qualifier + self.timestamp_micros = timestamp_micros + self.labels = labels + self.value = value + + def append_value(self, value): + """Append bytes from a new chunk to value. + + :type value: bytes + :param value: bytes to append + """ + self.value += value + + +class PartialRowsDataV2(object): + """Handle state involved in consuming a ``ReadRows`` streaming response. + + :type response_iterator: + :class:`grpc.framework.alpha._reexport._CancellableIterator` returning + :class:`gcloud.bigtable._generated_v2.bigtable_pb2.ReadRowsResponse` + :param response_iterator: + A streaming iterator returned from a ``ReadRows`` request. + """ + # State names + START = "Start" + NEW_ROW = "New row" + ROW_IN_PROGRESS = "Row in progress" + CELL_IN_PROGRESS = "Cell in progress" + + def __init__(self, response_iterator): + self._response_iterator = response_iterator + # Fully-processed rows, keyed by `row_key` + self._rows = {} + # Counter for responses pulled from iterator + self._counter = 0 + # Maybe cached from previous response + self._last_scanned_row_key = None + # In-progress row, unset until first response, after commit/reset + self._row = None + # Last complete row, unset until first commit + self._previous_row = None + # In-progress cell, unset until first response, after completion + self._cell = None + # Last complete cell, unset until first completion, after new row + self._previous_cell = None + + @property + def state(self): + """Name of state machine state.""" + if self._last_scanned_row_key is None: + return self.START + if self._row is None: + assert self._cell is None + assert self._previous_cell is None + return self.NEW_ROW + if self._cell is not None: + return self.CELL_IN_PROGRESS + if self._previous_cell is not None: + return self.ROW_IN_PROGRESS + return self.NEW_ROW # row added, no chunk yet processed + + @property + def rows(self): + """Property returning all rows accumulated from the stream. + + :rtype: dict + :returns: Dictionary of :class:`PartialRowData`. + """ + _raise_if(self.state not in (self.NEW_ROW,)) + # NOTE: To avoid duplicating large objects, this is just the + # mutable private data. 
+ return self._rows + + @staticmethod + def _validate_chunk_status(chunk): + """Helper for :meth:`_validate_chunk_row_in_progress`, etc.""" + # No reseet with other keys + if chunk.reset_row: + _raise_if(chunk.row_key) + _raise_if(chunk.HasField('family_name')) + _raise_if(chunk.HasField('qualifier')) + _raise_if(chunk.timestamp_micros) + _raise_if(chunk.labels) + _raise_if(chunk.value_size) + _raise_if(chunk.value) + # No commit with value size + _raise_if(chunk.commit_row and chunk.value_size > 0) + # No negative value_size (inferred as a general constraint). + _raise_if(chunk.value_size < 0) + + def _validate_chunk_new_row(self, chunk): + """Helper for :meth:`_validate_chunk`.""" + assert self.state == self.NEW_ROW + _raise_if(chunk.reset_row) + _raise_if(not chunk.row_key) + _raise_if(not chunk.family_name) + _raise_if(not chunk.qualifier) + # This constraint is not enforced in the Go example. + _raise_if(chunk.value_size > 0 and chunk.commit_row is not False) + # This constraint is from the Go example, not the spec. + _raise_if(self._previous_row is not None and + chunk.row_key <= self._previous_row.row_key) + + def _same_as_previous(self, chunk): + """Helper for :meth:`_validate_chunk_row_in_progress`""" + previous = self._previous_cell + return (chunk.row_key == previous.row_key and + chunk.family_name == previous.family_name and + chunk.qualifier == previous.qualifier and + chunk.labels == previous.labels) + + def _validate_chunk_row_in_progress(self, chunk): + """Helper for :meth:`_validate_chunk`""" + assert self.state == self.ROW_IN_PROGRESS + self._validate_chunk_status(chunk) + if not chunk.HasField('commit_row') and not chunk.reset_row: + _raise_if(not chunk.timestamp_micros or not chunk.value) + _raise_if(chunk.row_key and + chunk.row_key != self._row.row_key) + _raise_if(chunk.HasField('family_name') and + not chunk.HasField('qualifier')) + previous = self._previous_cell + _raise_if(self._same_as_previous(chunk) and + chunk.timestamp_micros <= previous.timestamp_micros) + + def _validate_chunk_cell_in_progress(self, chunk): + """Helper for :meth:`_validate_chunk`""" + assert self.state == self.CELL_IN_PROGRESS + self._validate_chunk_status(chunk) + self._copy_from_current(chunk) + + def _validate_chunk(self, chunk): + """Helper for :meth:`consume_next`.""" + if self.state == self.NEW_ROW: + self._validate_chunk_new_row(chunk) + if self.state == self.ROW_IN_PROGRESS: + self._validate_chunk_row_in_progress(chunk) + if self.state == self.CELL_IN_PROGRESS: + self._validate_chunk_cell_in_progress(chunk) + + def _save_current_cell(self): + """Helper for :meth:`consume_next`.""" + row, cell = self._row, self._cell + family = row._cells.setdefault(cell.family_name, {}) + qualified = family.setdefault(cell.qualifier, []) + complete = Cell.from_pb(self._cell) + qualified.append(complete) + self._cell, self._previous_cell = None, cell + + def _copy_from_current(self, chunk): + """Helper for :meth:`consume_next`.""" + current = self._cell + if current is not None: + if not chunk.row_key: + chunk.row_key = current.row_key + if not chunk.HasField('family_name'): + chunk.family_name.value = current.family_name + if not chunk.HasField('qualifier'): + chunk.qualifier.value = current.qualifier + if not chunk.timestamp_micros: + chunk.timestamp_micros = current.timestamp_micros + if not chunk.labels: + chunk.labels.extend(current.labels) + + def _copy_from_previous(self, cell): + """Helper for :meth:`consume_next`.""" + previous = self._previous_cell + if previous is not None: + if not 
cell.row_key: + cell.row_key = previous.row_key + if not cell.family_name: + cell.family_name = previous.family_name + if not cell.qualifier: + cell.qualifier = previous.qualifier + + def _save_current_row(self): + """Helper for :meth:`consume_next`.""" + if self._cell: + self._save_current_cell() + self._rows[self._row.row_key] = self._row + self._row, self._previous_row = None, self._row + self._previous_cell = None + + def consume_next(self): + """Consume the next ``ReadRowsResponse`` from the stream. + + Parse the response and its chunks into a new/existing row in + :attr:`_rows` + """ + response = self._response_iterator.next() + self._counter += 1 + + if self._last_scanned_row_key is None: # first response + if response.last_scanned_row_key: + raise ReadRowsResponseError() + + self._last_scanned_row_key = response.last_scanned_row_key + + row = self._row + cell = self._cell + + for chunk in response.chunks: + + self._validate_chunk(chunk) + + if chunk.reset_row: + row = self._row = None + cell = self._cell = self._previous_cell = None + continue + + if row is None: + row = self._row = PartialRowData(chunk.row_key) + + if cell is None: + cell = self._cell = PartialCellV2( + chunk.row_key, + chunk.family_name.value, + chunk.qualifier.value, + chunk.timestamp_micros, + chunk.labels, + chunk.value) + self._copy_from_previous(cell) + else: + cell.append_value(chunk.value) + + if chunk.commit_row: + self._save_current_row() + row = cell = None + continue + + if chunk.value_size == 0: + self._save_current_cell() + cell = None diff --git a/gcloud/bigtable/test_row_data.py b/gcloud/bigtable/test_row_data.py index 4dd0a8cd1723..ef7690505d90 100644 --- a/gcloud/bigtable/test_row_data.py +++ b/gcloud/bigtable/test_row_data.py @@ -510,6 +510,373 @@ def test_consume_all_with_max_loops(self): self.assertEqual(list(response_iterator.iter_values), [value2, value3]) +class TestPartialRowsDataV2(unittest2.TestCase): + + _json_tests = None + + def _getTargetClass(self): + from gcloud.bigtable.row_data import PartialRowsDataV2 + return PartialRowsDataV2 + + def _makeOne(self, *args, **kwargs): + return self._getTargetClass()(*args, **kwargs) + + def _load_json_test(self, test_name): + import os + if self.__class__._json_tests is None: + dirname = os.path.dirname(__file__) + filename = os.path.join(dirname, 'read-rows-acceptance-test.json') + raw = _parse_readrows_acceptance_tests(filename) + tests = self.__class__._json_tests = {} + for (name, chunks, results) in raw: + tests[name] = chunks, results + return self.__class__._json_tests[test_name] + + # Not part of the JSON acceptance tests. 
+ + def test_state_start(self): + prd = self._makeOne([]) + self.assertEqual(prd.state, prd.START) + + def test_state_new_row_w_row(self): + prd = self._makeOne([]) + prd._last_scanned_row_key = '' + prd._row = object() + self.assertEqual(prd.state, prd.NEW_ROW) + + def test__copy_from_current_unset(self): + prd = self._makeOne([]) + chunks = _generate_cell_chunks(['']) + chunk = chunks[0] + prd._copy_from_current(chunk) + self.assertEqual(chunk.row_key, b'') + self.assertEqual(chunk.family_name.value, u'') + self.assertEqual(chunk.qualifier.value, b'') + self.assertEqual(chunk.timestamp_micros, 0) + self.assertEqual(chunk.labels, []) + + def test__copy_from_current_blank(self): + ROW_KEY = b'RK' + FAMILY_NAME = u'A' + QUALIFIER = b'C' + TIMESTAMP_MICROS = 100 + LABELS = ['L1', 'L2'] + prd = self._makeOne([]) + prd._cell = _PartialCellV2() + chunks = _generate_cell_chunks(['']) + chunk = chunks[0] + chunk.row_key = ROW_KEY + chunk.family_name.value = FAMILY_NAME + chunk.qualifier.value = QUALIFIER + chunk.timestamp_micros = TIMESTAMP_MICROS + chunk.labels.extend(LABELS) + prd._copy_from_current(chunk) + self.assertEqual(chunk.row_key, ROW_KEY) + self.assertEqual(chunk.family_name.value, FAMILY_NAME) + self.assertEqual(chunk.qualifier.value, QUALIFIER) + self.assertEqual(chunk.timestamp_micros, TIMESTAMP_MICROS) + self.assertEqual(chunk.labels, LABELS) + + def test__copy_from_previous_unset(self): + prd = self._makeOne([]) + cell = _PartialCellV2() + prd._copy_from_previous(cell) + self.assertEqual(cell.row_key, '') + self.assertEqual(cell.family_name, u'') + self.assertEqual(cell.qualifier, b'') + self.assertEqual(cell.timestamp_micros, 0) + self.assertEqual(cell.labels, []) + + def test__copy_from_previous_blank(self): + ROW_KEY = 'RK' + FAMILY_NAME = u'A' + QUALIFIER = b'C' + TIMESTAMP_MICROS = 100 + LABELS = ['L1', 'L2'] + prd = self._makeOne([]) + cell = _PartialCellV2( + row_key=ROW_KEY, + family_name=FAMILY_NAME, + qualifier=QUALIFIER, + timestamp_micros=TIMESTAMP_MICROS, + labels=LABELS, + ) + prd._previous_cell = _PartialCellV2() + prd._copy_from_previous(cell) + self.assertEqual(cell.row_key, ROW_KEY) + self.assertEqual(cell.family_name, FAMILY_NAME) + self.assertEqual(cell.qualifier, QUALIFIER) + self.assertEqual(cell.timestamp_micros, TIMESTAMP_MICROS) + self.assertEqual(cell.labels, LABELS) + + def test__copy_from_previous_filled(self): + ROW_KEY = 'RK' + FAMILY_NAME = u'A' + QUALIFIER = b'C' + TIMESTAMP_MICROS = 100 + LABELS = ['L1', 'L2'] + prd = self._makeOne([]) + prd._previous_cell = _PartialCellV2( + row_key=ROW_KEY, + family_name=FAMILY_NAME, + qualifier=QUALIFIER, + timestamp_micros=TIMESTAMP_MICROS, + labels=LABELS, + ) + cell = _PartialCellV2() + prd._copy_from_previous(cell) + self.assertEqual(cell.row_key, ROW_KEY) + self.assertEqual(cell.family_name, FAMILY_NAME) + self.assertEqual(cell.qualifier, QUALIFIER) + self.assertEqual(cell.timestamp_micros, 0) + self.assertEqual(cell.labels, []) + + def test__save_row_no_cell(self): + ROW_KEY = 'RK' + prd = self._makeOne([]) + row = prd._row = _Dummy(row_key=ROW_KEY) + prd._cell = None + prd._save_current_row() + self.assertTrue(prd._rows[ROW_KEY] is row) + + def test_invalid_last_scanned_row_key_on_start(self): + from gcloud.bigtable.row_data import ReadRowsResponseError + response = _ReadRowsResponseV2(chunks=(), last_scanned_row_key='ABC') + iterator = _MockCancellableIterator(response) + prd = self._makeOne(iterator) + with self.assertRaises(ReadRowsResponseError): + prd.consume_next() + + def 
test_valid_last_scanned_row_key_on_start(self): + response = _ReadRowsResponseV2( + chunks=(), last_scanned_row_key='AFTER') + iterator = _MockCancellableIterator(response) + prd = self._makeOne(iterator) + prd._last_scanned_row_key = 'BEFORE' + prd.consume_next() + self.assertEqual(prd._last_scanned_row_key, 'AFTER') + + def test_invalid_empty_chunk(self): + from gcloud.bigtable.row_data import ReadRowsResponseError + chunks = _generate_cell_chunks(['']) + response = _ReadRowsResponseV2(chunks) + iterator = _MockCancellableIterator(response) + prd = self._makeOne(iterator) + with self.assertRaises(ReadRowsResponseError): + prd.consume_next() + + def test_invalid_empty_second_chunk(self): + from gcloud.bigtable.row_data import ReadRowsResponseError + chunks = _generate_cell_chunks(['', '']) + first = chunks[0] + first.row_key = b'RK' + first.family_name.value = 'A' + first.qualifier.value = b'C' + response = _ReadRowsResponseV2(chunks) + iterator = _MockCancellableIterator(response) + prd = self._makeOne(iterator) + with self.assertRaises(ReadRowsResponseError): + prd.consume_next() + + # JSON Error cases + + def _fail_during_consume(self, testcase_name): + from gcloud.bigtable.row_data import ReadRowsResponseError + chunks, _ = self._load_json_test(testcase_name) + response = _ReadRowsResponseV2(chunks) + iterator = _MockCancellableIterator(response) + prd = self._makeOne(iterator) + with self.assertRaises(ReadRowsResponseError): + prd.consume_next() + + def _fail_during_rows(self, testcase_name): + from gcloud.bigtable.row_data import ReadRowsResponseError + chunks, _ = self._load_json_test(testcase_name) + response = _ReadRowsResponseV2(chunks) + iterator = _MockCancellableIterator(response) + prd = self._makeOne(iterator) + prd.consume_next() + with self.assertRaises(ReadRowsResponseError): + _ = prd.rows + + def test_invalid_no_commit(self): + self._fail_during_rows('invalid - no commit') + + def test_invalid_no_cell_key_before_commit(self): + self._fail_during_consume('invalid - no cell key before commit') + + def test_invalid_no_cell_key_before_value(self): + self._fail_during_consume('invalid - no cell key before value') + + def test_invalid_new_col_family_wo_qualifier(self): + self._fail_during_consume( + 'invalid - new col family must specify qualifier') + + def test_invalid_no_commit_between_rows(self): + self._fail_during_consume('invalid - no commit between rows') + + def test_invalid_no_commit_after_first_row(self): + self._fail_during_consume('invalid - no commit after first row') + + def test_invalid_last_row_missing_commit(self): + self._fail_during_rows('invalid - last row missing commit') + + def test_invalid_duplicate_row_key(self): + self._fail_during_consume('invalid - duplicate row key') + + def test_invalid_new_row_missing_row_key(self): + self._fail_during_consume('invalid - new row missing row key') + + def test_invalid_bare_reset(self): + self._fail_during_consume('invalid - bare reset') + + def test_invalid_bad_reset_no_commit(self): + self._fail_during_consume('invalid - bad reset, no commit') + + def test_invalid_missing_key_after_reset(self): + self._fail_during_consume('invalid - missing key after reset') + + def test_invalid_reset_with_chunk(self): + self._fail_during_consume('invalid - reset with chunk') + + def test_invalid_commit_with_chunk(self): + self._fail_during_consume('invalid - commit with chunk') + + # Non-error cases + + _marker = object() + + def _match_results(self, testcase_name, expected_result=_marker): + import operator + key_func = 
operator.itemgetter('rk', 'fm', 'qual') + chunks, results = self._load_json_test(testcase_name) + response = _ReadRowsResponseV2(chunks) + iterator = _MockCancellableIterator(response) + prd = self._makeOne(iterator) + prd.consume_next() + flattened = sorted(_flatten_cells(prd), key=key_func) + if expected_result is self._marker: + expected_result = sorted(results, key=key_func) + self.assertEqual(flattened, expected_result) + + def test_bare_commit_implies_ts_zero(self): + self._match_results('bare commit implies ts=0') + + def test_simple_row_with_timestamp(self): + self._match_results('simple row with timestamp') + + def test_missing_timestamp_implies_ts_zero(self): + self._match_results('missing timestamp, implied ts=0') + + def test_empty_cell_value(self): + self._match_results('empty cell value') + + def test_two_unsplit_cells(self): + self._match_results('two unsplit cells') + + def test_two_qualifiers(self): + self._match_results('two qualifiers') + + def test_two_families(self): + self._match_results('two families') + + def test_with_labels(self): + self._match_results('with labels') + + def test_split_cell_bare_commit(self): + self._match_results('split cell, bare commit') + + def test_split_cell(self): + self._match_results('split cell') + + def test_split_four_ways(self): + self._match_results('split four ways') + + def test_two_split_cells(self): + self._match_results('two split cells') + + def test_multi_qualifier_splits(self): + self._match_results('multi-qualifier splits') + + def test_multi_qualifier_multi_split(self): + self._match_results('multi-qualifier multi-split') + + def test_multi_family_split(self): + self._match_results('multi-family split') + + def test_two_rows(self): + self._match_results('two rows') + + def test_two_rows_implicit_timestamp(self): + self._match_results('two rows implicit timestamp') + + def test_two_rows_empty_value(self): + self._match_results('two rows empty value') + + def test_two_rows_one_with_multiple_cells(self): + self._match_results('two rows, one with multiple cells') + + def test_two_rows_multiple_cells_multiple_families(self): + self._match_results('two rows, multiple cells, multiple families') + + def test_two_rows_multiple_cells(self): + self._match_results('two rows, multiple cells') + + def test_two_rows_four_cells_two_labels(self): + self._match_results('two rows, four cells, 2 labels') + + def test_two_rows_with_splits_same_timestamp(self): + self._match_results('two rows with splits, same timestamp') + + def test_no_data_after_reset(self): + # JSON testcase has `"results": null` + self._match_results('no data after reset', expected_result=[]) + + def test_simple_reset(self): + self._match_results('simple reset') + + def test_reset_to_new_val(self): + self._match_results('reset to new val') + + def test_reset_to_new_qual(self): + self._match_results('reset to new qual') + + def test_reset_with_splits(self): + self._match_results('reset with splits') + + def test_two_resets(self): + self._match_results('two resets') + + def test_reset_to_new_row(self): + self._match_results('reset to new row') + + def test_reset_in_between_chunks(self): + self._match_results('reset in between chunks') + + def test_empty_cell_chunk(self): + self._match_results('empty cell chunk') + + +def _flatten_cells(prd): + # Match results format from JSON testcases. + # Doesn't handle error cases. 
+ from gcloud._helpers import _bytes_to_unicode + from gcloud._helpers import _microseconds_from_datetime + for row_key, row in prd.rows.items(): + for family_name, family in row.cells.items(): + for qualifier, column in family.items(): + for cell in column: + yield { + u'rk': _bytes_to_unicode(row_key), + u'fm': family_name, + u'qual': _bytes_to_unicode(qualifier), + u'ts': _microseconds_from_datetime(cell.timestamp), + u'value': _bytes_to_unicode(cell.value), + u'label': u' '.join(cell.labels), + u'error': False, + } + + class _MockCancellableIterator(object): cancel_calls = 0 @@ -524,6 +891,31 @@ def next(self): return next(self.iter_values) +class _Dummy(object): + + def __init__(self, **kw): + self.__dict__.update(kw) + + +class _PartialCellV2(object): + + row_key = '' + family_name = u'' + qualifier = b'' + timestamp_micros = 0 + + def __init__(self, **kw): + self.labels = kw.pop('labels', []) + self.__dict__.update(kw) + + +class _ReadRowsResponseV2(object): + + def __init__(self, chunks, last_scanned_row_key=''): + self.chunks = chunks + self.last_scanned_row_key = last_scanned_row_key + + def _generate_cell_chunks(chunk_text_pbs): from google.protobuf.text_format import Merge from gcloud.bigtable._generated_v2.bigtable_pb2 import ReadRowsResponse
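
Editor's note: the sketch below is not part of either commit; it only illustrates how the pieces above compose, mirroring what TestPartialRowsDataV2._match_results does. It parses one named case from read-rows-acceptance-test.json, builds CellChunk protobufs from the text-format strings (as _generate_cell_chunks does), drives them through PartialRowsDataV2 via a fake stream, and returns the accumulated rows. FakeResponse, FakeStream and run_case are hypothetical stand-ins for the test doubles defined in test_row_data.py; the module paths follow the patch.

import json
import os

from google.protobuf.text_format import Merge

from gcloud.bigtable._generated_v2.bigtable_pb2 import ReadRowsResponse
from gcloud.bigtable.row_data import PartialRowsDataV2


class FakeResponse(object):
    # Duck-typed ReadRowsResponse: consume_next() only looks at
    # 'chunks' and 'last_scanned_row_key'.
    def __init__(self, chunks, last_scanned_row_key=''):
        self.chunks = chunks
        self.last_scanned_row_key = last_scanned_row_key


class FakeStream(object):
    # Minimal iterator exposing the .next() that the consumer calls.
    def __init__(self, *responses):
        self._responses = iter(responses)

    def next(self):
        return next(self._responses)


def run_case(json_path, case_name):
    with open(json_path) as json_file:
        cases = dict((t['name'], t) for t in json.load(json_file)['tests'])
    chunks = []
    for text_pb in cases[case_name]['chunks']:
        chunk = ReadRowsResponse.CellChunk()
        Merge(text_pb, chunk)  # parse the text-format chunk
        chunks.append(chunk)
    prd = PartialRowsDataV2(FakeStream(FakeResponse(chunks)))
    prd.consume_next()  # raises ReadRowsResponseError for the 'invalid - ...' cases
    return prd.rows     # {row_key: PartialRowData}, valid once all rows committed


if __name__ == '__main__':
    here = os.path.dirname(os.path.abspath(__file__))
    rows = run_case(
        os.path.join(here, 'read-rows-acceptance-test.json'), 'two rows')
    print(sorted(rows.keys()))  # expect the two row keys, RK_1 and RK_2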