From 308faf84c91ca10c7bae965f9907fc408e5df9c6 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 12:33:02 -0700 Subject: [PATCH 01/21] Multi-column support --- bigquery_magics/bigquery.py | 7 ++-- bigquery_magics/graph_server.py | 60 ++++++++++++++------------------- 2 files changed, 29 insertions(+), 38 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index d8d33c5..33a01d0 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -657,9 +657,10 @@ def _is_valid_json(s: str): def _supports_graph_widget(query_result: pandas.DataFrame): num_rows, num_columns = query_result.shape - if num_columns != 1: - return False - return query_result[query_result.columns[0]].apply(_is_valid_json).all() + for column in query_result.columns: + if not query_result[column].apply(_is_valid_json).all(): + return False + return True def _make_bq_query( diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 7c55279..5e0ca0c 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -56,45 +56,35 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): ) try: - column_name = None - column_value = None - for key, value in query_results.items(): - if column_name is None: - if not isinstance(key, str): - raise ValueError(f"Expected outer key to be str, got {type(key)}") - if not isinstance(value, dict): - raise ValueError( - f"Expected outer value to be dict, got {type(value)}" - ) - column_name = key - column_value = value - else: - # TODO: Implement multi-column support. - raise ValueError( - "Query has multiple columns - graph visualization not supported" - ) - if column_name is None or column_value is None: - raise ValueError( - "query result with no columns is not supported for graph visualization" - ) - - fields: List[StructType.Field] = [ - StructType.Field(name=column_name, type=Type(code=TypeCode.JSON)) - ] - data = {column_name: []} + fields: List[StructType.Field] = [] + data = {} rows = [] - for value_key, value_value in column_value.items(): - if not isinstance(value_key, str): - raise ValueError(f"Expected inner key to be str, got {type(value_key)}") - if not isinstance(value_value, str): + for key, value in query_results.items(): + column_name = None + column_value = None + if not isinstance(key, str): + raise ValueError(f"Expected outer key to be str, got {type(key)}") + if not isinstance(value, dict): raise ValueError( - f"Expected inner value to be str, got {type(value_value)}" + f"Expected outer value to be dict, got {type(value)}" ) - row_json = json.loads(value_value) + column_name = key + column_value = value + + fields.append(StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))) + data[column_name] = [] + for value_key, value_value in column_value.items(): + if not isinstance(value_key, str): + raise ValueError(f"Expected inner key to be str, got {type(value_key)}") + if not isinstance(value_value, str): + raise ValueError( + f"Expected inner value to be str, got {type(value_value)}" + ) + row_json = json.loads(value_value) - if row_json is not None: - data[column_name].append(row_json) - rows.append([row_json]) + if row_json is not None: + data[column_name].append(row_json) + rows.append([row_json]) d, ignored_columns = columns_to_native_numpy(data, fields) From 22c9e8f21758db9cdd947fd36c8ccf42d8849f43 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 13:28:44 -0700 Subject: [PATCH 02/21] Remove 'rows' field in results, as it's not used by the Javascript. Also, add a test for the multi-column case. --- bigquery_magics/graph_server.py | 7 +++- tests/unit/test_graph_server.py | 67 +++++++++++++++++---------------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 5e0ca0c..930a615 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -102,10 +102,15 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): return { "response": { + # These fields populate the graph result view. "nodes": nodes, "edges": edges, + + # This populates the visualizer's schema view, but not yet implemented on the + # BigQuery side. "schema": None, - "rows": rows, + + # This field is used to populate the visualizer's tabular view. "query_result": data, } } diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index d4100c4..434b722 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -140,7 +140,6 @@ def test_convert_one_column_no_rows(): "edges": [], "nodes": [], "query_result": {"result": []}, - "rows": [], "schema": None, } } @@ -164,7 +163,6 @@ def test_convert_one_column_one_row_one_column(): _validate_nodes_and_edges(result) assert result["response"]["query_result"] == {"result": [row_alex_owns_account]} - assert result["response"]["rows"] == [[row_alex_owns_account]] assert result["response"]["schema"] is None @@ -185,11 +183,6 @@ def test_convert_one_column_one_row_one_column_null_json(): "edges": [], "nodes": [], "query_result": {"result": []}, - "rows": [ - [ - None, - ] - ], "schema": None, }, } @@ -218,10 +211,34 @@ def test_convert_one_column_two_rows(): assert result["response"]["query_result"] == { "result": [row_alex_owns_account, row_lee_owns_account] } - assert result["response"]["rows"] == [ - [row_alex_owns_account], - [row_lee_owns_account], - ] + assert result["response"]["schema"] is None + + +@pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" +) +def test_convert_one_row_two_columns(): + result = graph_server.convert_graph_data( + { + "col1": { + "0": json.dumps(row_alex_owns_account), + }, + "col2": { + "0": json.dumps(row_lee_owns_account), + }, + } + ) + print(json.dumps(result)) + + assert len(result["response"]["nodes"]) == 4 + assert len(result["response"]["edges"]) == 2 + + _validate_nodes_and_edges(result) + + assert result["response"]["query_result"] == { + "col1": [row_alex_owns_account], + "col2": [row_lee_owns_account], + } assert result["response"]["schema"] is None @@ -243,7 +260,6 @@ def test_convert_nongraph_json(): assert len(result["response"]["edges"]) == 0 assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]} - assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]] assert result["response"]["schema"] is None @@ -297,32 +313,18 @@ def test_convert_inner_value_not_string(): assert result == {"error": "Expected inner value to be str, got "} -@pytest.mark.skipif( - graph_visualization is None, reason="Requires `spanner-graph-notebook`" -) -def test_convert_one_column_one_row_two_columns(): - result = graph_server.convert_graph_data( - { - "result1": { - "0": json.dumps(row_alex_owns_account), - }, - "result2": { - "0": json.dumps(row_alex_owns_account), - }, - } - ) - assert result == { - "error": "Query has multiple columns - graph visualization not supported" - } - - @pytest.mark.skipif( graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) def test_convert_empty_dict(): result = graph_server.convert_graph_data({}) assert result == { - "error": "query result with no columns is not supported for graph visualization" + "response": { + "nodes": [], + "edges": [], + "schema": None, + "query_result": {}, + } } @@ -411,7 +413,6 @@ def test_post_query(self): self.assertEqual( response_data["query_result"], {"result": [row_alex_owns_account]} ) - self.assertEqual(response_data["rows"], [[row_alex_owns_account]]) self.assertIsNone(response_data["schema"]) From cbffa75da413d79709af09b92fc7e84462f433b7 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 13:36:29 -0700 Subject: [PATCH 03/21] reformat --- bigquery_magics/graph_server.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 930a615..6104328 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -65,17 +65,19 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): if not isinstance(key, str): raise ValueError(f"Expected outer key to be str, got {type(key)}") if not isinstance(value, dict): - raise ValueError( - f"Expected outer value to be dict, got {type(value)}" - ) + raise ValueError(f"Expected outer value to be dict, got {type(value)}") column_name = key column_value = value - fields.append(StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))) + fields.append( + StructType.Field(name=column_name, type=Type(code=TypeCode.JSON)) + ) data[column_name] = [] for value_key, value_value in column_value.items(): if not isinstance(value_key, str): - raise ValueError(f"Expected inner key to be str, got {type(value_key)}") + raise ValueError( + f"Expected inner key to be str, got {type(value_key)}" + ) if not isinstance(value_value, str): raise ValueError( f"Expected inner value to be str, got {type(value_value)}" @@ -105,11 +107,9 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): # These fields populate the graph result view. "nodes": nodes, "edges": edges, - # This populates the visualizer's schema view, but not yet implemented on the # BigQuery side. "schema": None, - # This field is used to populate the visualizer's tabular view. "query_result": data, } From 43eb06ff026a7846f2b94a77982bc7b0e4fe1c40 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 14:19:13 -0700 Subject: [PATCH 04/21] Fix test_bigquery.py tests, remove unnecessary mocking of GraphServer --- tests/unit/test_bigquery.py | 52 +++++++++++++++---------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 0ab9685..87bc86d 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -39,6 +39,7 @@ import bigquery_magics import bigquery_magics.bigquery as magics +import bigquery_magics.graph_server as graph_server try: import google.cloud.bigquery_storage as bigquery_storage @@ -677,10 +678,12 @@ def test_bigquery_graph_json_json_result(monkeypatch): bqstorage_client_patch ), display_patch as display_mock: run_query_mock.return_value = query_job_mock - return_value = ip.run_cell_magic("bigquery", "--graph", sql) + try: + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() - # As we only support visualization with single-column queries, the visualizer should not be launched. - display_mock.assert_not_called() + display_mock.assert_called() assert bqstorage_mock.called # BQ storage client was used assert isinstance(return_value, pandas.DataFrame) @@ -729,21 +732,13 @@ def test_bigquery_graph_json_result(monkeypatch): ] result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) - graph_server_init_patch = mock.patch( - "bigquery_magics.graph_server.GraphServer.init", autospec=True - ) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, ( - bqstorage_client_patch - ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: - graph_server_init_mock.return_value = mock.Mock() - graph_server_init_mock.return_value.is_alive = mock.Mock() - graph_server_init_mock.return_value.is_alive.return_value = True + with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -770,7 +765,10 @@ def test_bigquery_graph_json_result(monkeypatch): ) # identifier in 3rd row of query result # Make sure we can run a second graph query, after the graph server is already running. - return_value = ip.run_cell_magic("bigquery", "--graph", sql) + try: + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() # Sanity check that the HTML content looks like graph visualization. Minimal check # to allow Spanner to change its implementation without breaking this test. @@ -841,21 +839,18 @@ def test_bigquery_graph_colab(monkeypatch): ] result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) - graph_server_init_patch = mock.patch( - "bigquery_magics.graph_server.GraphServer.init", autospec=True - ) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, ( - bqstorage_client_patch - ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: run_query_mock.return_value = query_job_mock - graph_server_init_mock.return_value = None - return_value = ip.run_cell_magic("bigquery", "--graph", sql) + try: + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() assert len(display_mock.call_args_list) == 1 assert len(display_mock.call_args_list[0]) == 2 @@ -880,7 +875,6 @@ def test_bigquery_graph_colab(monkeypatch): # Make sure we actually used colab path, not GraphServer path. assert sys.modules["google.colab"].output.register_callback.called - assert not graph_server_init_mock.called assert bqstorage_mock.called # BQ storage client was used assert isinstance(return_value, pandas.DataFrame) @@ -902,7 +896,6 @@ def test_colab_callback(): "edges": [], "nodes": [], "query_result": {"result": []}, - "rows": [], "schema": None, } } @@ -937,22 +930,19 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch): sql = "SELECT graph_json FROM t" result = pandas.DataFrame([], columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) - graph_server_init_patch = mock.patch( - "bigquery_magics.graph_server.GraphServer.init", autospec=True - ) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, ( - bqstorage_client_patch - ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: run_query_mock.return_value = query_job_mock - graph_server_init_mock.return_value = None with pytest.raises(ImportError): - ip.run_cell_magic("bigquery", "--graph", sql) + try: + ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() display_mock.assert_not_called() From 46dbcc15c15e11c16e745c80808d20a710a2a355 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 14:21:04 -0700 Subject: [PATCH 05/21] reformat --- tests/unit/test_bigquery.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 87bc86d..33db7bc 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -738,7 +738,9 @@ def test_bigquery_graph_json_result(monkeypatch): ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -845,7 +847,9 @@ def test_bigquery_graph_colab(monkeypatch): ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock try: return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -936,7 +940,9 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch): ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock with pytest.raises(ImportError): try: From 57e22c06d01281e8b00fed8e12f3acfd95b01738 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 14:58:33 -0700 Subject: [PATCH 06/21] Get basic graph visualization working against latest spanner code --- bigquery_magics/graph_server.py | 23 ++++------------------- setup.py | 2 +- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 6104328..09012f6 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -50,10 +50,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): # to exist upstream. from google.cloud.spanner_v1.types import StructType, Type, TypeCode import networkx - from spanner_graphs.conversion import ( - columns_to_native_numpy, - prepare_data_for_graphing, - ) + from spanner_graphs.conversion import get_nodes_edges try: fields: List[StructType.Field] = [] @@ -88,25 +85,13 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): data[column_name].append(row_json) rows.append([row_json]) - d, ignored_columns = columns_to_native_numpy(data, fields) - - graph: networkx.classes.DiGraph = prepare_data_for_graphing( - incoming=d, schema_json=None - ) - - nodes = [] - for node_id, node in graph.nodes(data=True): - nodes.append(node) - - edges = [] - for from_id, to_id, edge in graph.edges(data=True): - edges.append(edge) + nodes, edges = get_nodes_edges(data, fields, schema_json=None) return { "response": { # These fields populate the graph result view. - "nodes": nodes, - "edges": edges, + "nodes": [node.to_json() for node in nodes], + "edges": [edge.to_json() for edge in edges], # This populates the visualizer's schema view, but not yet implemented on the # BigQuery side. "schema": None, diff --git a/setup.py b/setup.py index 8311429..5c52476 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "bigframes": ["bigframes >= 1.17.0"], "geopandas": ["geopandas >= 1.0.1"], "spanner-graph-notebook": [ - "spanner-graph-notebook >= 1.1.1, <=1.1.1", + "spanner-graph-notebook >= 1.1.3", "networkx", "portpicker", ], From 36e0f3442731e2bdbd1109784b9849722872281a Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 15:05:37 -0700 Subject: [PATCH 07/21] Fix unit tests --- tests/unit/test_graph_server.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 434b722..362b702 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -116,18 +116,17 @@ def _validate_nodes_and_edges(result): for edge in result["response"]["edges"]: - assert "id" in edge - assert edge["label"] == "Owns" - assert "source" in edge - assert "target" in edge + assert "source_node_identifier" in edge + assert "destination_node_identifier" in edge + assert "identifier" in edge + assert "Owns" in edge["labels"] assert "properties" in edge + print(result["response"]["nodes"]) for node in result["response"]["nodes"]: - assert "id" in node - assert "key_property_names" in node - assert node["label"] in ("Account", "Person") + assert "identifier" in node + assert "Account" in node["labels"] or "Person" in node["labels"] assert "properties" in node - assert "value" in node @pytest.mark.skipif( @@ -228,7 +227,6 @@ def test_convert_one_row_two_columns(): }, } ) - print(json.dumps(result)) assert len(result["response"]["nodes"]) == 4 assert len(result["response"]["edges"]) == 2 From 469583e80250e43955c2e6bce2648a50ae93c268 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 11 Mar 2025 15:29:04 -0700 Subject: [PATCH 08/21] Ignore columns we don't know how to visualize for visualization purposes, but still show them in the tabular view, and don't block visualizing remaining columns --- bigquery_magics/bigquery.py | 10 ++++--- bigquery_magics/graph_server.py | 26 +++++++---------- tests/unit/test_graph_server.py | 51 +++++++++++++++++---------------- 3 files changed, 43 insertions(+), 44 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index 33a01d0..f45a996 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -656,11 +656,13 @@ def _is_valid_json(s: str): def _supports_graph_widget(query_result: pandas.DataFrame): - num_rows, num_columns = query_result.shape + # Visualization is supported if we have any json items to display. + # (Non-json items are excluded from visualization, but we still want to bring up + # the visualizer for the json items.) for column in query_result.columns: - if not query_result[column].apply(_is_valid_json).all(): - return False - return True + if query_result[column].apply(_is_valid_json).any(): + return True + return False def _make_bq_query( diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 09012f6..63db5c3 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -55,7 +55,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): try: fields: List[StructType.Field] = [] data = {} - rows = [] + tabular_data = {} for key, value in query_results.items(): column_name = None column_value = None @@ -70,20 +70,16 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): StructType.Field(name=column_name, type=Type(code=TypeCode.JSON)) ) data[column_name] = [] - for value_key, value_value in column_value.items(): - if not isinstance(value_key, str): - raise ValueError( - f"Expected inner key to be str, got {type(value_key)}" - ) - if not isinstance(value_value, str): - raise ValueError( - f"Expected inner value to be str, got {type(value_value)}" - ) - row_json = json.loads(value_value) - - if row_json is not None: + tabular_data[column_name] = [] + for value_key, value_value in column_value.items(): + try: + row_json = json.loads(value_value) data[column_name].append(row_json) - rows.append([row_json]) + tabular_data[column_name].append(row_json) + except: + # Non-JSON columns cannot be visualized, but we still want them + # in the tabular view. + tabular_data[column_name].append(str(value_value)) nodes, edges = get_nodes_edges(data, fields, schema_json=None) @@ -96,7 +92,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): # BigQuery side. "schema": None, # This field is used to populate the visualizer's tabular view. - "query_result": data, + "query_result": tabular_data, } } except Exception as e: diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 362b702..dcc3d2d 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -168,23 +168,24 @@ def test_convert_one_column_one_row_one_column(): @pytest.mark.skipif( graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) -def test_convert_one_column_one_row_one_column_null_json(): +def test_convert_one_column_two_rows_one_column_null_json(): result = graph_server.convert_graph_data( { "result": { "0": json.dumps(None), + "1": json.dumps(row_alex_owns_account), } } ) - assert result == { - "response": { - "edges": [], - "nodes": [], - "query_result": {"result": []}, - "schema": None, - }, - } + # Null JSON element should be ignored in visualization, but should still be present in tabular view. + assert len(result["response"]["nodes"]) == 2 + assert len(result["response"]["edges"]) == 1 + + _validate_nodes_and_edges(result) + + assert result["response"]["query_result"] == {"result": [None, row_alex_owns_account]} + assert result["response"]["schema"] is None _validate_nodes_and_edges(result) @@ -286,29 +287,29 @@ def test_convert_outer_value_not_dict(): @pytest.mark.skipif( graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) -def test_convert_inner_key_not_string(): +def test_convert_inner_value_not_string(): result = graph_server.convert_graph_data( { - "result": { - 0: json.dumps({"foo": 1, "bar": 2}), + "col1": { + "0": json.dumps(row_alex_owns_account), + }, + "col2": { + "0": 12345, } } ) - assert result == {"error": "Expected inner key to be str, got "} + # Non-JSON column should be ignored in visualizer view, but still appear in tabular view. + assert len(result["response"]["nodes"]) == 2 + assert len(result["response"]["edges"]) == 1 -@pytest.mark.skipif( - graph_visualization is None, reason="Requires `spanner-graph-notebook`" -) -def test_convert_inner_value_not_string(): - result = graph_server.convert_graph_data( - { - "result": { - "0": 1, - } - } - ) - assert result == {"error": "Expected inner value to be str, got "} + _validate_nodes_and_edges(result) + + assert result["response"]["query_result"] == { + "col1": [row_alex_owns_account], + "col2": ["12345"], + } + assert result["response"]["schema"] is None @pytest.mark.skipif( From 6223652f9a11d9ad5ebfb79f4cbc2f8b7c9f5c79 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 13:19:27 -0700 Subject: [PATCH 09/21] reformat --- bigquery_magics/graph_server.py | 2 +- tests/unit/test_graph_server.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 63db5c3..d10f60b 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -71,7 +71,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): ) data[column_name] = [] tabular_data[column_name] = [] - for value_key, value_value in column_value.items(): + for value_key, value_value in column_value.items(): try: row_json = json.loads(value_value) data[column_name].append(row_json) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index dcc3d2d..34ea17d 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -184,7 +184,9 @@ def test_convert_one_column_two_rows_one_column_null_json(): _validate_nodes_and_edges(result) - assert result["response"]["query_result"] == {"result": [None, row_alex_owns_account]} + assert result["response"]["query_result"] == { + "result": [None, row_alex_owns_account] + } assert result["response"]["schema"] is None _validate_nodes_and_edges(result) @@ -295,7 +297,7 @@ def test_convert_inner_value_not_string(): }, "col2": { "0": 12345, - } + }, } ) From c929e33b05d3ebbdd4f3d4ef34bf8656a688c7fb Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 13:22:28 -0700 Subject: [PATCH 10/21] Remove unused dependency on networkx --- bigquery_magics/graph_server.py | 26 ++++++++++++++++++++++++-- setup.py | 1 - 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index d10f60b..e816a76 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -49,7 +49,6 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): # does not even get called unless spanner_graphs has already been confirmed # to exist upstream. from google.cloud.spanner_v1.types import StructType, Type, TypeCode - import networkx from spanner_graphs.conversion import get_nodes_edges try: @@ -71,7 +70,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): ) data[column_name] = [] tabular_data[column_name] = [] - for value_key, value_value in column_value.items(): + for value_key, value_value in column_value.items(): try: row_json = json.loads(value_value) data[column_name].append(row_json) @@ -209,6 +208,27 @@ def handle_post_query(self): response = convert_graph_data(query_results=json.loads(data["params"])) self.do_data_response(response) + def handle_post_node_expansion(self): + """Handle POST requests for node expansion. + + Expects a JSON payload with: + - params: A JSON string containing connection parameters (project, instance, database, graph) + - request: A dictionary with node details (uid, node_labels, node_properties, direction, edge_label) + """ + try: + data = self.parse_post_data() + + # Execute node expansion with: + # - params_str: JSON string with connection parameters (project, instance, database, graph) + # - request: Dict with node details (uid, node_labels, node_properties, direction, edge_label) + self.do_data_response(execute_node_expansion( + params_str=data.get("params"), + request=data.get("request") + )) + except BaseException as e: + self.do_error_response(e) + return + def do_GET(self): assert self.path == GraphServer.endpoints["get_ping"] self.handle_get_ping() @@ -216,6 +236,8 @@ def do_GET(self): def do_POST(self): if self.path == GraphServer.endpoints["post_ping"]: self.handle_post_ping() + elif self.path == GraphServer.endpoints["post_node_expansion"]: + self.handle_post_node_expansion() else: assert self.path == GraphServer.endpoints["post_query"] self.handle_post_query() diff --git a/setup.py b/setup.py index 5c52476..1d7c595 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,6 @@ "geopandas": ["geopandas >= 1.0.1"], "spanner-graph-notebook": [ "spanner-graph-notebook >= 1.1.3", - "networkx", "portpicker", ], } From 3cdf1ac07a10a03a68a917c43f46277b45d7a3f2 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 13:52:19 -0700 Subject: [PATCH 11/21] Implement stub callback for node expansion --- bigquery_magics/bigquery.py | 41 ++++++++++++++++++++++----------- bigquery_magics/graph_server.py | 7 +++++- tests/unit/test_bigquery.py | 21 +++++++++++++++-- 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index f45a996..dc3b8f5 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -596,21 +596,33 @@ def _handle_result(result, args): return result -def _is_colab() -> bool: - """Check if code is running in Google Colab""" - try: - import google.colab # noqa: F401 - - return True - except ImportError: - return False - - -def _colab_callback(query: str, params: str): +def _colab_query_callback(query: str, params: str): return IPython.core.display.JSON( graph_server.convert_graph_data(query_results=json.loads(params)) ) +def _colab_node_expansion_callback(request: dict, params_str: str): + """Handle node expansion requests in Google Colab environment + + Args: + request: A dictionary containing node expansion details including: + - uid: str - Unique identifier of the node to expand + - node_labels: List[str] - Labels of the node + - node_properties: List[Dict] - Properties of the node with key, value, and type + - direction: str - Direction of expansion ("INCOMING" or "OUTGOING") + - edge_label: Optional[str] - Label of edges to filter by + params_str: A JSON string containing connection parameters + + Returns: + JSON: A JSON-serialized response containing either: + - The query results with nodes and edges + - An error message if the request failed + """ + try: + return IPython.core.display.JSON(graph_server.execute_node_expansion(params_str, request)) + except BaseException as e: + return IPython.core.display.JSON({"error": e}) + singleton_server_thread: threading.Thread = None @@ -628,11 +640,12 @@ def _add_graph_widget(query_result): # visualizer widget. In colab, we are not able to create an http server on a # background thread, so we use a special colab-specific api to register a callback, # to be invoked from Javascript. - if _is_colab(): + try: from google.colab import output - output.register_callback("graph_visualization.Query", _colab_callback) - else: + output.register_callback("graph_visualization.Query", _colab_query_callback) + output.register_callback("graph_visualization.NodeExpansion", _colab_node_expansion_callback) + except ImportError: global singleton_server_thread alive = singleton_server_thread and singleton_server_thread.is_alive() if not alive: diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index e816a76..2a5f379 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -20,6 +20,10 @@ from typing import Dict, List +def execute_node_expansion(params, request): + return {"error": "Node expansion not yet implemented"} + + def convert_graph_data(query_results: Dict[str, Dict[str, str]]): """ Converts graph data to the form expected by the visualization framework. @@ -75,7 +79,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): row_json = json.loads(value_value) data[column_name].append(row_json) tabular_data[column_name].append(row_json) - except: + except (ValueError, TypeError): # Non-JSON columns cannot be visualized, but we still want them # in the tabular view. tabular_data[column_name].append(str(value_value)) @@ -113,6 +117,7 @@ class GraphServer: endpoints = { "get_ping": "/get_ping", "post_ping": "/post_ping", + "post_node_expansion": '/post_node_expansion', "post_query": "/post_query", } diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 33db7bc..d09c39a 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -891,8 +891,8 @@ def test_bigquery_graph_colab(monkeypatch): graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) -def test_colab_callback(): - result = bigquery_magics.bigquery._colab_callback( +def test_colab_query_callback(): + result = bigquery_magics.bigquery._colab_query_callback( "query", json.dumps({"result": {}}) ) assert result.data == { @@ -905,6 +905,23 @@ def test_colab_callback(): } +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", +) +def test_colab_node_expansion_callback(): + result = bigquery_magics.bigquery._colab_node_expansion_callback( + request={"uid": "test_uid", + "node_labels": ["label1, label2"], + "node_properites": {}, + "direction": "INCOMING", + "edge_label": None}, + params_str="{}") + + assert result.data == {"error": "Node expansion not yet implemented"} + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( graph_visualization is not None or bigquery_storage is None, From ba92a07e3cc5f342f3d9f9862b57b6daef7ec124 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 14:29:51 -0700 Subject: [PATCH 12/21] Fix test_bigquery_graph_missing_spanner_deps. The mock query result must contain valid json so that the visualizer attempts to get launched, in order for the code path we're trying to test to get reached. --- tests/unit/test_bigquery.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index d09c39a..72ba3b0 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -949,7 +949,18 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch): "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) sql = "SELECT graph_json FROM t" - result = pandas.DataFrame([], columns=["graph_json"]) + graph_json_rows = [ + """ + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}] + """, + """ + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}] + """, + """ + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}] + """, + ] + result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( From 176c85443c1e9daa301bcb23f122ee439bfc3121 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 14:32:48 -0700 Subject: [PATCH 13/21] reformat --- bigquery_magics/bigquery.py | 13 +++++++++---- bigquery_magics/graph_server.py | 15 ++++++++------- tests/unit/test_bigquery.py | 15 +++++++++------ 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index dc3b8f5..91283ab 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -601,9 +601,10 @@ def _colab_query_callback(query: str, params: str): graph_server.convert_graph_data(query_results=json.loads(params)) ) + def _colab_node_expansion_callback(request: dict, params_str: str): """Handle node expansion requests in Google Colab environment - + Args: request: A dictionary containing node expansion details including: - uid: str - Unique identifier of the node to expand @@ -612,14 +613,16 @@ def _colab_node_expansion_callback(request: dict, params_str: str): - direction: str - Direction of expansion ("INCOMING" or "OUTGOING") - edge_label: Optional[str] - Label of edges to filter by params_str: A JSON string containing connection parameters - + Returns: JSON: A JSON-serialized response containing either: - The query results with nodes and edges - An error message if the request failed """ try: - return IPython.core.display.JSON(graph_server.execute_node_expansion(params_str, request)) + return IPython.core.display.JSON( + graph_server.execute_node_expansion(params_str, request) + ) except BaseException as e: return IPython.core.display.JSON({"error": e}) @@ -644,7 +647,9 @@ def _add_graph_widget(query_result): from google.colab import output output.register_callback("graph_visualization.Query", _colab_query_callback) - output.register_callback("graph_visualization.NodeExpansion", _colab_node_expansion_callback) + output.register_callback( + "graph_visualization.NodeExpansion", _colab_node_expansion_callback + ) except ImportError: global singleton_server_thread alive = singleton_server_thread and singleton_server_thread.is_alive() diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 2a5f379..e79b4d2 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -74,7 +74,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): ) data[column_name] = [] tabular_data[column_name] = [] - for value_key, value_value in column_value.items(): + for value_key, value_value in column_value.items(): try: row_json = json.loads(value_value) data[column_name].append(row_json) @@ -117,7 +117,7 @@ class GraphServer: endpoints = { "get_ping": "/get_ping", "post_ping": "/post_ping", - "post_node_expansion": '/post_node_expansion', + "post_node_expansion": "/post_node_expansion", "post_query": "/post_query", } @@ -215,7 +215,7 @@ def handle_post_query(self): def handle_post_node_expansion(self): """Handle POST requests for node expansion. - + Expects a JSON payload with: - params: A JSON string containing connection parameters (project, instance, database, graph) - request: A dictionary with node details (uid, node_labels, node_properties, direction, edge_label) @@ -226,10 +226,11 @@ def handle_post_node_expansion(self): # Execute node expansion with: # - params_str: JSON string with connection parameters (project, instance, database, graph) # - request: Dict with node details (uid, node_labels, node_properties, direction, edge_label) - self.do_data_response(execute_node_expansion( - params_str=data.get("params"), - request=data.get("request") - )) + self.do_data_response( + execute_node_expansion( + params_str=data.get("params"), request=data.get("request") + ) + ) except BaseException as e: self.do_error_response(e) return diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 72ba3b0..83a4356 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -912,12 +912,15 @@ def test_colab_query_callback(): ) def test_colab_node_expansion_callback(): result = bigquery_magics.bigquery._colab_node_expansion_callback( - request={"uid": "test_uid", - "node_labels": ["label1, label2"], - "node_properites": {}, - "direction": "INCOMING", - "edge_label": None}, - params_str="{}") + request={ + "uid": "test_uid", + "node_labels": ["label1, label2"], + "node_properites": {}, + "direction": "INCOMING", + "edge_label": None, + }, + params_str="{}", + ) assert result.data == {"error": "Node expansion not yet implemented"} From 2d899b4cebeec9f1ccc62a879b349d9683463e98 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 14:46:30 -0700 Subject: [PATCH 14/21] Add unit test for GraphServerHandler::handler_post_node_expansion() --- bigquery_magics/graph_server.py | 2 +- tests/unit/test_graph_server.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index e79b4d2..1c14818 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -228,7 +228,7 @@ def handle_post_node_expansion(self): # - request: Dict with node details (uid, node_labels, node_properties, direction, edge_label) self.do_data_response( execute_node_expansion( - params_str=data.get("params"), request=data.get("request") + params=data.get("params"), request=data.get("request") ) ) except BaseException as e: diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 34ea17d..1a627e1 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -416,6 +416,30 @@ def test_post_query(self): ) self.assertIsNone(response_data["schema"]) + @pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" + ) + def test_post_node_expansion(self): + self.assertTrue(self.server_thread.is_alive()) + route = graph_server.graph_server.build_route( + graph_server.GraphServer.endpoints["post_node_expansion"] + ) + request = { + "request": { + "uid": "test_uid", + "node_labels": ["label1, label2"], + "node_properites": {}, + "direction": "INCOMING", + "edge_label": None, + }, + "params": "{}", + } + response = requests.post(route, json={"params": json.dumps(request)}) + self.assertEqual(response.status_code, 200) + self.assertEqual( + response.json(), {"error": "Node expansion not yet implemented"} + ) + def test_stop_server_never_started(): graph_server.graph_server.stop_server() From 3b4903f74bffc59ac671de0ccbc8aaff42c56e31 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 14:51:45 -0700 Subject: [PATCH 15/21] Add test for invalid node expansion request --- bigquery_magics/graph_server.py | 22 +++++++++------------- tests/unit/test_graph_server.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 1c14818..5c4fbb6 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -220,20 +220,16 @@ def handle_post_node_expansion(self): - params: A JSON string containing connection parameters (project, instance, database, graph) - request: A dictionary with node details (uid, node_labels, node_properties, direction, edge_label) """ - try: - data = self.parse_post_data() - - # Execute node expansion with: - # - params_str: JSON string with connection parameters (project, instance, database, graph) - # - request: Dict with node details (uid, node_labels, node_properties, direction, edge_label) - self.do_data_response( - execute_node_expansion( - params=data.get("params"), request=data.get("request") - ) + data = self.parse_post_data() + + # Execute node expansion with: + # - params_str: JSON string with connection parameters (project, instance, database, graph) + # - request: Dict with node details (uid, node_labels, node_properties, direction, edge_label) + self.do_data_response( + execute_node_expansion( + params=data.get("params"), request=data.get("request") ) - except BaseException as e: - self.do_error_response(e) - return + ) def do_GET(self): assert self.path == GraphServer.endpoints["get_ping"] diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 1a627e1..5200167 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -441,5 +441,21 @@ def test_post_node_expansion(self): ) + @pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" + ) + def test_post_node_expansion_invalid_request(self): + self.assertTrue(self.server_thread.is_alive()) + route = graph_server.graph_server.build_route( + graph_server.GraphServer.endpoints["post_node_expansion"] + ) + request = {} + response = requests.post(route, json={"params": json.dumps(request)}) + self.assertEqual(response.status_code, 200) + self.assertEqual( + response.json(), {"error": "Node expansion not yet implemented"} + ) + + def test_stop_server_never_started(): graph_server.graph_server.stop_server() From 49d1aace81b2e8d033580ab1e2fdba808cac5c69 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 14:55:11 -0700 Subject: [PATCH 16/21] reformat --- tests/unit/test_graph_server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 5200167..b56e0c6 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -440,7 +440,6 @@ def test_post_node_expansion(self): response.json(), {"error": "Node expansion not yet implemented"} ) - @pytest.mark.skipif( graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) From 9b4a567adcd7fb1f239b07219940a3dbba670a8d Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 15:02:28 -0700 Subject: [PATCH 17/21] Tweaks to improve code coverage --- bigquery_magics/bigquery.py | 9 +++------ bigquery_magics/graph_server.py | 6 ++++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index 91283ab..a6a4faa 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -619,12 +619,9 @@ def _colab_node_expansion_callback(request: dict, params_str: str): - The query results with nodes and edges - An error message if the request failed """ - try: - return IPython.core.display.JSON( - graph_server.execute_node_expansion(params_str, request) - ) - except BaseException as e: - return IPython.core.display.JSON({"error": e}) + return IPython.core.display.JSON( + graph_server.execute_node_expansion(params_str, request) + ) singleton_server_thread: threading.Thread = None diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 5c4fbb6..91182dc 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -85,12 +85,14 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): tabular_data[column_name].append(str(value_value)) nodes, edges = get_nodes_edges(data, fields, schema_json=None) + nodes_json = [node.to_json() for node in nodes] + edges_json = [edge.to_json() for edge in edges] return { "response": { # These fields populate the graph result view. - "nodes": [node.to_json() for node in nodes], - "edges": [edge.to_json() for edge in edges], + "nodes": nodes_json, + "edges": edges_json, # This populates the visualizer's schema view, but not yet implemented on the # BigQuery side. "schema": None, From 502149ab7de0f28b8c592648e010300e5f5c22df Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 15:15:05 -0700 Subject: [PATCH 18/21] More tweaks to improve code coverage --- bigquery_magics/graph_server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 91182dc..c22372b 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -86,7 +86,10 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): nodes, edges = get_nodes_edges(data, fields, schema_json=None) nodes_json = [node.to_json() for node in nodes] - edges_json = [edge.to_json() for edge in edges] + # edges_json = [edge.to_json() for edge in edges] + edges_json = [] + for edge in edges: + edges_json.append(edge.to_json()) return { "response": { From 148a74c19f8fb1832fa9450385f339091fe18c9f Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 15:27:44 -0700 Subject: [PATCH 19/21] avoid list comprehension due to code coverage tooling --- bigquery_magics/graph_server.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index c22372b..263a027 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -85,8 +85,13 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): tabular_data[column_name].append(str(value_value)) nodes, edges = get_nodes_edges(data, fields, schema_json=None) - nodes_json = [node.to_json() for node in nodes] - # edges_json = [edge.to_json() for edge in edges] + + # Convert nodes and edges to json objects. + # (Unfortunately, the code coverage tooling does not allow this + # to be expressed as list comprehension). + nodes_json = [] + for node in nodes: + nodes_json.append(node.to_json()) edges_json = [] for edge in edges: edges_json.append(edge.to_json()) From ce96f228631af23b3c5fee45d8183eafbb616b7c Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 12 Mar 2025 16:30:12 -0700 Subject: [PATCH 20/21] Fix visualization in colab. Problem is that, even though the port is not used in colab mode, the javascript still throws an error if it's undefined. --- bigquery_magics/bigquery.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index a6a4faa..af0e93e 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -640,6 +640,7 @@ def _add_graph_widget(query_result): # visualizer widget. In colab, we are not able to create an http server on a # background thread, so we use a special colab-specific api to register a callback, # to be invoked from Javascript. + port = None try: from google.colab import output @@ -647,16 +648,22 @@ def _add_graph_widget(query_result): output.register_callback( "graph_visualization.NodeExpansion", _colab_node_expansion_callback ) + + # In colab mode, the Javascript doesn't use the port value we pass in, as there is no + # graph server, but it still has to be set to avoid triggering an exception. + # TODO: Clean this up when the Javascript is fixed on the spanner-graph-notebook side. + port = 0 except ImportError: global singleton_server_thread alive = singleton_server_thread and singleton_server_thread.is_alive() if not alive: singleton_server_thread = graph_server.graph_server.init() + port = graph_server.graph_server.port # Create html to invoke the graph server html_content = generate_visualization_html( query="placeholder query", - port=graph_server.graph_server.port, + port=port, params=query_result.to_json().replace("\\", "\\\\").replace('"', '\\"'), ) IPython.display.display(IPython.core.display.HTML(html_content)) From 3561601fa8a91eb935397a3c952ec22bc7709793 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Thu, 13 Mar 2025 09:55:18 -0700 Subject: [PATCH 21/21] Add spanner-graph-notebook, python-bigquery, and bigquery-storage to prerelease deps to be installed from main --- noxfile.py | 10 +++++++++- owlbot.py | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 562dd6b..efae5e8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -481,7 +481,7 @@ def prerelease_deps(session, protobuf_implementation): ] for dep in prerel_deps: - session.install("--pre", "--no-deps", "--upgrade", dep) + session.install("--pre", "--no-deps", "--upgrade", dep) # Remaining dependencies other_deps = [ @@ -489,6 +489,14 @@ def prerelease_deps(session, protobuf_implementation): ] session.install(*other_deps) + # Install spanner-graph-notebook, python-bigquery, and python-bigquery-storage + # from main to detect any potential breaking changes. For context, see: + # https://github.com/googleapis/python-bigquery-pandas/issues/854 + session.install( + "https://github.com/cloudspannerecosystem/spanner-graph-notebook/archive/refs/heads/main.zip", + "https://github.com/googleapis/python-bigquery/archive/main.zip", + "https://github.com/googleapis/python-bigquery-storage/archive/main.zip", + ) # Print out prerelease package versions session.run( "python", "-c", "import google.protobuf; print(google.protobuf.__version__)" diff --git a/owlbot.py b/owlbot.py index 4347527..904bd9f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -59,6 +59,7 @@ # Multi-processing note isn't relevant, as bigquery-magics is responsible for # creating clients, not the end user. "docs/multiprocessing.rst", + "noxfile.py", "README.rst", ".github/workflows/unittest.yml", ],