Skip to content

Add support for Gremlin proxy host and Neptune HTTP query visualization #530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd
## Upcoming
- Added `--explain-type` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/503))
- Added general documentation for `%%graph_notebook_config` options ([Link to PR](https://github.com/aws/graph-notebook/pull/504))
- Added support for Gremlin proxy hosts and visualization of Neptune HTTP results ([Link to PR](https://github.com/aws/graph-notebook/pull/530))
- Modified Dockerfile to support Python 3.10 ([Link to PR](https://github.com/aws/graph-notebook/pull/519))
- Updated Docker documentation with platform-specific run commands ([Link to PR](https://github.com/aws/graph-notebook/pull/502))
- Fixed deprecation warnings in GitHub workflows ([Link to PR](https://github.com/aws/graph-notebook/pull/506))
Expand Down
35 changes: 28 additions & 7 deletions src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from SPARQLWrapper import SPARQLWrapper
from botocore.session import get_session
from gremlin_python.driver.protocol import GremlinServerError
from gremlin_python.structure.graph import Path
from IPython.core.display import HTML, display_html, display
from IPython.core.magic import (Magics, magics_class, cell_magic, line_magic, line_cell_magic, needs_local_scope)
from ipywidgets.widgets.widget_description import DescriptionStyle
Expand Down Expand Up @@ -808,15 +809,15 @@ def gremlin(self, line, cell, local_ns: dict = None):
parser.add_argument('--explain-type', type=str.lower, default='',
help='Explain mode to use when using the explain query mode.')
parser.add_argument('-p', '--path-pattern', default='', help='path pattern')
parser.add_argument('-g', '--group-by', type=str, default='T.label',
parser.add_argument('-g', '--group-by', type=str, default='',
help='Property used to group nodes (e.g. code, T.region) default is T.label')
parser.add_argument('-gd', '--group-by-depth', action='store_true', default=False,
help="Group nodes based on path hierarchy")
parser.add_argument('-gr', '--group-by-raw', action='store_true', default=False,
help="Group nodes by the raw result")
parser.add_argument('-d', '--display-property', type=str, default='T.label',
parser.add_argument('-d', '--display-property', type=str, default='',
help='Property to display the value of on each node, default is T.label')
parser.add_argument('-de', '--edge-display-property', type=str, default='T.label',
parser.add_argument('-de', '--edge-display-property', type=str, default='',
help='Property to display the value of on each edge, default is T.label')
parser.add_argument('-t', '--tooltip-property', type=str, default='',
help='Property to display the value of on each node tooltip. If not specified, tooltip '
Expand Down Expand Up @@ -936,8 +937,16 @@ def gremlin(self, line, cell, local_ns: dict = None):
else:
first_tab_html = pre_container_template.render(content='No profile found')
else:
using_http = False
query_start = time.time() * 1000 # time.time() returns time in seconds w/high precision; x1000 to get in ms
query_res = self.client.gremlin_query(cell, transport_args=transport_args)
if self.graph_notebook_config.proxy_host != '' and self.client.is_neptune_domain():
using_http = True
query_res_http = self.client.gremlin_http_query(cell, headers={'Accept': 'application/vnd.gremlin-v1.0+json;types=false'})
query_res_http.raise_for_status()
query_res_http_json = query_res_http.json()
query_res = query_res_http_json['result']['data']
else:
query_res = self.client.gremlin_query(cell, transport_args=transport_args)
query_time = time.time() * 1000 - query_start
if not args.silent:
gremlin_metadata = build_gremlin_metadata_from_query(query_type='query', results=query_res,
Expand All @@ -951,18 +960,30 @@ def gremlin(self, line, cell, local_ns: dict = None):
logger.debug(f'edge_display_property: {args.edge_display_property}')
logger.debug(f'label_max_length: {args.label_max_length}')
logger.debug(f'ignore_groups: {args.ignore_groups}')
gn = GremlinNetwork(group_by_property=args.group_by, display_property=args.display_property,
gn = GremlinNetwork(group_by_property=args.group_by,
display_property=args.display_property,
group_by_raw=args.group_by_raw,
group_by_depth=args.group_by_depth,
edge_display_property=args.edge_display_property,
tooltip_property=args.tooltip_property,
edge_tooltip_property=args.edge_tooltip_property,
label_max_length=args.label_max_length,
edge_label_max_length=args.edge_label_max_length,
ignore_groups=args.ignore_groups)
ignore_groups=args.ignore_groups,
using_http=using_http)

if using_http and 'path()' in cell and query_res:
first_path = query_res[0]
if isinstance(first_path, dict) and first_path.keys() == {'labels', 'objects'}:
query_res_to_path_type = []
for path in query_res:
new_path_list = path['objects']
new_path = Path(labels=[], objects=new_path_list)
query_res_to_path_type.append(new_path)
query_res = query_res_to_path_type

if args.path_pattern == '':
gn.add_results(query_res)
gn.add_results(query_res, is_http=using_http)
else:
pattern = parse_pattern_list_str(args.path_pattern)
gn.add_results_with_pattern(query_res, pattern)
Expand Down
42 changes: 30 additions & 12 deletions src/graph_notebook/neptune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
from botocore.awsrequest import AWSRequest
from gremlin_python.driver import client, serializer
from gremlin_python.driver.protocol import GremlinServerError
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
from neo4j import GraphDatabase, DEFAULT_DATABASE
from neo4j.exceptions import AuthError
from base64 import b64encode
import nest_asyncio

from graph_notebook.neptune.bolt_auth_token import NeptuneBoltAuthToken


# This patch is no longer needed when graph_notebook is using a Gremlin Python
# client >= 3.5.0 as the HashableDict is now part of that client driver.
# import graph_notebook.neptune.gremlin.graphsonV3d0_MapType_objectify_patch # noqa F401
Expand All @@ -45,7 +45,7 @@
# TODO: add doc links to each command

FORMAT_CSV = 'csv'
FORMAT_OPENCYPHER='opencypher'
FORMAT_OPENCYPHER = 'opencypher'
FORMAT_NTRIPLE = 'ntriples'
FORMAT_NQUADS = 'nquads'
FORMAT_RDFXML = 'rdfxml'
Expand Down Expand Up @@ -191,11 +191,19 @@ def is_neptune_domain(self):
return is_allowed_neptune_host(hostname=self.target_host, host_allowlist=self.neptune_hosts)

def get_uri_with_port(self, use_websocket=False, use_proxy=False):
    """Build the ``<protocol>://<host>:<port>`` base URI for this client.

    :param use_websocket: when ``True``, use ``self._ws_protocol``; otherwise
        use ``self._http_protocol``.
    :param use_proxy: when ``True``, address ``self.proxy_host`` /
        ``self.proxy_port``; otherwise address ``self.target_host`` /
        ``self.target_port``.
    :return: the URI string, with no path component appended.
    """
    # The flags are compared with ``is True`` on purpose: only the literal
    # boolean switches behaviour, matching how callers pass these arguments.
    if use_websocket is True:
        protocol = self._ws_protocol
    else:
        protocol = self._http_protocol

    if use_proxy is True:
        uri_host, uri_port = self.proxy_host, self.proxy_port
    else:
        uri_host, uri_port = self.target_host, self.target_port

    # Host and port come only from the proxy/target pair selected above; the
    # URI is no longer first built from ``self.host``/``self.port`` and then
    # overwritten.
    return f'{protocol}://{uri_host}:{uri_port}'

def sparql_query(self, query: str, headers=None, explain: str = '', path: str = '') -> requests.Response:
Expand Down Expand Up @@ -267,11 +275,20 @@ def sparql_cancel(self, query_id: str, silent: bool = False):
def get_gremlin_connection(self, transport_kwargs) -> client.Client:
    """Create a Gremlin client for the target endpoint, optionally via a proxy.

    The client always connects to the websocket URL of the target host. When
    ``self.proxy_host`` is set, the transport is told to go through the
    proxy's HTTP ``/gremlin`` URL and the request whose headers are forwarded
    is prepared against that proxy URL; otherwise it is prepared against the
    websocket URL.

    :param transport_kwargs: keyword arguments forwarded to both
        ``AiohttpTransport`` and ``client.Client``.
    :return: a ``gremlin_python`` ``client.Client`` instance.
    """
    nest_asyncio.apply()

    ws_url = f'{self.get_uri_with_port(use_websocket=True, use_proxy=False)}/gremlin'
    if self.proxy_host != '':
        proxy_http_url = f'{self.get_uri_with_port(use_websocket=False, use_proxy=True)}/gremlin'
        request = self._prepare_request('GET', proxy_http_url)

        def transport_factory():
            # Route the websocket connection through the configured proxy.
            return AiohttpTransport(call_from_event_loop=True, proxy=proxy_http_url,
                                    **transport_kwargs)
    else:
        request = self._prepare_request('GET', ws_url)

        def transport_factory():
            return AiohttpTransport(**transport_kwargs)

    # Neptune hosts always use the 'g' traversal source; other hosts use the
    # configured one.
    traversal_source = 'g' if self.is_neptune_domain() else self.gremlin_traversal_source
    return client.Client(ws_url, traversal_source, transport_factory=transport_factory,
                         username=self.gremlin_username, password=self.gremlin_password,
                         message_serializer=self.gremlin_serializer,
                         headers=dict(request.headers), **transport_kwargs)

def gremlin_query(self, query, transport_args=None, bindings=None):
Expand All @@ -298,7 +315,8 @@ def gremlin_http_query(self, query, headers=None) -> requests.Response:
if headers is None:
headers = {}

uri = f'{self.get_uri_with_port()}/gremlin'
use_proxy = True if self.proxy_host != '' else False
uri = f'{self.get_uri_with_port(use_websocket=False, use_proxy=use_proxy)}/gremlin'
data = {'gremlin': query}
req = self._prepare_request('POST', uri, data=json.dumps(data), headers=headers)
res = self._http_session.send(req, verify=self.ssl_verify)
Expand Down Expand Up @@ -431,7 +449,7 @@ def stream(self, url, **kwargs) -> requests.Response:
params = {}
for k, v in kwargs.items():
params[k] = v
req = self._prepare_request('GET', url, params=params,data='')
req = self._prepare_request('GET', url, params=params, data='')
res = self._http_session.send(req, verify=self.ssl_verify)
return res.json()

Expand Down Expand Up @@ -850,7 +868,7 @@ def with_sparql_path(self, path: str):
def with_gremlin_traversal_source(self, traversal_source: str):
    """Store the Gremlin traversal source and return a builder over the same args."""
    builder_args = self.args
    builder_args['gremlin_traversal_source'] = traversal_source
    return ClientBuilder(builder_args)

def with_gremlin_login(self, username: str, password: str):
self.args['gremlin_username'] = username
self.args['gremlin_password'] = password
Expand All @@ -859,7 +877,7 @@ def with_gremlin_login(self, username: str, password: str):
def with_gremlin_serializer(self, message_serializer: str):
    """Store the Gremlin message serializer and return a builder over the same args."""
    builder_args = self.args
    builder_args['gremlin_serializer'] = message_serializer
    return ClientBuilder(builder_args)

def with_neo4j_login(self, username: str, password: str, auth: bool, database: str):
self.args['neo4j_username'] = username
self.args['neo4j_password'] = password
Expand Down
Loading