Skip to content

Features/add-remote-reference-support #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
30 changes: 16 additions & 14 deletions openapi_python_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,20 @@
import shutil
import subprocess
import sys
import urllib
from enum import Enum
from pathlib import Path
from typing import Any, Dict, Optional, Sequence, Union
from typing import Any, Dict, Optional, Sequence, Union, cast

import httpcore
import httpx
import yaml
from jinja2 import BaseLoader, ChoiceLoader, Environment, FileSystemLoader, PackageLoader

from openapi_python_client import utils

from .parser import GeneratorData, import_string_from_reference
from .parser.errors import GeneratorError
from .resolver.schema_resolver import SchemaResolver
from .utils import snake_case

if sys.version_info.minor < 8: # version did not exist before 3.8, need to use a backport
Expand Down Expand Up @@ -287,20 +288,21 @@ def update_existing_client(


# NOTE(review): this span reads like a rendered diff hunk whose +/- markers were lost:
# the direct-download implementation (httpx.get into `yaml_bytes`, then yaml.safe_load)
# and the SchemaResolver-based one appear interleaved line by line. Check the real
# file before relying on the statement order shown here.
def _get_document(*, url: Optional[str], path: Optional[Path]) -> Union[Dict[str, Any], GeneratorError]:
yaml_bytes: bytes
# Supplying both sources is rejected up front.
if url is not None and path is not None:
return GeneratorError(header="Provide URL or Path, not both.")
if url is not None:
try:
response = httpx.get(url)
yaml_bytes = response.content
except (httpx.HTTPError, httpcore.NetworkError):
return GeneratorError(header="Could not get OpenAPI document from provided URL")
elif path is not None:
yaml_bytes = path.read_bytes()
else:

# Supplying neither source is rejected as well.
if url is None and path is None:
return GeneratorError(header="No URL or Path provided")

# Hand whichever of url/path was given to the resolver; the cast only narrows the type.
source = cast(Union[str, Path], (url if url is not None else path))
try:
return yaml.safe_load(yaml_bytes)
except yaml.YAMLError:
resolver = SchemaResolver(source)
result = resolver.resolve()
# Every error reported by the resolver is folded into one "; "-separated header.
if len(result.errors) > 0:
return GeneratorError(header="; ".join(result.errors))
except (httpx.HTTPError, httpcore.NetworkError, urllib.error.URLError):
return GeneratorError(header="Could not get OpenAPI document from provided URL")
# Any other exception is reported as invalid YAML.
except Exception:
return GeneratorError(header="Invalid YAML from provided source")

return result.schema
Empty file.
22 changes: 22 additions & 0 deletions openapi_python_client/resolver/data_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import yaml

from .resolver_types import SchemaData


class DataLoader:
    """ Decode raw document bytes, picking the parser from the extension of the source path """

    @classmethod
    def load(cls, path: str, data: bytes) -> SchemaData:
        """Parse ``data`` with the parser matching the extension of ``path``.

        Args:
            path: Location the bytes were read from; only the text after its last "." is inspected.
            data: Raw document content.

        Returns:
            The parsed document.
        """
        # casefold() makes the extension comparison case-insensitive (".JSON" behaves like ".json").
        data_type = path.split(".")[-1].casefold()

        if data_type == "json":
            return cls.load_json(data)

        # Every other extension, and a path without any ".", goes through the YAML parser.
        return cls.load_yaml(data)

    @classmethod
    def load_json(cls, data: bytes) -> SchemaData:
        """Parse ``data`` as JSON.

        This used to raise NotImplementedError unconditionally, which made every source
        ending in ".json" unusable through ``load``.

        Raises:
            json.JSONDecodeError: (a ValueError subclass) when ``data`` is not valid JSON.
        """
        import json  # local import so the fix does not depend on the module's import block

        # json.loads accepts bytes directly and detects the UTF-8/16/32 encoding itself.
        return json.loads(data)

    @classmethod
    def load_yaml(cls, data: bytes) -> SchemaData:
        """ Parse ``data`` as YAML using the safe loader """
        return yaml.safe_load(data)
48 changes: 48 additions & 0 deletions openapi_python_client/resolver/pointer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import urllib.parse
from typing import List, Union


class Pointer:
    """ JSON Pointer wrapper, see https://tools.ietf.org/html/rfc6901 """

    def __init__(self, pointer: str) -> None:
        """Store ``pointer`` after checking its overall shape.

        Raises:
            ValueError: when ``pointer`` is None, or is non-empty and does not start with "/".
        """
        well_formed = pointer is not None and (pointer == "" or pointer.startswith("/"))
        if not well_formed:
            raise ValueError(f'Invalid pointer value {pointer}, it must match: *( "/" reference-token )')

        self._pointer = pointer

    @property
    def value(self) -> str:
        """ The pointer exactly as it was given to the constructor """
        return self._pointer

    @property
    def parent(self) -> Union["Pointer", None]:
        """ Pointer with the last reference token removed, or None for the empty pointer """
        raw_tokens = self.tokens(False)

        if len(raw_tokens) <= 1:
            # Splitting the empty pointer yields a single empty token: nothing above it.
            assert raw_tokens[-1] == ""
            return None

        return Pointer("/".join(raw_tokens[:-1]))

    def tokens(self, unescape: bool = True) -> List[str]:
        """Split the pointer on "/".

        The text before the first "/" is part of the result, so a non-empty pointer
        always yields a leading empty token.

        Args:
            unescape: when True each token is percent-decoded and has ~1 / ~0 expanded.
        """
        parts = self._pointer.split("/")
        if not unescape:
            return parts

        return [self._unescape(part) for part in parts]

    @property
    def unescapated_value(self) -> str:
        """ The whole pointer string run through the unescaping step in one go """
        return self._unescape(self._pointer)

    def _unescape(self, data: str) -> str:
        """ Percent-decode ``data``, then expand "~1" to "/" and finally "~0" to "~" """
        decoded = urllib.parse.unquote(data)
        # "~1" has to be expanded before "~0", otherwise "~01" would wrongly end up as "/".
        return decoded.replace("~1", "/").replace("~0", "~")
51 changes: 51 additions & 0 deletions openapi_python_client/resolver/reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import urllib.parse

from .pointer import Pointer


class Reference:
    """ JSON Reference wrapper, see https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03 """

    # NOTE(review): per the PR author, this class was refactored and only quickly
    # integrated into ResolvedSchema without tests; there may be things left to fix there.

    def __init__(self, reference: str):
        self._ref = reference
        self._parsed_ref = urllib.parse.urlparse(reference)

    @property
    def path(self) -> str:
        """ The reference with its fragment removed """
        full_url = self._parsed_ref.geturl()
        defragged = urllib.parse.urldefrag(full_url)
        return defragged.url

    @property
    def pointer(self) -> Pointer:
        """ The fragment of the reference wrapped in a Pointer """
        fragment = self._parsed_ref.fragment
        # Only for url references is a non-empty fragment without a leading "/" given one;
        # any other such fragment is handed to Pointer unchanged.
        needs_slash = self.is_url() and fragment != "" and not fragment.startswith("/")
        return Pointer(f"/{fragment}" if needs_slash else fragment)

    def is_relative(self) -> bool:
        """ return True if reference path is a relative path """
        if self.is_absolute():
            return False
        return True

    def is_absolute(self) -> bool:
        """ return True if reference path is an absolute path """
        # "Absolute" here means the parsed reference carries a network location.
        return bool(self._parsed_ref.netloc)

    @property
    def value(self) -> str:
        """ The reference string exactly as given to the constructor """
        return self._ref

    def is_url(self) -> bool:
        """ return True if the reference path is pointing to an external url location """
        if not self.is_remote():
            return False
        return self._parsed_ref.netloc != ""

    def is_remote(self) -> bool:
        """ return True if the reference pointer is pointing to a remote document """
        return not self.is_local()

    def is_local(self) -> bool:
        """ return True if the reference pointer is pointing to the current document """
        # A reference whose parsed path component is empty stays in the current document.
        return not self._parsed_ref.path

    def is_full_document(self) -> bool:
        """ return True if the reference pointer is pointing to the whole document content """
        return self.pointer.parent is None
202 changes: 202 additions & 0 deletions openapi_python_client/resolver/resolved_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import hashlib
from typing import Any, Dict, Generator, List, Tuple, Union, cast

from .reference import Reference
from .resolver_types import SchemaData


class ResolvedSchema:
    """Fold the remote ``$ref`` targets of a root document back into that document.

    Construction runs the whole resolution pass unless ``errors`` is already non-empty.
    Afterwards ``schema`` exposes the root document (modified in place) and ``errors``
    the problems collected on the way.
    """

    def __init__(self, root: SchemaData, refs: Dict[str, SchemaData], errors: List[str]):
        """
        Args:
            root: Parsed root document; it is modified in place.
            refs: Already loaded remote documents, looked up with ``Reference.path``.
            errors: Errors collected so far. Resolution is skipped when it is non-empty;
                otherwise new errors are appended to this very list.
        """
        self._root: SchemaData = root
        # NOTE(review): per the PR author the keys of ``refs`` changed from relative
        # (./TSXXXX) to absolute (/absolute/path/to/TSXXX), so that two external documents
        # using the same relative reference for different files cannot clash. Whether
        # ``Reference.path`` of a relative reference equals such a key depends on
        # SchemaResolver, which is not visible from here -- confirm.
        self._refs: Dict[str, SchemaData] = refs
        self._errors: List[str] = errors
        self._resolved_remotes_components: SchemaData = cast(SchemaData, {})

        self._resolved_schema: SchemaData = cast(SchemaData, {})
        if not self._errors:
            self._process()

    @property
    def schema(self) -> SchemaData:
        """ The root document passed to the constructor """
        return self._root

    @property
    def errors(self) -> List[str]:
        """ A copy of the collected error messages """
        return self._errors.copy()

    def _process(self) -> None:
        """ Inline remote references under "paths", localise the others, then merge into the root """
        self._process_remote_paths()
        self._process_remote_components(self._root)
        # FIXME(review): update() is shallow, so a top-level key produced by the resolution
        # replaces the root's entry wholesale. The PR author observed the root's
        # components/securitySchemes being erased this way (test document:
        # https://raw.githubusercontent.com/jdegre/5GC_APIs/master/TS29504_Nudr_DataRepository.yaml),
        # which leaves an invalid document. Name collisions between resolved remote
        # components and components already present in the root are not handled either.
        self._root.update(self._resolved_remotes_components)

    def _process_remote_paths(self) -> None:
        """ Replace every non-local reference found under the root's "paths" by its target """
        refs_to_replace = []
        for owner, _ref_key, ref_val in self._lookup_schema_references_in(self._root, "paths"):
            ref = Reference(ref_val)

            if ref.is_local():
                continue

            # Reference.path is the document location (the key into ``refs``) and
            # Reference.pointer the location inside that document. The two were
            # previously used the other way round, so no lookup could succeed.
            document_key = ref.path
            # _lookup_dict compares against raw dictionary keys and its "/paths//" branch
            # expects an already unescaped path key, hence the unescaped pointer.
            query = ref.pointer.unescapated_value

            if document_key not in self._refs:
                self._errors.append("Failed to resolve remote reference > {0}".format(document_key))
                continue

            remote_schema = self._refs[document_key]
            remote_value = self._lookup_dict(remote_schema, query)
            if not remote_value:
                self._errors.append("Failed to read remote value {}, in remote ref {}".format(query, document_key))
            else:
                refs_to_replace.append((owner, remote_schema, remote_value))

        # Replacement is deferred until the walk is over: it adds and removes keys of
        # ``owner``, which must not happen while the generator still iterates that dict.
        for owner, remote_schema, remote_value in refs_to_replace:
            self._process_remote_components(remote_schema, remote_value, 1)
            self._replace_reference_with(owner, remote_value)

    def _process_remote_components(
        self, owner: SchemaData, subpart: Union[SchemaData, None] = None, depth: int = 0
    ) -> None:
        """Copy the targets of the references found in a document into the resolved components.

        Args:
            owner: Document that local references found during the walk belong to.
            subpart: Part of ``owner`` to walk; the whole ``owner`` when None or empty.
            depth: 0 for the root document, where local references need no work;
                greater than 0 when ``owner`` is a remote document.
        """
        target = subpart if subpart else owner

        for parent, _ref_key, ref_val in self._lookup_schema_references(target):
            ref = Reference(ref_val)

            if ref.is_local():
                # A local reference of a remote document points into that remote document,
                # so its target has to be brought over as well.
                if depth > 0:
                    self._transform_to_local_components(owner, ref)
                continue

            # Remote documents are looked up by document location (Reference.path).
            document_key = ref.path
            if document_key not in self._refs:
                self._errors.append("Failed to resolve remote reference > {0}".format(document_key))
                continue

            remote_owner = self._refs[document_key]
            self._transform_to_local_components(remote_owner, ref)
            self._transform_to_local_ref(parent, ref)

    def _transform_to_local_components(self, owner: SchemaData, ref: Reference) -> None:
        """Register the component ``ref`` points to (inside ``owner``) in the resolved components.

        The component is stored under the same pointer it has in ``owner`` and is then
        walked itself, so that whatever it references is brought over too.
        """
        self._ensure_components_dir_exists(ref)

        pointer_parent = ref.pointer.parent
        if pointer_parent is None:
            return

        # The location inside the document is the pointer, not Reference.path.
        query = ref.pointer.unescapated_value
        remote_component = self._lookup_dict(owner, query)
        root_components_dir = self._lookup_dict(self._resolved_remotes_components, pointer_parent.value)
        component_name = ref.pointer.tokens()[-1]

        if remote_component is None:
            print("Weirdy relookup of >> {0}".format(ref.value))
            assert ref.is_local() and self._lookup_dict(self._resolved_remotes_components, query)
            return

        if "$ref" in remote_component:
            subref = Reference(remote_component["$ref"])
            if not subref.is_local():
                print("Lookup remote ref >>> {0}".format(subref.value))
                self._process_remote_components(remote_component)
                return

        # ``is not None`` rather than truthiness: a directory that has just been created
        # is an empty dict, and must still receive its first component.
        if root_components_dir is not None:
            if component_name in root_components_dir:
                local_component_hash = self._reference_schema_hash(root_components_dir[component_name])
                remote_component_hash = self._reference_schema_hash(remote_component)

                if local_component_hash == remote_component_hash:
                    return

                # TODO(review): collision -- two remote components share a name but differ
                # in content. Nothing is done about it yet; per the PR author this is
                # definitely required. The author also suggests moving collision detection
                # and the choice of the new reference name into a class of their own, so
                # that the behaviour can be customised through dependency injection.
            else:
                root_components_dir[component_name] = remote_component
                self._process_remote_components(owner, remote_component, 2)

    def _ensure_components_dir_exists(self, ref: Reference) -> None:
        """ Create, inside the resolved components, the nested dicts leading to the parent of ``ref`` """
        cursor = self._resolved_remotes_components
        pointer_dir = ref.pointer.parent
        # A reference without a pointer parent (empty pointer) is not supported here.
        assert pointer_dir is not None

        for key in pointer_dir.value.split("/"):  # noqa
            if key == "":
                continue

            if key not in cursor:
                cursor[key] = {}

            cursor = cursor[key]

    def _transform_to_local_ref(self, owner: Dict[str, Any], ref: Reference) -> None:
        """ Rewrite the ``$ref`` of ``owner`` so that it points into the current document """
        # A local reference is "#" followed by the pointer; Reference.path is the document location.
        owner["$ref"] = "#{0}".format(ref.pointer.value)

    def _lookup_dict(self, attr: SchemaData, query: str) -> Union[SchemaData, None]:
        """Follow the "/"-separated ``query`` through nested dicts.

        Empty segments are skipped. For a query starting with "/paths", whatever remains
        once "/paths//" is collapsed to "/" and "/paths" is removed is used as one single key.

        Returns:
            The value found, or None as soon as a segment is missing or the current value
            is not a dict.
        """
        cursor = attr

        if query.startswith("/paths"):
            query_parts = ["paths", query.replace("/paths//", "/").replace("/paths", "")]
        else:
            query_parts = query.split("/")

        for key in query_parts:
            if key == "":
                continue

            if isinstance(cursor, dict) and key in cursor:
                cursor = cursor[key]
            else:
                return None
        return cursor

    def _replace_reference_with(self, root: Dict[str, Any], new_value: Dict[str, Any]) -> None:
        """ Replace the ``$ref`` entry of ``root`` by the entries of ``new_value`` """
        # The old reference is dropped first: popping after the copy would also throw
        # away a "$ref" that ``new_value`` itself carries.
        root.pop("$ref")
        root.update(new_value)

    def _lookup_schema_references_in(
        self, attr: SchemaData, path: str
    ) -> Generator[Tuple[SchemaData, str, Any], None, None]:
        """ Same as _lookup_schema_references, restricted to ``attr[path]``; yields nothing if absent """
        if not isinstance(attr, dict) or path not in attr:
            return

        yield from self._lookup_schema_references(attr[path])

    def _lookup_schema_references(self, attr: Any) -> Generator[Tuple[SchemaData, str, str], None, None]:
        """ Recursively yield (owning dict, "$ref", reference string) for each "$ref" entry under ``attr`` """
        if isinstance(attr, dict):
            for key, val in attr.items():
                if key == "$ref":
                    yield cast(SchemaData, attr), cast(str, key), cast(str, val)
                else:
                    yield from self._lookup_schema_references(val)

        elif isinstance(attr, list):
            for val in attr:
                yield from self._lookup_schema_references(val)

    def _reference_schema_hash(self, schema: Dict[str, Any]) -> str:
        """Return a coarse fingerprint of ``schema`` used to compare two components.

        Only the key names (except "description"), the value of "type" and the items of
        "allOf" take part; any other value is ignored.
        """
        hash_elms = []
        for key in schema:
            if key == "description":
                continue

            if key == "type":
                # str(): "type" is not guaranteed to be a string, and join() needs strings.
                hash_elms.append(str(schema[key]))

            if key == "allOf":
                hash_elms.extend(str(item) for item in schema[key])

            hash_elms.append(str(key))

        hash_elms.sort()
        md5 = hashlib.md5()
        md5.update(";".join(hash_elms).encode("utf-8"))
        return md5.hexdigest()
3 changes: 3 additions & 0 deletions openapi_python_client/resolver/resolver_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from typing import Any, Dict, NewType

SchemaData = NewType("SchemaData", Dict[str, Any])
Loading