diff --git a/setup.cfg b/setup.cfg index fa544bc..1fd5781 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,6 +48,9 @@ package_dir = # new major versions. This works if the required packages follow Semantic Versioning. # For more information, check out https://semver.org/. install_requires = + oold + opensemantic + opensemantic.core pydantic>=1.10.17 datamodel-code-generator>=0.25 mwclient>=0.11.0 @@ -68,7 +71,6 @@ install_requires = asyncio tqdm pybars3-wheel - backports.strenum; python_version<"3.11" [options.packages.find] where = src diff --git a/src/osw/auth.py b/src/osw/auth.py index 93e57c0..1df2f40 100644 --- a/src/osw/auth.py +++ b/src/osw/auth.py @@ -7,10 +7,10 @@ from warnings import warn import yaml +from opensemantic import OswBaseModel from pydantic.v1 import PrivateAttr from osw.defaults import paths as default_paths -from osw.model.static import OswBaseModel if TYPE_CHECKING: PossibleFilePath = Path diff --git a/src/osw/controller/database.py b/src/osw/controller/database.py index 8bc65ec..16b7a0d 100644 --- a/src/osw/controller/database.py +++ b/src/osw/controller/database.py @@ -1,5 +1,6 @@ from typing import Optional, Union +from opensemantic import OswBaseModel from sqlalchemy import URL, create_engine from sqlalchemy import text as sql_text from sqlalchemy.engine import Engine @@ -7,7 +8,6 @@ import osw.model.entity as model from osw.auth import CredentialManager from osw.core import OSW -from osw.model.static import OswBaseModel class DatabaseController(model.Database): diff --git a/src/osw/controller/page_package.py b/src/osw/controller/page_package.py index 26bc70a..e65ab44 100644 --- a/src/osw/controller/page_package.py +++ b/src/osw/controller/page_package.py @@ -6,6 +6,7 @@ from typing import Optional, Union from warnings import warn +from opensemantic import OswBaseModel from pydantic.v1 import FilePath from typing_extensions import Dict, List @@ -13,7 +14,6 @@ from osw.auth import CredentialManager from osw.model import page_package as package from osw.model.page_package import NAMESPACE_CONST_TO_NAMESPACE_MAPPING -from osw.model.static import OswBaseModel from osw.utils.regex import RegExPatternExtended from osw.wtsite import WtPage, WtSite @@ -372,6 +372,17 @@ class CreationConfig(OswBaseModel): offline_pages: Optional[Dict[str, WtPage]] = None """A dictionary of pages that are already loaded. Pages in this dictionary will not be fetched again.""" + prefer_local_pages: bool = False + """Load the pages from the local working directory + instead of the server if set to True.""" + generate_python_code: bool = False + """Whether to generate python code for the data models.""" + python_code_working_dir: Optional[Union[str, Path]] = None + """Working directory for python code generation. If set, pydantic v2 data models + will be generated in this directory, v1 models in a /v1 subdirectory. + """ + python_code_filename: Optional[str] = "_model_generated.py" + """Filename for the generated python code.""" class Config: arbitrary_types_allowed = True @@ -418,6 +429,57 @@ def create( ) }, ) + + offline_pages = creation_config.offline_pages + local_pages = {} + if creation_config.prefer_local_pages: + # Read the local pages from the package + result = wtsite.read_page_package( + WtSite.ReadPagePackageParam( + package_name=self.name, + storage_path=Path(creation_config.working_dir), + ) + ) + local_pages = {p.title: p for p in result.pages} + if offline_pages is None: + offline_pages = local_pages + else: + # Merge the local pages with the offline pages + offline_pages.update(local_pages) + + if ( + creation_config.generate_python_code + and creation_config.python_code_working_dir is not None + ): + python_code_path = Path(creation_config.python_code_working_dir) + python_code_path /= creation_config.python_code_filename + schema_titles = self.page_titles + # remove duplicates and entries in ignore_titles + schema_titles = list( + set(schema_titles) + - ( + set(creation_config.ignore_titles) + if creation_config.ignore_titles + else set() + ) + ) + # remove all schemas that do not start with "Category:" + schema_titles = [ + title for title in schema_titles if title.startswith("Category:") + ] + from osw.core import OSW + + osw_obj = OSW(site=wtsite) + + osw_obj.fetch_schema( + fetchSchemaParam=OSW.FetchSchemaParam( + schema_title=schema_titles, + offline_pages=offline_pages, + result_model_path=python_code_path, + mode="replace", + ) + ) + # Create a PagePackageConfig instance config = package.PagePackageConfig( name=self.name, @@ -432,7 +494,7 @@ def create( wtsite.create_page_package( WtSite.CreatePagePackageParam( config=config, - offline_pages=creation_config.offline_pages, + offline_pages=offline_pages, ) ) diff --git a/src/osw/core.py b/src/osw/core.py index 2e35618..8b93b08 100644 --- a/src/osw/core.py +++ b/src/osw/core.py @@ -18,13 +18,22 @@ import rdflib from jsonpath_ng.ext import parse from mwclient.client import Site +from oold.generator import Generator +from oold.model.v1 import ( + ResolveParam, + Resolver, + ResolveResult, + SetResolverParam, + set_resolver, +) +from oold.utils.codegen import OOLDJsonSchemaParser +from opensemantic import OswBaseModel from pydantic import PydanticDeprecatedSince20 from pydantic.v1 import BaseModel, Field, PrivateAttr, create_model, validator from pyld import jsonld import osw.model.entity as model from osw.defaults import params as default_params -from osw.model.static import OswBaseModel from osw.utils.oold import ( AggregateGeneratedSchemasParam, AggregateGeneratedSchemasParamMode, @@ -99,6 +108,31 @@ class Config: site: WtSite + def __init__(self, **data: Any): + super().__init__(**data) + + # implement resolver backend with osw.load_entity + class OswDefaultResolver(Resolver): + + osw_obj: OSW + + def resolve(self, request: ResolveParam): + # print("RESOLVE", request) + entities = self.osw_obj.load_entity( + OSW.LoadEntityParam(titles=request.iris) + ).entities + # create a dict with request.iris as keys and the loaded entities as values + # by iterating over both lists + nodes = {} + for iri, entity in zip(request.iris, entities): + nodes[iri] = entity + return ResolveResult(nodes=nodes) + + r = OswDefaultResolver(osw_obj=self) + set_resolver(SetResolverParam(iri="Item", resolver=r)) + set_resolver(SetResolverParam(iri="Category", resolver=r)) + set_resolver(SetResolverParam(iri="Property", resolver=r)) + @property def mw_site(self) -> Site: """Returns the mwclient Site object of the OSW instance.""" @@ -348,6 +382,17 @@ class FetchSchemaParam(BaseModel): ) legacy_generator: Optional[bool] = False """uses legacy command line for code generation if true""" + generate_annotations: Optional[bool] = True + """generate custom schema keywords in Fields and Classes. + Required to update the schema in OSW without information loss""" + offline_pages: Optional[Dict[str, WtPage]] = None + """pages to be used offline instead of fetching them from the OSW instance""" + result_model_path: Optional[Union[str, pathlib.Path]] = None + """path to the generated model file, if None, + the default path ./model/entity.py is used""" + + class Config: + arbitrary_types_allowed = True def fetch_schema(self, fetchSchemaParam: FetchSchemaParam = None) -> None: """Loads the given schemas from the OSW instance and auto-generates python @@ -370,6 +415,9 @@ def fetch_schema(self, fetchSchemaParam: FetchSchemaParam = None) -> None: schema_title=schema_title, mode=mode, legacy_generator=fetchSchemaParam.legacy_generator, + generate_annotations=fetchSchemaParam.generate_annotations, + offline_pages=fetchSchemaParam.offline_pages, + result_model_path=fetchSchemaParam.result_model_path, ) ) first = False @@ -396,6 +444,19 @@ class _FetchSchemaParam(BaseModel): ) legacy_generator: Optional[bool] = False """uses legacy command line for code generation if true""" + generate_annotations: Optional[bool] = False + """generate custom schema keywords in Fields and Classes. + Required to update the schema in OSW without information loss""" + offline_pages: Optional[Dict[str, WtPage]] = None + """pages to be used offline instead of fetching them from the OSW instance""" + result_model_path: Optional[Union[str, pathlib.Path]] = None + """path to the generated model file, if None, + the default path ./model/entity.py is used""" + fetched_schema_titles: Optional[List[str]] = [] + """keep track of fetched schema titles to prevent recursion""" + + class Config: + arbitrary_types_allowed = True def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: """Loads the given schema from the OSW instance and autogenerates python @@ -411,12 +472,23 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: if fetchSchemaParam is None: fetchSchemaParam = OSW._FetchSchemaParam() schema_title = fetchSchemaParam.schema_title + fetchSchemaParam.fetched_schema_titles.append(schema_title) root = fetchSchemaParam.root schema_name = schema_title.split(":")[-1] - page = self.site.get_page(WtSite.GetPageParam(titles=[schema_title])).pages[0] - if not page.exists: - print(f"Error: Page {schema_title} does not exist") - return + if ( + fetchSchemaParam.offline_pages is not None + and schema_title in fetchSchemaParam.offline_pages + ): + print(f"Fetch {schema_title} from offline pages") + page = fetchSchemaParam.offline_pages[schema_title] + else: + print(f"Fetch {schema_title} from online pages") + page = self.site.get_page(WtSite.GetPageParam(titles=[schema_title])).pages[ + 0 + ] + if not page.exists: + print(f"Error: Page {schema_title} does not exist") + return # not only in the JsonSchema namespace the schema is located in the main sot # in all other namespaces, the json_schema slot is used if schema_title.startswith("JsonSchema:"): @@ -433,6 +505,14 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: if (schema_str is None) or (schema_str == ""): print(f"Error: Schema {schema_title} does not exist") schema_str = "{}" # empty schema to make reference work + + generator = Generator() + schemas_for_preprocessing = [json.loads(schema_str)] + generator.preprocess( + Generator.GenerateParams(json_schemas=schemas_for_preprocessing) + ) + schema_str = json.dumps(schemas_for_preprocessing[0]) + schema = json.loads( schema_str.replace("$ref", "dollarref").replace( # '$' is a special char for root object in jsonpath @@ -441,7 +521,6 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: ) # fix https://github.com/koxudaxi/datamodel-code-generator/issues/1910 ) - print(f"Fetch {schema_title}") jsonpath_expr = parse("$..dollarref") for match in jsonpath_expr.find(schema): @@ -461,10 +540,12 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: # print(f"replace {match.value} with {value}") if ( ref_schema_title != schema_title + and ref_schema_title not in fetchSchemaParam.fetched_schema_titles ): # prevent recursion in case of self references - self._fetch_schema( - OSW._FetchSchemaParam(schema_title=ref_schema_title, root=False) - ) # resolve references recursive + _param = fetchSchemaParam.copy() + _param.root = False + _param.schema_title = ref_schema_title + self._fetch_schema(_param) # resolve references recursive model_dir_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), "model" @@ -480,6 +561,10 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: # result_model_path = schema_path.replace(".json", ".py") result_model_path = os.path.join(model_dir_path, "entity.py") + if fetchSchemaParam.result_model_path: + result_model_path = fetchSchemaParam.result_model_path + if not isinstance(result_model_path, str): + result_model_path = str(result_model_path) temp_model_path = os.path.join(model_dir_path, "temp.py") if root: if fetchSchemaParam.legacy_generator: @@ -505,7 +590,7 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: --input {schema_path} \ --input-file-type jsonschema \ --output {temp_model_path} \ - --base-class osw.model.static.OswBaseModel \ + --base-class opensemantic.OswBaseModel \ --use-default \ --use-unique-items-as-set \ --enum-field-as-literal all \ @@ -522,15 +607,25 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: # suppress deprecation warnings from pydantic # see https://github.com/koxudaxi/datamodel-code-generator/issues/2213 warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20) + + if fetchSchemaParam.generate_annotations: + # monkey patch class + datamodel_code_generator.parser.jsonschema.JsonSchemaParser = ( + OOLDJsonSchemaParser + ) datamodel_code_generator.generate( input_=pathlib.Path(schema_path), input_file_type="jsonschema", output=pathlib.Path(temp_model_path), - base_class="osw.model.static.OswBaseModel", + base_class="opensemantic.OswBaseModel", # use_default=True, apply_default_values_for_required_fields=True, use_unique_items_as_set=True, - enum_field_as_literal=datamodel_code_generator.LiteralType.All, + # enum_field_as_literal=datamodel_code_generator.LiteralType.All, + enum_field_as_literal="all", + # will create MyEnum(str, Enum) instead of MyEnum(Enum) + use_subclass_enum=True, + set_default_enum_member=True, use_title_as_name=True, use_schema_description=True, use_field_description=True, @@ -538,9 +633,47 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: use_double_quotes=True, collapse_root_models=True, reuse_model=True, + field_include_all_keys=True, ) warnings.filterwarnings("default", category=PydanticDeprecatedSince20) + # note: we could use OOLDJsonSchemaParser directly (see below), + # but datamodel_code_generator.generate + # does some pre- and postprocessing we do not want to duplicate + + # data_model_type = datamodel_code_generator.DataModelType.PydanticBaseModel + # #data_model_type = DataModelType.PydanticV2BaseModel + # target_python_version = datamodel_code_generator.PythonVersion.PY_38 + # data_model_types = datamodel_code_generator.model.get_data_model_types( + # data_model_type, target_python_version + # ) + # parser = OOLDJsonSchemaParserFixedRefs( + # source=pathlib.Path(schema_path), + + # base_class="opensemantic.OswBaseModel", + # data_model_type=data_model_types.data_model, + # data_model_root_type=data_model_types.root_model, + # data_model_field_type=data_model_types.field_model, + # data_type_manager_type=data_model_types.data_type_manager, + # target_python_version=target_python_version, + + # #use_default=True, + # apply_default_values_for_required_fields=True, + # use_unique_items_as_set=True, + # enum_field_as_literal=datamodel_code_generator.LiteralType.All, + # use_title_as_name=True, + # use_schema_description=True, + # use_field_description=True, + # encoding="utf-8", + # use_double_quotes=True, + # collapse_root_models=True, + # reuse_model=True, + # #field_include_all_keys=True + # ) + # result = parser.parse() + # with open(temp_model_path, "w", encoding="utf-8") as f: + # f.write(result) + # see https://koxudaxi.github.io/datamodel-code-generator/ # --base-class OswBaseModel: use a custom base class # --custom-template-dir src/model/template_data/ @@ -590,8 +723,8 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: header = ( "from uuid import uuid4\n" "from typing import Type, TypeVar\n" - "from osw.model.static import OswBaseModel, Ontology\n" - # "from osw.model.static import *\n" + "from opensemantic import OswBaseModel\n" + # "from opensemantic import *\n" "\n" ) @@ -614,7 +747,6 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None: r"class\s*([\S]*)\s*\(\s*[\S\s]*?\s*\)\s*:.*\n" ) # match class definition [\s\S]*(?:[^\S\n]*\n){2,} for cls in re.findall(pattern, org_content): - print(cls) content = re.sub( r"(class\s*" + cls diff --git a/src/osw/data/mining.py b/src/osw/data/mining.py index 2ac86a7..579528c 100644 --- a/src/osw/data/mining.py +++ b/src/osw/data/mining.py @@ -6,5 +6,5 @@ # # from pydantic import validator # -# from osw.model.static import OswBaseModel +# from opensemantic import OswBaseModel # from osw.utils.strings import * diff --git a/src/osw/defaults.py b/src/osw/defaults.py index ee6b45b..56352bd 100644 --- a/src/osw/defaults.py +++ b/src/osw/defaults.py @@ -4,9 +4,7 @@ from pathlib import Path from typing import List, Union -from pydantic.v1 import PrivateAttr, validator - -from osw.model.static import OswBaseModel +from pydantic.v1 import BaseModel, PrivateAttr, validator PACKAGE_ROOT_PATH = Path(__file__).parents[2] SRC_PATH = PACKAGE_ROOT_PATH / "src" @@ -18,7 +16,7 @@ WIKI_DOMAIN_DEFAULT = "wiki-dev.open-semantic-lab.org" -class FilePathDefault(OswBaseModel): +class FilePathDefault(BaseModel): """A class to store the default file path. This is a helper class to make the default file path, defined within this module, accessible from a calling script.""" @@ -53,7 +51,7 @@ def get(self): return self._default -class Defaults(OswBaseModel): +class Defaults(BaseModel): """Helper class to create an inheriting classes for storing default values.""" _changed: List[str] = PrivateAttr(default_factory=list) diff --git a/src/osw/express.py b/src/osw/express.py index 7cb52a7..0cf1e47 100644 --- a/src/osw/express.py +++ b/src/osw/express.py @@ -12,6 +12,7 @@ from warnings import warn import requests +from opensemantic import OswBaseModel from pydantic.v1 import validator from typing_extensions import ( IO, @@ -30,7 +31,6 @@ from osw.core import OSW, OVERWRITE_CLASS_OPTIONS, OverwriteOptions from osw.defaults import params as default_params from osw.defaults import paths as default_paths -from osw.model.static import OswBaseModel from osw.utils.wiki import namespace_from_full_title, title_from_full_title from osw.wtsite import WtSite diff --git a/src/osw/model/entity.py b/src/osw/model/entity.py index d2310b5..dd2e437 100644 --- a/src/osw/model/entity.py +++ b/src/osw/model/entity.py @@ -1,166 +1,25 @@ -# generated by datamodel-codegen: -# filename: Item.json -# timestamp: 2024-09-12T12:35:11+00:00 - -from __future__ import annotations - -from typing import Any, List, Literal, Optional, Set, Union -from uuid import UUID, uuid4 - -from pydantic.v1 import Field, constr - -from osw.model.static import OswBaseModel - - -class ReadAccess(OswBaseModel): - level: Optional[Literal["public", "internal", "restricted"]] = Field( - None, title="Level" - ) - - -class AccessRestrictions(OswBaseModel): - read: Optional[ReadAccess] = Field(None, title="Read access") - - -class Label(OswBaseModel): - text: constr(min_length=1) = Field(..., title="Text") - lang: Optional[Literal["en", "de"]] = Field("en", title="Lang code") - - -class Description(Label): - pass - - -class WikiPage(OswBaseModel): - """ - The wiki page containing this entity - """ - - title: Optional[str] = Field(None, title="Title") - """ - The page title - """ - namespace: Optional[str] = Field(None, example="Category", title="Namespace") - """ - The page namespace - """ - - -class Meta(OswBaseModel): - uuid: UUID = Field(default_factory=uuid4, title="UUID") - wiki_page: Optional[WikiPage] = Field(None, title="Wiki page") - """ - The wiki page containing this entity - """ - change_id: Optional[List[str]] = Field(None, title="Change IDs") - """ - To keep track of concerted changes - """ - - -class Entity(OswBaseModel): - rdf_type: Optional[Set[str]] = Field(None, title="Additional RDF type(s)") - """ - Declares additional type(s) for this entity, e.g., to state that this entity has the same meaning as a term in a controlled vocabulary or ontology. This property is synonymous to the schema:additionalType and owl:sameAs. The default syntax is ontology:TermName. The ontology prefix has to be defined in the @context of the Entity, the category or any of the parent categories. The term name has to be a valid identifier in the ontology. - """ - uuid: UUID = Field(default_factory=uuid4, title="UUID") - iri: Optional[str] = Field(None, title="IRI") - """ - The Internationalized Resource Identifier (IRI) of this entity - """ - name: Optional[str] = Field(None, title="Technical name") - """ - Technical / Machine compatible name - """ - label: List[Label] = Field(..., min_items=1, title="Label(s)") - """ - At least one label is required. - """ - short_name: Optional[List[Label]] = Field(None, title="Short name(s)") - """ - Abbreviation, Acronym, etc. - """ - query_label: Optional[str] = Field(None, title="Query label") - description: Optional[List[Description]] = Field(None, title="Description") - image: Optional[str] = Field(None, title="Image") - ordering_categories: Optional[List[str]] = Field(None, title="Ordering categories") - """ - Ordering categories are used to categorize instances, e.g., according to their use but not their properties. When querying for instances of a here listed ordering category, this instance will be returned. Note: Ordering categories define no properties, while 'regular' categories define properties, which an instance assigns values to. - """ - keywords: Optional[List[str]] = Field(None, title="Keywords / Tags") - """ - Designated to the user defined categorization of this element - """ - based_on: Optional[List[str]] = Field(None, title="Based on") - """ - Other entities on which this one is based, e.g. when it is created by copying - """ - statements: Optional[ - List[Union[ObjectStatement, DataStatement, QuantityStatement]] - ] = Field(None, title="Statements") - attachments: Optional[List[str]] = Field(None, title="File attachments") - meta: Optional[Meta] = None - - -class ObjectStatement(OswBaseModel): - rdf_type: Optional[Any] = "rdf:Statement" - uuid: UUID = Field(default_factory=uuid4, title="UUID") - label: Optional[List[Label]] = Field(None, title="Label") - """ - Human readable name - """ - subject: Optional[str] = Field(None, title="Subject") - substatements: Optional[ - List[Union[ObjectStatement, DataStatement, QuantityStatement]] - ] = Field(None, title="Substatements") - predicate: str = Field(..., title="Predicate") - object: str = Field(..., title="Object") - - -class DataStatement(OswBaseModel): - rdf_type: Optional[Any] = "rdf:Statement" - uuid: UUID = Field(default_factory=uuid4, title="UUID") - label: Optional[List[Label]] = Field(None, title="Label") - """ - Human readable name - """ - subject: Optional[str] = Field(None, title="Subject") - substatements: Optional[ - List[Union[ObjectStatement, DataStatement, QuantityStatement]] - ] = Field(None, title="Substatements") - property: str = Field(..., title="Property") - value: str = Field(..., title="Value") - - -class QuantityStatement(OswBaseModel): - rdf_type: Optional[Any] = "rdf:Statement" - uuid: UUID = Field(default_factory=uuid4, title="UUID") - label: Optional[List[Label]] = Field(None, title="Label") - """ - Human readable name - """ - subject: Optional[str] = Field(None, title="Subject") - substatements: Optional[ - List[Union[ObjectStatement, DataStatement, QuantityStatement]] - ] = Field(None, title="Substatements") - quantity: str = Field(..., title="Property") - numerical_value: str = Field(..., title="Value") - unit: str = Field(..., title="Unit") - unit_symbol: str - value: str = Field(..., title="Value") - - -class Item(Entity): - type: Optional[List[str]] = Field( - ["Category:Item"], min_items=1, title="Types/Categories" - ) - entry_access: Optional[AccessRestrictions] = Field( - None, title="Access restrictions" - ) - - -Entity.update_forward_refs() -ObjectStatement.update_forward_refs() -DataStatement.update_forward_refs() -QuantityStatement.update_forward_refs() -Item.update_forward_refs() +# import all classes from opensemantic.core + +from opensemantic.core import ( # isort:skip + OswBaseModel, + Label, + Entity, + Item, + DefinedTerm, + Keyword, + IntangibleItem, + AccessRestrictions, + ReadAccess, + Meta, + WikiPage, + LangCode, + Description, + ObjectStatement, + DataStatement, + QuantityStatement, + Level, + File, + LocalFile, + RemoteFile, + WikiFile, +) # noqa: F401, E402 diff --git a/src/osw/model/static.py b/src/osw/model/static.py deleted file mode 100644 index 8b18fd7..0000000 --- a/src/osw/model/static.py +++ /dev/null @@ -1,390 +0,0 @@ -""" -This module is to be imported in the dynamically created and updated entity.py module. -""" - -from typing import TYPE_CHECKING, Literal, Optional, Type, TypeVar, Union -from uuid import UUID, uuid4 -from warnings import warn - -from pydantic.v1 import BaseModel, Field, constr - -from osw.custom_types import NoneType -from osw.utils.strings import pascal_case - -T = TypeVar("T", bound=BaseModel) - -# This is dirty, but required for autocompletion: -# https://stackoverflow.com/questions/62884543/pydantic-autocompletion-in-vs-code -# Ideally, solved by custom templates in the future: -# https://github.com/koxudaxi/datamodel-code-generator/issues/860 -# ToDo: Still needed? - -if TYPE_CHECKING: - from dataclasses import dataclass as _basemodel_decorator -else: - _basemodel_decorator = lambda x: x # noqa: E731 - - -def custom_issubclass(obj: Union[type, T], class_name: str) -> bool: - """ - Custom issubclass function that checks if the object is a subclass of a class - with the given name. - - Parameters - ---------- - obj : object - The object to check. - class_name : str - The name of the class to check against. - - Returns - ------- - bool - True if the object is a subclass of the class with the given name, - False otherwise. - """ - - def check_bases(cls, name): - if hasattr(cls, "__name__") and cls.__name__ == name: - return True - if not hasattr(cls, "__bases__"): - return False - for base in cls.__bases__: - if check_bases(base, name): - return True - return False - - return check_bases(obj, class_name) - - -def custom_isinstance(obj: Union[type, T], class_name: str) -> bool: - """ - Custom isinstance function that checks if the object is an instance of a class with - the given name. - - Parameters - ---------- - obj : object - The object to check. - class_name : str - The name of the class to check against. - - Returns - ------- - bool - True if the object is an instance of the class with the given name, - False otherwise. - """ - if not hasattr(obj, "__class__"): - return False - - return custom_issubclass(obj.__class__, class_name) - - -@_basemodel_decorator -class OswBaseModel(BaseModel): - - class Config: - """Configuration for the OswBaseModel""" - - # strict = False - # extra = "ignore" - # Additional fields are ignored - validate_assignment = True - # Ensures that the assignment of a value to a field is validated - smart_union = True - # To avoid unexpected coercing of types, the smart_union option is enabled - # See: https://docs.pydantic.dev/1.10/usage/model_config/#smart-union - # Not required in v2 as this will become the new default - - def __init__(self, **data): - if data.get("label"): - if not isinstance(data["label"], list): - raise ValueError( - "label must be a list of Label objects", - ) - labels = [] - for label in data["label"]: - if isinstance(label, dict): - labels.append(Label(**label)) - else: - # The list element should be a Label object - labels.append(label) - data["label"] = labels - # Ensure that the label attribute is a list of Label objects, but use - # custom_isinstance to avoid circular imports and ValidationError since - # osw.model.entity defines its own Label class - if not all(custom_isinstance(label, "Label") for label in data["label"]): - raise ValueError( - "label must be a list of Label objects", - ) - if data.get("name") is None and "label" in data: - data["name"] = pascal_case(data["label"][0].text) - if "uuid" not in data: - # If no uuid is provided, generate a new one - data["uuid"] = OswBaseModel._init_uuid(**data) - super().__init__(**data) - - @classmethod - def _init_uuid(cls, **data) -> UUID: - """Generates a random UUID for the entity if not provided during initialization. - This method can be overridden to generate a UUID based on the data, e.g. - for using a UUIDv5 based on the name: - ```python - def _get_uuid(**data) -> UUID: - namespace_uuid = uuid.UUID("0dd6c54a-b162-4552-bab9-9942ccaf4f41") - return uuid.uuid5(namespace_uuid, data["name"]) - ``` - """ - - # default: random UUID - return uuid4() - - def full_dict(self, **kwargs): # extent BaseClass export function - d = super().dict(**kwargs) - for key in ("_osl_template", "_osl_footer"): - if hasattr(self, key): - d[key] = getattr(self, key) - # Include selected private properties. note: private properties are not - # considered as discriminator - return d - - def cast( - self, - cls: Union[Type[T], type], - none_to_default: bool = False, - remove_extra: bool = False, - silent: bool = True, - **kwargs, - ) -> T: - """Casting self into target class - - Parameters - ---------- - cls - target class - kwargs - additional attributes to be set - none_to_default - If True, attributes that are None will be set to their default value - remove_extra - If True, extra attributes that are passed to the constructor are removed - silent - If True, no warnings are printed - Returns - ------- - instance of target class - """ - - def empty_list_or_none( - obj: Union[ - NoneType, - list, - ] - ) -> bool: - if obj is None: - return True - elif isinstance(obj, list): - if len(obj) == 0: - return True - elif len([item for item in obj if item is not None]) == 0: - return True - return False - - combined_args = {**self.dict(), **kwargs} - none_args = [] - if none_to_default: - reduced = {} - for k, v in combined_args.items(): - if empty_list_or_none(v): - none_args.append(k) - else: - reduced[k] = v - combined_args = reduced - extra_args = [] - if remove_extra: - reduced = {} - for k, v in combined_args.items(): - if k not in cls.__fields__.keys(): - extra_args.append(k) - else: - reduced[k] = v - combined_args = reduced - if not silent: - if none_to_default and none_args: - warn(f"Removed attributes with None or empty list values: {none_args}") - if remove_extra and extra_args: - warn(f"Removed extra attributes: {extra_args}") - if "type" in combined_args: - del combined_args["type"] - return cls(**combined_args) - - def cast_none_to_default(self, cls: Union[Type[T], type], **kwargs) -> T: - """Casting self into target class. If the passed attribute is None or solely - includes None values, the attribute is not passed to the instance of the - target class, which will then fall back to the default.""" - - return self.cast(cls, none_to_default=True, **kwargs) - - def get_uuid(self) -> Union[str, UUID, NoneType]: - """Getter for the attribute 'uuid' of the entity - - Returns - ------- - The uuid as a string or None if the uuid could not be determined - """ - return getattr(self, "uuid", None) - - def get_osw_id(self) -> Union[str, NoneType]: - """Determines the OSW-ID based on the entity's uuid. - - Returns - ------- - The OSW-ID as a string or None if the OSW-ID could not be determined - """ - return get_osw_id(self) - - def get_namespace(self) -> Union[str, NoneType]: - """Determines the wiki namespace based on the entity's type/class - - Returns - ------- - The namespace as a string or None if the namespace could not be determined - """ - return get_namespace(self) - - def get_title(self) -> Union[str, NoneType]: - """Determines the wiki page title based on the entity's data - - Returns - ------- - The title as a string or None if the title could not be determined - """ - return get_title(self) - - def get_iri(self) -> Union[str, NoneType]: - """Determines the IRI / wiki full title (namespace:title) based on the entity's - data - - Returns - ------- - The full title as a string or None if the title could not be determined. - """ - return get_full_title(self) - - -def get_osw_id(entity: Union[OswBaseModel, Type[OswBaseModel]]) -> Union[str, NoneType]: - """Determines the OSW-ID based on the entity's data - either from the entity's - attribute 'osw_id' or 'uuid'. - - Parameters - ---------- - entity - The entity to determine the OSW-ID for - - Returns - ------- - The OSW-ID as a string or None if the OSW-ID could not be determined - """ - osw_id = getattr(entity, "osw_id", None) - uuid = entity.get_uuid() - from_uuid = None if uuid is None else f"OSW{str(uuid).replace('-', '')}" - if osw_id is None: - return from_uuid - if osw_id != from_uuid: - raise ValueError(f"OSW-ID does not match UUID: {osw_id} != {from_uuid}") - return osw_id - - -def get_namespace( - entity: Union[OswBaseModel, Type[OswBaseModel]] -) -> Union[str, NoneType]: - """Determines the wiki namespace based on the entity's type/class - - Parameters - ---------- - entity - The entity to determine the namespace for - - Returns - ------- - The namespace as a string or None if the namespace could not be determined - """ - namespace = None - - if hasattr(entity, "meta") and entity.meta and entity.meta.wiki_page: - if entity.meta.wiki_page.namespace: - namespace = entity.meta.wiki_page.namespace - - if namespace is None: - if custom_issubclass(entity, "Entity"): - namespace = "Category" - elif custom_isinstance(entity, "Category"): - namespace = "Category" - elif custom_issubclass(entity, "Characteristic"): - namespace = "Category" - elif custom_isinstance(entity, "Item"): - namespace = "Item" - elif custom_isinstance(entity, "Property"): - namespace = "Property" - elif custom_isinstance(entity, "WikiFile"): - namespace = "File" - - return namespace - - -def get_title(entity: OswBaseModel) -> Union[str, NoneType]: - """Determines the wiki page title based on the entity's data - - Parameters - ---------- - entity - the entity to determine the title for - - Returns - ------- - the title as a string or None if the title could not be determined - """ - title = None - - if hasattr(entity, "meta") and entity.meta and entity.meta.wiki_page: - if entity.meta.wiki_page.title: - title = entity.meta.wiki_page.title - - if title is None: - title = get_osw_id(entity) - - return title - - -def get_full_title(entity: OswBaseModel) -> Union[str, NoneType]: - """determines the wiki full title (namespace:title) based on the entity's data - - Parameters - ---------- - entity - the entity to determine the full title for - - Returns - ------- - the full title as a string or None if the title could not be determined - """ - namespace = get_namespace(entity) - title = get_title(entity) - if namespace is not None and title is not None: - return namespace + ":" + title - elif title is not None: - return title - - -class Ontology(OswBaseModel): - iri: str - prefix: str - name: str - prefix_name: str - link: str - - -class Label(OswBaseModel): - text: constr(min_length=1) = Field(..., title="Text") - lang: Optional[Literal["en", "de"]] = Field("en", title="Lang code") diff --git a/src/osw/ontology.py b/src/osw/ontology.py index 749b125..a7b2472 100644 --- a/src/osw/ontology.py +++ b/src/osw/ontology.py @@ -4,13 +4,13 @@ import uuid from typing import Dict, List, Literal, Optional, Type +from opensemantic import OswBaseModel from pydantic.v1 import PrivateAttr from pyld import jsonld from rdflib import Graph from typing_extensions import deprecated from osw.core import OSW, model -from osw.model.static import OswBaseModel from osw.utils.strings import camel_case, pascal_case from osw.utils.wiki import get_namespace from osw.wtsite import WtSite @@ -810,9 +810,7 @@ def _store_ontology(self, param: StoreOntologyParam): if namespace == "Category": smw_import_type = "Category" if not hasattr(e, "subclass_of") or e.subclass_of is None: - e.subclass_of = [] - if len(e.subclass_of) == 0: - e.subclass_of.append(self.import_config.meta_class_title) + e.subclass_of = [self.import_config.meta_class_title] elif namespace == "Property": smw_import_type = "Type:" + e.cast(model.Property).property_type else: diff --git a/src/osw/utils/regex.py b/src/osw/utils/regex.py index 3012d2d..46c5a88 100644 --- a/src/osw/utils/regex.py +++ b/src/osw/utils/regex.py @@ -1,10 +1,9 @@ import re from typing import Dict, List, Optional, Union +from opensemantic import OswBaseModel from pydantic.v1 import validator -from osw.model.static import OswBaseModel - # Classes class RegExPatternExtended(OswBaseModel): diff --git a/src/osw/utils/wiki.py b/src/osw/utils/wiki.py index 3149627..2d5bda8 100644 --- a/src/osw/utils/wiki.py +++ b/src/osw/utils/wiki.py @@ -2,7 +2,7 @@ from uuid import UUID # Legacy imports: -from osw.model.static import get_full_title, get_namespace, get_title # noqa: F401 +from opensemantic import get_full_title, get_namespace, get_title # noqa: F401 def get_osw_id(uuid: UUID) -> str: diff --git a/src/osw/wiki_tools.py b/src/osw/wiki_tools.py index 6ce979f..c32294a 100644 --- a/src/osw/wiki_tools.py +++ b/src/osw/wiki_tools.py @@ -3,9 +3,9 @@ import mwclient import yaml +from opensemantic import OswBaseModel from pydantic.v1 import FilePath -from osw.model.static import OswBaseModel from osw.utils.util import parallelize # try import functions from wikitext.py (relies on the extra dependency osw[wikitext]) diff --git a/src/osw/wtsite.py b/src/osw/wtsite.py index d4e7a0e..733ebbb 100644 --- a/src/osw/wtsite.py +++ b/src/osw/wtsite.py @@ -22,6 +22,7 @@ import requests from jsonpath_ng.ext import parse from mwclient.page import Page as MwPage +from opensemantic import OswBaseModel from pydantic.v1 import FilePath from typing_extensions import deprecated @@ -29,7 +30,6 @@ import osw.utils.util as ut import osw.wiki_tools as wt from osw.auth import CredentialManager -from osw.model.static import OswBaseModel from osw.utils.regex_pattern import REGEX_PATTERN_LIB from osw.utils.util import parallelize from osw.utils.wiki import get_osw_id @@ -848,6 +848,9 @@ class ReadPagePackageParam(OswBaseModel): """A list of slots that should be read. If None, all slots are read.""" debug: Optional[bool] = False """If True, debug information is printed to the console.""" + offline: Optional[bool] = True + """Skip reading the page content from the webserver + before reading the local content, if True.""" class ReadPagePackageResult(OswBaseModel): """Return type of read_page_package.""" @@ -957,19 +960,9 @@ def get_slot_content( namespace = page["namespace"].split("_")[-1].capitalize() name = page["name"] # Create the WtPage object - page_obj = WtPage(wtSite=self, title=f"{namespace}:{name}") - if "main" in selected_slots: - # Main slot is special - slot_content = get_slot_content( - parent_dir=sub_dirs, - url_path=page["urlPath"], - files_in_storage_path=storage_path_content["files"], - ) - if slot_content is not None: - page_obj.set_slot_content( - slot_key="main", - content=slot_content, - ) + page_obj = WtPage( + wtSite=self, title=f"{namespace}:{name}", do_init=not param.offline + ) if selected_slots is None: _selected_slots = page["slots"] else: @@ -979,16 +972,29 @@ def get_slot_content( if slot_name in selected_slots } for slot_name, slot_dict in _selected_slots.items(): - slot_content = get_slot_content( - parent_dir=sub_dirs, - url_path=slot_dict["urlPath"], - files_in_storage_path=storage_path_content["files"], - ) - if slot_content is not None: - page_obj.set_slot_content( - slot_key=slot_name, - content=slot_content, + if slot_name == "main": + # Main slot is special + slot_content = get_slot_content( + parent_dir=sub_dirs, + url_path=page["urlPath"], + files_in_storage_path=storage_path_content["files"], ) + if slot_content is not None: + page_obj.set_slot_content( + slot_key="main", + content=slot_content, + ) + else: + slot_content = get_slot_content( + parent_dir=sub_dirs, + url_path=slot_dict["urlPath"], + files_in_storage_path=storage_path_content["files"], + ) + if slot_content is not None: + page_obj.set_slot_content( + slot_key=slot_name, + content=slot_content, + ) pages.append(page_obj) return WtSite.ReadPagePackageResult(pages=pages)