Skip to content

Replace pony with sqlalchemy>=1.4.36. #387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/rtd_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ dependencies:
- click-default-group
- networkx >=2.4
- pluggy
- pony >=0.7.15
- pybaum >=0.1.1
- pexpect
- rich
- sqlalchemy >=1.4.36
- tomli >=1.0.0

- pip:
Expand Down
1 change: 1 addition & 0 deletions docs/source/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
## 0.4.0 - 2023-xx-xx

- {pull}`323` remove Python 3.7 support and use a new Github action to provide mamba.
- {pull}`387` replaces pony with sqlalchemy.

## 0.3.2 - 2023-06-07

Expand Down
17 changes: 17 additions & 0 deletions docs/source/reference_guides/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,23 @@ are welcome to also support macOS.

````

````{confval} database_url

pytask uses a database to keep track of tasks, products, and dependencies over runs. By
default, it will create a SQLite database in the project's root directory called
`.pytask.sqlite3`. If you want to use a different name or a different dialect
[supported by sqlalchemy](https://docs.sqlalchemy.org/en/latest/core/engines.html#backend-specific-urls),
use either {option}`pytask build --database-url` or `database_url` in the config.

```toml
database_url = "sqlite:///.pytask.sqlite3"
```

Relative paths for SQLite databases are resolved relative to the directory of the
configuration file if one is used; otherwise, they are resolved relative to the root
directory.

````

````{confval} editor_url_scheme

Depending on your terminal, pytask is able to turn task ids into clickable links to the
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ dependencies:
- click-default-group
- networkx >=2.4
- pluggy
- pony >=0.7.15
- pybaum >=0.1.1
- rich
- sqlalchemy >=1.4.36
- tomli >=1.0.0

# Misc
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ install_requires =
networkx>=2.4
packaging
pluggy
pony>=0.7.15
pybaum>=0.1.1
rich
sqlalchemy>=1.4.36
tomli>=1.0.0
python_requires = >=3.8
include_package_data = True
Expand Down
5 changes: 4 additions & 1 deletion src/_pytask/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,10 @@ def _collect_all_paths_known_to_pytask(session: Session) -> set[Path]:
if session.config["config"]:
known_paths.add(session.config["config"])
known_paths.add(session.config["root"])
known_paths.add(session.config["database_filename"])

database_url = session.config["database_url"]
if database_url.drivername == "sqlite" and database_url.database:
known_paths.add(Path(database_url.database))

# Add files tracked by git.
if is_git_installed():
Expand Down
4 changes: 2 additions & 2 deletions src/_pytask/click.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,11 @@ def _format_help_text( # noqa: C901, PLR0912, PLR0915

if show_default_is_str or (show_default and (default_value is not None)):
if show_default_is_str:
default_string = f"({param.show_default})" # type: ignore[attr-defined]
default_string = param.show_default # type: ignore[attr-defined]
elif isinstance(default_value, (list, tuple)):
default_string = ", ".join(str(d) for d in default_value)
elif inspect.isfunction(default_value):
default_string = _("(dynamic)")
default_string = _("dynamic")
elif param.is_bool_flag and param.secondary_opts: # type: ignore[attr-defined]
# For boolean flags that have distinct True/False opts,
# use the opt without prefix instead of the value.
Expand Down
11 changes: 5 additions & 6 deletions src/_pytask/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from _pytask.dag_utils import node_and_neighbors
from _pytask.dag_utils import task_and_descending_tasks
from _pytask.dag_utils import TopologicalSorter
from _pytask.database_utils import DatabaseSession
from _pytask.database_utils import State
from _pytask.exceptions import ResolvingDependenciesError
from _pytask.mark import Mark
Expand All @@ -30,7 +31,6 @@
from _pytask.shared import reduce_names_of_multiple_nodes
from _pytask.shared import reduce_node_name
from _pytask.traceback import render_exc_info
from pony import orm
from pybaum import tree_map
from rich.text import Text
from rich.tree import Tree
Expand Down Expand Up @@ -126,7 +126,6 @@ def _have_task_or_neighbors_changed(
)


@orm.db_session
@hookimpl(trylast=True)
def pytask_dag_has_node_changed(node: MetaNode, task_name: str) -> bool:
"""Indicate whether a single dependency or product has changed."""
Expand All @@ -136,11 +135,11 @@ def pytask_dag_has_node_changed(node: MetaNode, task_name: str) -> bool:
if file_state is None:
return True

with DatabaseSession() as session:
db_state = session.get(State, (task_name, node.name))

# If the node is not in the database.
try:
name = node.name
db_state = State[task_name, name] # type: ignore[type-arg, valid-type]
except orm.ObjectNotFound:
if db_state is None:
return True

# If the modification times match, the node has not been changed.
Expand Down
91 changes: 24 additions & 67 deletions src/_pytask/database.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,43 @@
"""Implement the database managed with pony."""
"""Contains hooks related to the database."""
from __future__ import annotations

import enum
from pathlib import Path
from typing import Any

import click
from _pytask.click import EnumChoice
from _pytask.config import hookimpl
from _pytask.database_utils import create_database
from click import Context


class _DatabaseProviders(enum.Enum):
SQLITE = "sqlite"
POSTGRES = "postgres"
MYSQL = "mysql"
ORACLE = "oracle"
COCKROACH = "cockroach"


def _database_filename_callback(
ctx: Context, name: str, value: str | None # noqa: ARG001
) -> str | None:
if value is None:
return ctx.params["root"].joinpath(".pytask.sqlite3")
return value


@hookimpl
def pytask_extend_command_line_interface(cli: click.Group) -> None:
"""Extend command line interface."""
additional_parameters = [
click.Option(
["--database-provider"],
type=EnumChoice(_DatabaseProviders),
help=(
"Database provider. All providers except sqlite are considered "
"experimental."
),
default=_DatabaseProviders.SQLITE,
),
click.Option(
["--database-filename"],
type=click.Path(file_okay=True, dir_okay=False, path_type=Path),
help=("Path to database relative to root."),
default=Path(".pytask.sqlite3"),
callback=_database_filename_callback,
),
click.Option(
["--database-create-db"],
type=bool,
help="Create database if it does not exist.",
default=True,
),
click.Option(
["--database-create-tables"],
type=bool,
help="Create tables if they do not exist.",
default=True,
),
]
cli.commands["build"].params.extend(additional_parameters)
from sqlalchemy.engine import make_url


@hookimpl
def pytask_parse_config(config: dict[str, Any]) -> None:
"""Parse the configuration."""
if not config["database_filename"].is_absolute():
config["database_filename"] = config["root"].joinpath(
config["database_filename"]
# Set default.
if not config["database_url"]:
config["database_url"] = make_url(
f"sqlite:///{config['root'].as_posix()}/.pytask.sqlite3"
)

config["database"] = {
"provider": config["database_provider"].value,
"filename": config["database_filename"].as_posix(),
"create_db": config["database_create_db"],
"create_tables": config["database_create_tables"],
}
if (
config["database_url"].drivername == "sqlite"
and config["database_url"].database
) and not Path(config["database_url"].database).is_absolute():
if config["config"]:
full_path = (
config["config"]
.parent.joinpath(config["database_url"].database)
.resolve()
)
else:
full_path = (
config["root"].joinpath(config["database_url"].database).resolve()
)
config["database_url"] = config["database_url"]._replace(
database=full_path.as_posix()
)


@hookimpl
def pytask_post_parse(config: dict[str, Any]) -> None:
"""Post-parse the configuration."""
create_database(**config["database"])
create_database(config["database_url"])
68 changes: 39 additions & 29 deletions src/_pytask/database_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,54 +6,64 @@
from _pytask.dag_utils import node_and_neighbors
from _pytask.nodes import Task
from _pytask.session import Session
from pony import orm
from sqlalchemy import Column
from sqlalchemy import create_engine
from sqlalchemy import String
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import sessionmaker


__all__ = ["create_database", "db", "update_states_in_database"]
__all__ = ["create_database", "update_states_in_database", "DatabaseSession"]


db = orm.Database()
DatabaseSession = sessionmaker()


class State(db.Entity): # type: ignore[name-defined]
Base = declarative_base()


class State(Base): # type: ignore[valid-type, misc]
"""Represent the state of a node in relation to a task."""

task = orm.Required(str)
node = orm.Required(str)
modification_time = orm.Required(str)
file_hash = orm.Optional(str)
__tablename__ = "state"

orm.PrimaryKey(task, node)
task = Column(String, primary_key=True)
node = Column(String, primary_key=True)
modification_time = Column(String)
file_hash = Column(String)


def create_database(
provider: str, filename: str, *, create_db: bool, create_tables: bool
) -> None:
def create_database(url: str) -> None:
"""Create the database."""
try:
db.bind(provider=provider, filename=filename, create_db=create_db)
db.generate_mapping(create_tables=create_tables)
except orm.BindingError:
pass
engine = create_engine(url)
Base.metadata.create_all(bind=engine)
DatabaseSession.configure(bind=engine)
except Exception:
raise


@orm.db_session
def _create_or_update_state(
first_key: str, second_key: str, modification_time: str, file_hash: str
) -> None:
"""Create or update a state."""
try:
state_in_db = State[first_key, second_key] # type: ignore[type-arg, valid-type]
except orm.ObjectNotFound:
State(
task=first_key,
node=second_key,
modification_time=modification_time,
file_hash=file_hash,
)
else:
state_in_db.modification_time = modification_time
state_in_db.file_hash = file_hash
with DatabaseSession() as session:
state_in_db = session.get(State, (first_key, second_key))

if not state_in_db:
session.add(
State(
task=first_key,
node=second_key,
modification_time=modification_time,
file_hash=file_hash,
)
)
else:
state_in_db.modification_time = modification_time
state_in_db.file_hash = file_hash

session.commit()


def update_states_in_database(session: Session, task_name: str) -> None:
Expand Down
29 changes: 28 additions & 1 deletion src/_pytask/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
import click
from _pytask.config import hookimpl
from _pytask.config_utils import set_defaults_from_config
from click import Context
from sqlalchemy.engine import make_url
from sqlalchemy.engine import URL
from sqlalchemy.exc import ArgumentError


_CONFIG_OPTION = click.Option(
Expand Down Expand Up @@ -67,11 +71,34 @@
"""click.Option: An option to embed URLs in task ids."""


def _database_url_callback(
ctx: Context, name: str, value: str | None # noqa: ARG001
) -> URL:
try:
return make_url(value)
except ArgumentError:
raise click.BadParameter(
"The 'database_url' must conform to sqlalchemy's url standard: "
"https://docs.sqlalchemy.org/en/latest/core/engines.html"
"#backend-specific-urls."
) from None


_DATABASE_URL_OPTION = click.Option(
    ["--database-url"],
    type=str,
    default=None,
    callback=_database_url_callback,
    help="Url to the database.",
    show_default="sqlite:///.../.pytask.sqlite3",
)
"""click.Option: An option to set the url of the database."""


@hookimpl(trylast=True)
def pytask_extend_command_line_interface(cli: click.Group) -> None:
"""Register general markers."""
for command in ("build", "clean", "collect", "dag", "profile"):
cli.commands[command].params.append(_PATH_ARGUMENT)
cli.commands[command].params.extend([_PATH_ARGUMENT, _DATABASE_URL_OPTION])
for command in ("build", "clean", "collect", "dag", "markers", "profile"):
cli.commands[command].params.append(_CONFIG_OPTION)
for command in ("build", "clean", "collect", "profile"):
Expand Down
Loading