Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/quick-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,8 @@ jobs:

# Install dev dependencies
pip install -r requirements.txt

# Install black for the samples-generation script
pip install black
- name: Run pre-commit checks
uses: pre-commit/[email protected]
44 changes: 44 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
# Manually triggered release workflow: merges "dev" into "main", tags the merge commit
# with the requested version, realigns "dev" on "main" and pushes both branches and
# the new tag to the remote.
name: Release
on:
  workflow_dispatch:
    inputs:
      version:
        description: Version of the release
        # Without a version the merge message is incomplete and no tag is created
        required: true
jobs:
  release:
    runs-on: ubuntu-22.04
    permissions:
      contents: write
    steps:
      - name: Checkout sources
        uses: actions/checkout@v4
        with:
          ref: main
          # Get Git tags so that versioneer can function correctly
          # See issue https://github.com/actions/checkout/issues/701
          fetch-depth: 0
      - name: Update "main" branch
        env:
          # Hand the input over through the environment so that its value is never
          # expanded as part of the shell script itself
          VERSION: ${{ inputs.version }}
        run: |-
          # Set the git user
          # NOTE(review): the e-mail below looks redacted in this view; confirm the
          # real address before relying on it
          git config --global user.name "${{ github.triggering_actor }}"
          git config --global user.email "[email protected]"

          # Fetch the dev branch
          git fetch origin dev
          git switch dev  # To activate the local copy
          git switch main

          # Merge dev into main, tag the merge commit
          # The message is double-quoted so that the quotes around 'dev' are kept
          git merge --no-ff -m "Merge branch 'dev' for release ${VERSION}" dev
          git tag "${VERSION}"

          # Make dev point to main
          git switch dev
          git reset --hard main

          # Update remotes
          git switch dev
          git push
          git switch main
          git push

          # A plain "git push" only sends the current branch: push the tag explicitly
          git push origin "refs/tags/${VERSION}"
13 changes: 10 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
---
repos:
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.3.0
rev: 24.4.2
hooks:
- id: black
language_version: python3
- repo: https://github.com/pycqa/pylint
rev: v3.1.0
rev: v3.2.5
hooks:
- id: pylint
language_version: python3
Expand All @@ -16,13 +16,20 @@ repos:
hooks:
- id: isort
language_version: python3
exclude: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
- id: isort
alias: isort-samples
name: isort-samples
language_version: python3
files: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
args: [--no-sections]
- repo: https://github.com/lyz-code/yamlfix/
rev: 1.16.0
hooks:
- id: yamlfix
exclude: packaging/conda/meta.yaml
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.28.1
rev: 0.29.0
hooks:
- id: check-github-workflows
args: [--verbose]
Expand Down
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@
- Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1.
- Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.

## 10.2.2.2 - 2024-07-19

### Fixed
- (`core`) Documentation of the `specific_pairs` parameter for the `train_predictor` and
`train_recoder` core API functions.

### Deprecated
- (`core`) The following parameters of the `train_predictor` core API function:
- `max_groups`
- `max_intervals`
- `min_group_frequency`
- `min_interval_frequency`
- `results_prefix`
- `snb_predictor`
- `univariate_predictor_number`
- `discretization_method` for supervised learning
- `grouping_method` for supervised learning

## 10.2.2.1 - 2024-07-05

### Changed
Expand Down
11 changes: 9 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,21 @@
# List of patterns, relative to source directory, that match files and directories to
# ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_templates", "_build", "Thumbs.db", ".DS_Store"]

exclude_patterns = [
"_templates",
"_build",
"Thumbs.db",
".DS_Store",
# NOTE(review): presumably the checkpoint directories created by Jupyter; confirm
"**.ipynb_checkpoints",
]
# HTML Theme
# Theme colors and fonts come from https://brand.orange.com
html_theme = "furo"
html_theme_options = {
"light_css_variables": {
"color-brand-primary": "#FF7900",
"color-brand-content": "#F16E00",
"color-brand-visited": "#FF7900",
"color-sidebar-background": "#FFFFFF",
"color-highlighted-background": "#FFD200",
"color-admonition-title--note": "#FF7900",
Expand All @@ -79,6 +85,7 @@
"dark_css_variables": {
"color-brand-primary": "#FF7900",
"color-brand-content": "#F16E00",
"color-brand-visited": "#FF7900",
"color-sidebar-background": "#000000",
"color-highlighted-background": "#FFD200",
"color-admonition-title--note": "#FF7900",
Expand Down
122 changes: 42 additions & 80 deletions doc/convert_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,11 @@
import sys
import textwrap


def create_boilerplate_code(script_name):
    """Returns the import preamble of a samples script as a list of source lines

    Parameters
    ----------
    script_name : str
        Name of the samples script: either "samples" or "samples_sklearn".

    Returns
    -------
    list of str
        The lines of the preamble, each one terminated by a newline. A new list is
        built on every call.

    Raises
    ------
    `ValueError`
        If ``script_name`` is not one of the two supported names.
    """
    # Reject unknown script names upfront
    if script_name not in ("samples", "samples_sklearn"):
        raise ValueError(f"Invalid samples script name '{script_name}'")

    # Preamble of the core API samples
    if script_name == "samples":
        return [
            "import os\n",
            "from math import sqrt\n",
            "from os import path\n",
            "\n",
            "from khiops import core as kh\n",
            "\n",
        ]

    # Preamble of the sklearn API samples
    stdlib_lines = [
        "import os\n",
        "import pickle\n",
        "from os import path\n",
        "\n",
    ]
    third_party_lines = [
        "import pandas as pd\n",
        "from sklearn import metrics\n",
        "from sklearn.compose import ColumnTransformer\n",
        "from sklearn.experimental import enable_hist_gradient_boosting\n",
        "from sklearn.ensemble import HistGradientBoostingClassifier\n",
        "from sklearn.datasets import fetch_20newsgroups\n",
        "from sklearn.feature_extraction.text import HashingVectorizer\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.preprocessing import OneHotEncoder\n",
        "\n",
    ]
    khiops_lines = [
        "from khiops import core as kh\n",
        "from khiops.sklearn import (\n",
        " KhiopsClassifier,\n",
        " KhiopsCoclustering,\n",
        " KhiopsEncoder,\n",
        " KhiopsRegressor,\n",
        ")\n",
    ]
    return stdlib_lines + third_party_lines + khiops_lines
import black


def create_header_cells(script_name):
"""Creates the header cells for the notebook"""
boilerplate_code = create_boilerplate_code(script_name)

# Create the boilerplate cells
cells = [
{
Expand All @@ -66,39 +26,41 @@ def create_header_cells(script_name):
"[Khiops](https://khiops.org) before using this this notebook",
],
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {"collapsed": True},
"outputs": [],
"source": boilerplate_code,
},
]
return cells


def create_sample_cell(sample_method):
def create_sample_cells(sample_method):
"""Creates a markdown title cell and a code cell for the specified method

The markdown cell shows the method name as a heading followed by the method's
docstring. The code cell contains the code that follows the docstring in the
method's source, dedented and formatted with black.
"""

# Create the code block
code, docstring = split_docstring(inspect.getsource(sample_method))
code = textwrap.dedent(code)
code = black.format_str(code, mode=black.Mode())

# Create the cell source as a list of lines
sample_method_source = inspect.getsource(sample_method)
sample_source_list = [line + "\n" for line in sample_method_source.split("\n")]
sample_source_list += ["#Run sample\n", sample_method.__name__ + "()"]
code_list = [line + "\n" for line in code.rstrip().split("\n")]
# Strip the trailing newline from the last line of the cell
code_list[-1] = code_list[-1].rstrip()

sample_execution_cell = {
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": sample_source_list,
}
sample_execution_cells = [
{
"cell_type": "markdown",
"metadata": {},
"source": [f"### `{sample_method.__name__}()`\n\n", f"{docstring}\n"],
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": code_list,
},
]

return sample_execution_cell
return sample_execution_cells


def create_rest_page_header(script_name):
boilerplate_code = "".join(create_boilerplate_code(script_name))
indented_boilerplate_code = textwrap.indent(boilerplate_code, " ")
subtitle = "The code snippets on this page demonstrate the basic use of the "
if script_name == "samples":
title = "Samples core"
Expand Down Expand Up @@ -139,38 +101,37 @@ def create_rest_page_header(script_name):
" from khiops.tools import download_datasets\n"
" download_datasets()\n"
"\n"
"Before copying any code snippet make sure to precede it with following\n"
"preamble:\n"
"\n"
".. code-block:: python\n"
"\n"
f"{indented_boilerplate_code}"
"\n"
"Samples\n"
"-------\n"
)


def split_docstring(source):
    """Splits the source code of a sample into its code and its docstring

    The docstring is the text between the first two occurrences of triple double
    quotes. The returned code is what follows the closing quotes and the character
    right after them (expected to be a newline); anything located before the
    docstring is discarded.

    Parameters
    ----------
    source : str
        Source code to split.

    Returns
    -------
    tuple
        A 2-tuple containing:

        - The source code located after the docstring.
        - The contents of the docstring, without its quotes.

        If there is no docstring, or if it is not terminated, the result is the
        unmodified ``source`` and an empty docstring.
    """
    quotes = '"""'

    # No docstring: the whole source is code
    docstring_open_quote = source.find(quotes)
    if docstring_open_quote == -1:
        return source, ""

    # Unterminated docstring: do not slice with the -1 returned by `find`
    docstring_start = docstring_open_quote + len(quotes)
    docstring_close_quote = source.find(quotes, docstring_start)
    if docstring_close_quote == -1:
        return source, ""

    # Skip the closing quotes and the newline that follows them
    source_without_docstring = source[docstring_close_quote + len(quotes) + 1 :]
    docstring = source[docstring_start:docstring_close_quote]
    return source_without_docstring, docstring


def remove_docstring(source):
    """Returns the code part of `split_docstring`

    Kept so that code still calling the former helper name keeps working.
    """
    source_without_docstring, _ = split_docstring(source)
    return source_without_docstring


def create_rest_page_section(sample_function):
"""Creates the reST section for the specified sample function

The section is made of an ``autofunction`` directive for the function followed by
a ``code-block`` directive containing its code.
"""
code = f"def {sample_function.__name__}():\n" + remove_docstring(
inspect.getsource(sample_function)
)
indented_code = textwrap.indent(code, " ")
# Keep only the code that follows the docstring, dedented and formatted with black
code, _ = split_docstring(inspect.getsource(sample_function))
code = textwrap.dedent(code)
code = black.format_str(code, mode=black.Mode())
# Indent the code so that it is nested in the code-block directive
code = textwrap.indent(code, " ")
code = code.rstrip()
return (
f".. autofunction:: {sample_function.__name__}\n"
".. code-block:: python\n"
"\n"
f"{indented_code}"
f"{code}"
)


Expand All @@ -184,6 +145,7 @@ def main(args):

# Sanity check
script_path = os.path.join(args.samples_dir, f"{script_name}.py")
print(f"Converting to format '{args.format}' samples script at {script_path}")
if os.path.abspath(script_path) == os.path.abspath(args.output_path):
print("error: input and output paths are the same")
sys.exit(1)
Expand All @@ -210,7 +172,7 @@ def main(args):
notebook_objects = {}
notebook_objects["cells"] = create_header_cells(script_name)
for sample_method in samples.exported_samples:
notebook_objects["cells"].append(create_sample_cell(sample_method))
notebook_objects["cells"].extend(create_sample_cells(sample_method))
notebook_objects["metadata"] = {}
notebook_objects["nbformat"] = 4
notebook_objects["nbformat_minor"] = 2
Expand Down
Loading