Skip to content

Commit fe00d30

Browse files
Merge pull request #214 from KhiopsML/dev
Release 10.2.2.2
2 parents 27c9f62 + c89b398 commit fe00d30

File tree

17 files changed

+5170
-4499
lines changed

17 files changed

+5170
-4499
lines changed

.github/workflows/quick-checks.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,8 @@ jobs:
2424
2525
# Install dev dependencies
2626
pip install -r requirements.txt
27+
28+
# Install black for the samples-generation script
29+
pip install black
2730
- name: Run pre-commit checks
2831
uses: pre-commit/[email protected]

.github/workflows/release.yml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
---
2+
name: Release
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
version:
7+
description: Version of the release
8+
jobs:
9+
release:
10+
runs-on: ubuntu-22.04
11+
permissions:
12+
contents: write
13+
steps:
14+
- name: Checkout sources
15+
uses: actions/checkout@v4
16+
with:
17+
ref: main
18+
# Get Git tags so that versioneer can function correctly
19+
# See issue https://github.com/actions/checkout/issues/701
20+
fetch-depth: 0
21+
- name: Update "main" branch
22+
run: |-
23+
# Set the git user
24+
git config --global user.name "${{ github.triggering_actor }}"
25+
git config --global user.email "[email protected]"
26+
27+
# Fetch the dev branch
28+
git fetch origin dev
29+
git switch dev # To activate the local copy
30+
git switch main
31+
32+
# Merge dev into main, tag the merge commit
33+
git merge --no-ff -m'Merge branch 'dev' for release ${{ inputs.version }}' dev
34+
git tag ${{ inputs.version }}
35+
36+
# Make dev point to main
37+
git switch dev
38+
git reset --hard main
39+
40+
# Update remotes
41+
git switch dev
42+
git push
43+
git switch main
44+
git push

.pre-commit-config.yaml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
---
22
repos:
33
- repo: https://github.com/psf/black-pre-commit-mirror
4-
rev: 24.3.0
4+
rev: 24.4.2
55
hooks:
66
- id: black
77
language_version: python3
88
- repo: https://github.com/pycqa/pylint
9-
rev: v3.1.0
9+
rev: v3.2.5
1010
hooks:
1111
- id: pylint
1212
language_version: python3
@@ -16,13 +16,20 @@ repos:
1616
hooks:
1717
- id: isort
1818
language_version: python3
19+
exclude: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
20+
- id: isort
21+
alias: isort-samples
22+
name: isort-samples
23+
language_version: python3
24+
files: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
25+
args: [--no-sections]
1926
- repo: https://github.com/lyz-code/yamlfix/
2027
rev: 1.16.0
2128
hooks:
2229
- id: yamlfix
2330
exclude: packaging/conda/meta.yaml
2431
- repo: https://github.com/python-jsonschema/check-jsonschema
25-
rev: 0.28.1
32+
rev: 0.29.0
2633
hooks:
2734
- id: check-github-workflows
2835
args: [--verbose]

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,24 @@
66
- Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1.
77
- Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.
88

9+
## 10.2.2.2 - 2024-07-19
10+
11+
### Fixed
12+
- (`core`) Documentation of the `specific_pairs` parameter for the `train_predictor` and
13+
`train_recoder` core API functions.
14+
15+
### Deprecated
16+
- (`core`) The following parameters of the `train_predictor` core API function:
17+
- `max_groups`
18+
- `max_intervals`
19+
- `min_group_frequency`
20+
- `min_interval_frequency`
21+
- `results_prefix`
22+
- `snb_predictor`
23+
- `univariate_predictor_number`
24+
- `discretization_method` for supervised learning
25+
- `grouping_method` for supervised learning
26+
927
## 10.2.2.1 - 2024-07-05
1028

1129
### Changed

doc/conf.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,21 @@
6161
# List of patterns, relative to source directory, that match files and directories to
6262
# ignore when looking for source files.
6363
# This pattern also affects html_static_path and html_extra_path.
64-
exclude_patterns = ["_templates", "_build", "Thumbs.db", ".DS_Store"]
65-
64+
exclude_patterns = [
65+
"_templates",
66+
"_build",
67+
"Thumbs.db",
68+
".DS_Store",
69+
"**.ipynb_checkpoints",
70+
]
6671
# HTML Theme
6772
# Theme colors and fonts come from https://brand.orange.com
6873
html_theme = "furo"
6974
html_theme_options = {
7075
"light_css_variables": {
7176
"color-brand-primary": "#FF7900",
7277
"color-brand-content": "#F16E00",
78+
"color-brand-visited": "#FF7900",
7379
"color-sidebar-background": "#FFFFFF",
7480
"color-highlighted-background": "#FFD200",
7581
"color-admonition-title--note": "#FF7900",
@@ -79,6 +85,7 @@
7985
"dark_css_variables": {
8086
"color-brand-primary": "#FF7900",
8187
"color-brand-content": "#F16E00",
88+
"color-brand-visited": "#FF7900",
8289
"color-sidebar-background": "#000000",
8390
"color-highlighted-background": "#FFD200",
8491
"color-admonition-title--note": "#FF7900",

doc/convert_samples.py

Lines changed: 42 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -8,51 +8,11 @@
88
import sys
99
import textwrap
1010

11-
12-
def create_boilerplate_code(script_name):
13-
if script_name == "samples":
14-
boilerplate_code = [
15-
"import os\n",
16-
"from math import sqrt\n",
17-
"from os import path\n",
18-
"\n",
19-
"from khiops import core as kh\n",
20-
"\n",
21-
]
22-
elif script_name == "samples_sklearn":
23-
boilerplate_code = [
24-
"import os\n",
25-
"import pickle\n",
26-
"from os import path\n",
27-
"\n",
28-
"import pandas as pd\n",
29-
"from sklearn import metrics\n",
30-
"from sklearn.compose import ColumnTransformer\n",
31-
"from sklearn.experimental import enable_hist_gradient_boosting\n",
32-
"from sklearn.ensemble import HistGradientBoostingClassifier\n",
33-
"from sklearn.datasets import fetch_20newsgroups\n",
34-
"from sklearn.feature_extraction.text import HashingVectorizer\n",
35-
"from sklearn.model_selection import train_test_split\n",
36-
"from sklearn.pipeline import Pipeline\n",
37-
"from sklearn.preprocessing import OneHotEncoder\n",
38-
"\n",
39-
"from khiops import core as kh\n",
40-
"from khiops.sklearn import (\n",
41-
" KhiopsClassifier,\n",
42-
" KhiopsCoclustering,\n",
43-
" KhiopsEncoder,\n",
44-
" KhiopsRegressor,\n",
45-
")\n",
46-
]
47-
else:
48-
raise ValueError(f"Invalid samples script name '{script_name}'")
49-
return boilerplate_code
11+
import black
5012

5113

5214
def create_header_cells(script_name):
5315
"""Creates the header cells for the notebook"""
54-
boilerplate_code = create_boilerplate_code(script_name)
55-
5616
# Create the boilerplate cells
5717
cells = [
5818
{
@@ -66,39 +26,41 @@ def create_header_cells(script_name):
6626
"[Khiops](https://khiops.org) before using this this notebook",
6727
],
6828
},
69-
{
70-
"cell_type": "code",
71-
"execution_count": None,
72-
"metadata": {"collapsed": True},
73-
"outputs": [],
74-
"source": boilerplate_code,
75-
},
7629
]
7730
return cells
7831

7932

80-
def create_sample_cell(sample_method):
33+
def create_sample_cells(sample_method):
    """Creates the notebook cells for the specified sample method

    Parameters
    ----------
    sample_method : callable
        Sample function whose source code is obtained with `inspect.getsource`.

    Returns
    -------
    list of dict
        Two notebook cells, in this order:

        - a markdown cell with the name of the method as title followed by its
          docstring,
        - a code cell with the body of the method (without its ``def`` line and
          docstring), dedented and formatted with black.
    """
    # Create the code block: separate the body from the docstring, bring the body to
    # the top indentation level and let black normalize its formatting
    code, docstring = split_docstring(inspect.getsource(sample_method))
    code = textwrap.dedent(code)
    code = black.format_str(code, mode=black.Mode())

    # Normalize the docstring indentation
    # Its continuation lines still carry the indentation of the function body, and
    # Markdown renders paragraphs indented by 4 or more spaces as code blocks
    docstring = inspect.cleandoc(docstring)

    # Create the cell source as a list of lines
    # Every line but the last one ends with a newline
    code_lines = code.rstrip().split("\n")
    code_list = [line + "\n" for line in code_lines[:-1]] + [code_lines[-1]]

    sample_execution_cells = [
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [f"### `{sample_method.__name__}()`\n\n", f"{docstring}\n"],
        },
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": code_list,
        },
    ]

    return sample_execution_cells
9761

9862

9963
def create_rest_page_header(script_name):
100-
boilerplate_code = "".join(create_boilerplate_code(script_name))
101-
indented_boilerplate_code = textwrap.indent(boilerplate_code, " ")
10264
subtitle = "The code snippets on this page demonstrate the basic use of the "
10365
if script_name == "samples":
10466
title = "Samples core"
@@ -139,38 +101,37 @@ def create_rest_page_header(script_name):
139101
" from khiops.tools import download_datasets\n"
140102
" download_datasets()\n"
141103
"\n"
142-
"Before copying any code snippet make sure to precede it with following\n"
143-
"preamble:\n"
144-
"\n"
145-
".. code-block:: python\n"
146-
"\n"
147-
f"{indented_boilerplate_code}"
148104
"\n"
149105
"Samples\n"
150106
"-------\n"
151107
)
152108

153109

154-
def remove_docstring(source):
155-
docstring_open = source.find('"""')
156-
if docstring_open == -1:
110+
def split_docstring(source):
    """Splits the source code of a function into its code and its docstring

    The docstring is taken to be the text enclosed by the first two occurrences of
    a triple double-quote delimiter in ``source``.

    Parameters
    ----------
    source : str
        Source code of a function, for example as returned by `inspect.getsource`.

    Returns
    -------
    tuple
        A 2-tuple ``(source_without_docstring, docstring)`` of `str`:

        - If a complete docstring is found: the text located after the closing
          delimiter and the single character following it (normally the end of
          line), and the raw text between the delimiters.
        - If there is no docstring, or it is not terminated: ``source`` unchanged
          and the empty string.
    """
    delimiter = '"""'

    # No opening delimiter: there is nothing to split
    docstring_start = source.find(delimiter)
    if docstring_start == -1:
        return source, ""
    docstring_start += len(delimiter)

    # No closing delimiter: do not guess where the docstring ends
    docstring_end = source.find(delimiter, docstring_start)
    if docstring_end == -1:
        return source, ""

    # Skip the closing delimiter plus the character right after it
    source_without_docstring = source[docstring_end + len(delimiter) + 1 :]
    docstring = source[docstring_start:docstring_end]
    return source_without_docstring, docstring
162122

163123

164124
def create_rest_page_section(sample_function):
165-
code = f"def {sample_function.__name__}():\n" + remove_docstring(
166-
inspect.getsource(sample_function)
167-
)
168-
indented_code = textwrap.indent(code, " ")
125+
code, _ = split_docstring(inspect.getsource(sample_function))
126+
code = textwrap.dedent(code)
127+
code = black.format_str(code, mode=black.Mode())
128+
code = textwrap.indent(code, " ")
129+
code = code.rstrip()
169130
return (
170131
f".. autofunction:: {sample_function.__name__}\n"
171132
".. code-block:: python\n"
172133
"\n"
173-
f"{indented_code}"
134+
f"{code}"
174135
)
175136

176137

@@ -184,6 +145,7 @@ def main(args):
184145

185146
# Sanity check
186147
script_path = os.path.join(args.samples_dir, f"{script_name}.py")
148+
print(f"Converting to format '{args.format}' samples script at {script_path}")
187149
if os.path.abspath(script_path) == os.path.abspath(args.output_path):
188150
print("error: input and output paths are the same")
189151
sys.exit(1)
@@ -210,7 +172,7 @@ def main(args):
210172
notebook_objects = {}
211173
notebook_objects["cells"] = create_header_cells(script_name)
212174
for sample_method in samples.exported_samples:
213-
notebook_objects["cells"].append(create_sample_cell(sample_method))
175+
notebook_objects["cells"].extend(create_sample_cells(sample_method))
214176
notebook_objects["metadata"] = {}
215177
notebook_objects["nbformat"] = 4
216178
notebook_objects["nbformat_minor"] = 2

0 commit comments

Comments
 (0)