Skip to content

Commit 14cc4db

Browse files
Make samples code self-contained
1 parent 443aa3e commit 14cc4db

File tree

8 files changed

+5033
-4476
lines changed

8 files changed

+5033
-4476
lines changed

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ repos:
1616
hooks:
1717
- id: isort
1818
language_version: python3
19+
exclude: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
20+
- id: isort
21+
alias: isort-samples
22+
name: isort-samples
23+
language_version: python3
24+
files: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
25+
args: [--no-sections]
1926
- repo: https://github.com/lyz-code/yamlfix/
2027
rev: 1.16.0
2128
hooks:

doc/convert_samples.py

Lines changed: 42 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -8,51 +8,11 @@
88
import sys
99
import textwrap
1010

11-
12-
def create_boilerplate_code(script_name):
13-
if script_name == "samples":
14-
boilerplate_code = [
15-
"import os\n",
16-
"from math import sqrt\n",
17-
"from os import path\n",
18-
"\n",
19-
"from khiops import core as kh\n",
20-
"\n",
21-
]
22-
elif script_name == "samples_sklearn":
23-
boilerplate_code = [
24-
"import os\n",
25-
"import pickle\n",
26-
"from os import path\n",
27-
"\n",
28-
"import pandas as pd\n",
29-
"from sklearn import metrics\n",
30-
"from sklearn.compose import ColumnTransformer\n",
31-
"from sklearn.experimental import enable_hist_gradient_boosting\n",
32-
"from sklearn.ensemble import HistGradientBoostingClassifier\n",
33-
"from sklearn.datasets import fetch_20newsgroups\n",
34-
"from sklearn.feature_extraction.text import HashingVectorizer\n",
35-
"from sklearn.model_selection import train_test_split\n",
36-
"from sklearn.pipeline import Pipeline\n",
37-
"from sklearn.preprocessing import OneHotEncoder\n",
38-
"\n",
39-
"from khiops import core as kh\n",
40-
"from khiops.sklearn import (\n",
41-
" KhiopsClassifier,\n",
42-
" KhiopsCoclustering,\n",
43-
" KhiopsEncoder,\n",
44-
" KhiopsRegressor,\n",
45-
")\n",
46-
]
47-
else:
48-
raise ValueError(f"Invalid samples script name '{script_name}'")
49-
return boilerplate_code
11+
import black
5012

5113

5214
def create_header_cells(script_name):
5315
"""Creates the header cells for the notebook"""
54-
boilerplate_code = create_boilerplate_code(script_name)
55-
5616
# Create the boilerplate cells
5717
cells = [
5818
{
@@ -66,39 +26,41 @@ def create_header_cells(script_name):
6626
"[Khiops](https://khiops.org) before using this this notebook",
6727
],
6828
},
69-
{
70-
"cell_type": "code",
71-
"execution_count": None,
72-
"metadata": {"collapsed": True},
73-
"outputs": [],
74-
"source": boilerplate_code,
75-
},
7629
]
7730
return cells
7831

7932

80-
def create_sample_cell(sample_method):
33+
def create_sample_cells(sample_method):
    """Builds the notebook cells for one sample method

    The source of ``sample_method`` is split into its docstring and the code
    that follows it; the code is dedented and formatted with black so it can
    run as a flat notebook cell.

    Returns a list with two cells, in this order:

    - a markdown cell with the method name as title followed by the docstring
    - a code cell whose source is the formatted code, one list item per line
    """
    # Separate the docstring from the code and normalize the code layout
    body, docstring = split_docstring(inspect.getsource(sample_method))
    formatted_body = black.format_str(textwrap.dedent(body), mode=black.Mode())

    # Every line keeps its line break except the last one
    lines = formatted_body.rstrip().split("\n")
    cell_source = [f"{line}\n" for line in lines[:-1]]
    cell_source.append(lines[-1].rstrip())

    title_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [f"### `{sample_method.__name__}()`\n\n", f"{docstring}\n"],
    }
    code_cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": cell_source,
    }
    return [title_cell, code_cell]
9761

9862

9963
def create_rest_page_header(script_name):
100-
boilerplate_code = "".join(create_boilerplate_code(script_name))
101-
indented_boilerplate_code = textwrap.indent(boilerplate_code, " ")
10264
subtitle = "The code snippets on this page demonstrate the basic use of the "
10365
if script_name == "samples":
10466
title = "Samples core"
@@ -139,38 +101,37 @@ def create_rest_page_header(script_name):
139101
" from khiops.tools import download_datasets\n"
140102
" download_datasets()\n"
141103
"\n"
142-
"Before copying any code snippet make sure to precede it with following\n"
143-
"preamble:\n"
144-
"\n"
145-
".. code-block:: python\n"
146-
"\n"
147-
f"{indented_boilerplate_code}"
148104
"\n"
149105
"Samples\n"
150106
"-------\n"
151107
)
152108

153109

154-
def remove_docstring(source):
155-
docstring_open = source.find('"""')
156-
if docstring_open == -1:
110+
def split_docstring(source):
    """Splits a source string into the code after its docstring and the docstring

    Only the first triple-double-quoted string found in ``source`` is treated
    as the docstring. Everything before it (for a function, the ``def`` line)
    is discarded from the returned code.

    Parameters
    ----------
    source : str
        Source code, typically obtained with ``inspect.getsource``.

    Returns
    -------
    tuple
        A 2-tuple ``(source_without_docstring, docstring)``. If no complete
        triple-double-quoted string is found, ``source`` is returned unchanged
        together with an empty docstring.
    """
    quote = '"""'

    # No docstring at all: hand back the source as-is
    open_index = source.find(quote)
    if open_index == -1:
        return source, ""

    # An opening quote without a closing one cannot be split reliably
    content_start = open_index + len(quote)
    close_index = source.find(quote, content_start)
    if close_index == -1:
        return source, ""

    docstring = source[content_start:close_index]

    # Skip the closing quotes and the single character that follows them
    # (the line break ending the docstring line)
    source_without_docstring = source[close_index + len(quote) + 1 :]
    return source_without_docstring, docstring
162122

163123

164124
def create_rest_page_section(sample_function):
    """Builds the reST section for one sample function

    The section is an ``autofunction`` directive for the function followed by a
    ``code-block`` directive containing the function's code without its
    docstring, dedented, formatted with black and re-indented as directive
    content.
    """
    # Drop the docstring; it is rendered by the autofunction directive
    body, _ = split_docstring(inspect.getsource(sample_function))
    formatted_body = black.format_str(textwrap.dedent(body), mode=black.Mode())

    # NOTE(review): the indent prefix is reproduced as it appears in this view;
    # confirm its width against the actual file
    block_content = textwrap.indent(formatted_body, " ").rstrip()

    section_parts = [
        f".. autofunction:: {sample_function.__name__}\n",
        ".. code-block:: python\n",
        "\n",
        f"{block_content}",
    ]
    return "".join(section_parts)
175136

176137

@@ -184,6 +145,7 @@ def main(args):
184145

185146
# Sanity check
186147
script_path = os.path.join(args.samples_dir, f"{script_name}.py")
148+
print(f"Converting to format '{args.format}' samples script at {script_path}")
187149
if os.path.abspath(script_path) == os.path.abspath(args.output_path):
188150
print("error: input and output paths are the same")
189151
sys.exit(1)
@@ -210,7 +172,7 @@ def main(args):
210172
notebook_objects = {}
211173
notebook_objects["cells"] = create_header_cells(script_name)
212174
for sample_method in samples.exported_samples:
213-
notebook_objects["cells"].append(create_sample_cell(sample_method))
175+
notebook_objects["cells"].extend(create_sample_cells(sample_method))
214176
notebook_objects["metadata"] = {}
215177
notebook_objects["nbformat"] = 4
216178
notebook_objects["nbformat_minor"] = 2

0 commit comments

Comments
 (0)