88import sys
99import textwrap
1010
11-
12- def create_boilerplate_code (script_name ):
13- if script_name == "samples" :
14- boilerplate_code = [
15- "import os\n " ,
16- "from math import sqrt\n " ,
17- "from os import path\n " ,
18- "\n " ,
19- "from khiops import core as kh\n " ,
20- "\n " ,
21- ]
22- elif script_name == "samples_sklearn" :
23- boilerplate_code = [
24- "import os\n " ,
25- "import pickle\n " ,
26- "from os import path\n " ,
27- "\n " ,
28- "import pandas as pd\n " ,
29- "from sklearn import metrics\n " ,
30- "from sklearn.compose import ColumnTransformer\n " ,
31- "from sklearn.experimental import enable_hist_gradient_boosting\n " ,
32- "from sklearn.ensemble import HistGradientBoostingClassifier\n " ,
33- "from sklearn.datasets import fetch_20newsgroups\n " ,
34- "from sklearn.feature_extraction.text import HashingVectorizer\n " ,
35- "from sklearn.model_selection import train_test_split\n " ,
36- "from sklearn.pipeline import Pipeline\n " ,
37- "from sklearn.preprocessing import OneHotEncoder\n " ,
38- "\n " ,
39- "from khiops import core as kh\n " ,
40- "from khiops.sklearn import (\n " ,
41- " KhiopsClassifier,\n " ,
42- " KhiopsCoclustering,\n " ,
43- " KhiopsEncoder,\n " ,
44- " KhiopsRegressor,\n " ,
45- ")\n " ,
46- ]
47- else :
48- raise ValueError (f"Invalid samples script name '{ script_name } '" )
49- return boilerplate_code
11+ import black
5012
5113
5214def create_header_cells (script_name ):
5315 """Creates the header cells for the notebook"""
54- boilerplate_code = create_boilerplate_code (script_name )
55-
5616 # Create the boilerplate cells
5717 cells = [
5818 {
@@ -66,39 +26,41 @@ def create_header_cells(script_name):
6626 "[Khiops](https://khiops.org) before using this this notebook" ,
6727 ],
6828 },
69- {
70- "cell_type" : "code" ,
71- "execution_count" : None ,
72- "metadata" : {"collapsed" : True },
73- "outputs" : [],
74- "source" : boilerplate_code ,
75- },
7629 ]
7730 return cells
7831
7932
def create_sample_cells(sample_method):
    """Creates the notebook cells for the specified sample method

    Two cells are built from the source of ``sample_method``:

    - a markdown cell with the method name as a heading followed by its
      docstring
    - a code cell with the method body (the ``def`` line and the docstring are
      removed), dedented and formatted with ``black``

    Parameters
    ----------
    sample_method : function
        The sample function to convert. Its source is obtained with
        `inspect.getsource`.

    Returns
    -------
    list
        The markdown cell and the code cell, in that order, as notebook cell
        dictionaries.
    """
    # Separate the docstring from the body; the body comes back still indented
    # as it was inside the function, so dedent it before handing it to black
    body, docstring = split_docstring(inspect.getsource(sample_method))
    body = black.format_str(textwrap.dedent(body), mode=black.Mode())

    # Notebook sources are lists of lines: every line keeps its trailing
    # newline except the last one
    body_lines = body.rstrip().split("\n")
    code_list = [line + "\n" for line in body_lines[:-1]]
    code_list.append(body_lines[-1])

    # The docstring is sliced verbatim from the source, so its continuation
    # lines still carry the function's indentation; normalize it so that
    # Markdown does not render indented paragraphs as code blocks
    description = inspect.cleandoc(docstring)

    markdown_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": [f"### `{sample_method.__name__}()`\n\n", f"{description}\n"],
    }
    code_cell = {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": code_list,
    }
    return [markdown_cell, code_cell]
9761
9862
9963def create_rest_page_header (script_name ):
100- boilerplate_code = "" .join (create_boilerplate_code (script_name ))
101- indented_boilerplate_code = textwrap .indent (boilerplate_code , " " )
10264 subtitle = "The code snippets on this page demonstrate the basic use of the "
10365 if script_name == "samples" :
10466 title = "Samples core"
@@ -139,38 +101,37 @@ def create_rest_page_header(script_name):
139101 " from khiops.tools import download_datasets\n "
140102 " download_datasets()\n "
141103 "\n "
142- "Before copying any code snippet make sure to precede it with following\n "
143- "preamble:\n "
144- "\n "
145- ".. code-block:: python\n "
146- "\n "
147- f"{ indented_boilerplate_code } "
148104 "\n "
149105 "Samples\n "
150106 "-------\n "
151107 )
152108
153109
def split_docstring(source):
    """Splits the source of a function into its code and its docstring

    The first triple-double-quoted string found in ``source`` is taken as the
    docstring. Everything up to and including that string (so the ``def`` line
    as well) is left out of the returned code.

    Parameters
    ----------
    source : str
        Source text, typically the output of `inspect.getsource`.

    Returns
    -------
    tuple
        A 2-tuple ``(source_without_docstring, docstring)``. The docstring is
        the raw text between the triple quotes. When ``source`` contains no
        complete triple-quoted string the result is ``(source, "")``.
    """
    triple_quote = '"""'

    # No opening quotes: there is no docstring, the whole source is code
    _, opening, after_opening = source.partition(triple_quote)
    if not opening:
        return source, ""

    # Opening quotes without closing ones: there is nothing sensible to cut,
    # so treat the source as having no docstring
    docstring, closing, after_closing = after_opening.partition(triple_quote)
    if not closing:
        return source, ""

    # Drop the line break that ends the docstring's last line so the code
    # starts at the first statement line
    if after_closing.startswith("\n"):
        after_closing = after_closing[1:]
    return after_closing, docstring
162122
163123
def create_rest_page_section(sample_function):
    """Creates the reST section for the specified sample function

    The section is made of an ``autofunction`` directive for the function
    followed by a ``code-block`` directive containing the function body (the
    ``def`` line and the docstring are removed), dedented and formatted with
    ``black``.

    Parameters
    ----------
    sample_function : function
        The sample function to document. Its source is obtained with
        `inspect.getsource`.

    Returns
    -------
    str
        The reST text of the section, without a trailing newline.
    """
    # Keep only the body and normalize it: it comes back indented as it was
    # inside the function, and black expects top-level code
    body, _ = split_docstring(inspect.getsource(sample_function))
    body = black.format_str(textwrap.dedent(body), mode=black.Mode())

    # Directive content must be indented under the `.. code-block::` line;
    # the final newline emitted by black is dropped
    snippet = textwrap.indent(body, "    ").rstrip()

    return (
        f".. autofunction:: {sample_function.__name__}\n"
        ".. code-block:: python\n"
        "\n"
        f"{snippet}"
    )
175136
176137
@@ -184,6 +145,7 @@ def main(args):
184145
185146 # Sanity check
186147 script_path = os .path .join (args .samples_dir , f"{ script_name } .py" )
148+ print (f"Converting to format '{ args .format } ' samples script at { script_path } " )
187149 if os .path .abspath (script_path ) == os .path .abspath (args .output_path ):
188150 print ("error: input and output paths are the same" )
189151 sys .exit (1 )
@@ -210,7 +172,7 @@ def main(args):
210172 notebook_objects = {}
211173 notebook_objects ["cells" ] = create_header_cells (script_name )
212174 for sample_method in samples .exported_samples :
213- notebook_objects ["cells" ].append ( create_sample_cell (sample_method ))
175+ notebook_objects ["cells" ].extend ( create_sample_cells (sample_method ))
214176 notebook_objects ["metadata" ] = {}
215177 notebook_objects ["nbformat" ] = 4
216178 notebook_objects ["nbformat_minor" ] = 2
0 commit comments