Skip to content

Commit f214549

Browse files
Revert "Pushing change for MetaCAT training"
This reverts commit e6c0041.
1 parent e6c0041 commit f214549

File tree

2 files changed

+42
-64
lines changed

2 files changed

+42
-64
lines changed

medcat/2_train_model/2_supervised_training/meta_annotation_training.ipynb

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,19 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"id": "d58c720d",
77
"metadata": {},
88
"outputs": [],
99
"source": [
1010
"import json\n",
1111
"import os\n",
1212
"from datetime import date\n",
13+
"from medcat.cat import CAT\n",
1314
"from medcat.meta_cat import MetaCAT\n",
1415
"from medcat.config_meta_cat import ConfigMetaCAT\n",
15-
"from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBERT"
16+
"from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBPE, TokenizerWrapperBERT\n",
17+
"from tokenizers import ByteLevelBPETokenizer"
1618
]
1719
},
1820
{
@@ -76,39 +78,35 @@
7678
]
7779
},
7880
{
79-
"cell_type": "code",
80-
"execution_count": null,
81-
"id": "2933f7e1",
81+
"cell_type": "markdown",
82+
"id": "35aa5605",
8283
"metadata": {},
83-
"outputs": [],
8484
"source": [
85-
"for meta_model in meta_model_names:\n",
86-
" config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n",
87-
" with open(config_file, 'r') as jfile:\n",
88-
" config_dict = json.load(jfile)\n",
89-
" print(f\"Model used for meta_{meta_model}:\",config_dict['model']['model_name'])"
85+
"Before you run the next section, please double-check that the model's meta_annotation names match those specified in the mct export.\n",
86+
"\n"
9087
]
9188
},
9289
{
9390
"cell_type": "markdown",
94-
"id": "3047b1d9",
91+
"id": "8bf6f5c3",
9592
"metadata": {},
9693
"source": [
97-
"<b> Note: </b> \n",
98-
" The name for the classification task can vary. <br> E.g: Task name for 'Experiencer' can be 'Subject'.\n",
99-
" <br><br>To accommodate this, we have a list that stores the alternative names for a task. This attribute can be found under `mc.config.general.alternative_category_names`\n",
100-
"<br> E.g. for Experiencer, it will be pre-loaded as alternative_category_names = ['Experiencer','Subject']"
94+
"Depending on the model pack you have, please run the LSTM model or BERT model section. <br>\n",
95+
"If you are unsure, use this section to check the model type."
10196
]
10297
},
10398
{
104-
"cell_type": "markdown",
105-
"id": "12e91f77",
99+
"cell_type": "code",
100+
"execution_count": null,
101+
"id": "2933f7e1",
106102
"metadata": {},
103+
"outputs": [],
107104
"source": [
108-
"<b> Note: </b> \n",
109-
" The name for the classes can vary too. <br> E.g: For Presence task, the class name can be 'Not present (False)' or 'False'\n",
110-
" <br><br>To accommodate this, we have a mapping that stores the alternative names for each class. This attribute can be found under `mc.config.general.alternative_class_names`\n",
111-
"<br> E.g. for Presence, it will be pre-loaded as alternative_class_names = [[\"Hypothetical (N/A)\",\"Hypothetical\"],[\"Not present (False)\",\"False\"],[\"Present (True)\",\"True\"]]"
105+
"for meta_model in meta_model_names:\n",
106+
" config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n",
107+
" with open(config_file, 'r') as jfile:\n",
108+
" config_dict = json.load(jfile)\n",
109+
" print(f\"Model used for meta_{meta_model}:\",config_dict['model']['model_name'])"
112110
]
113111
},
114112
{
@@ -133,11 +131,9 @@
133131
"\n",
134132
" # changing parameters\n",
135133
" mc.config.train['nepochs'] = 15\n",
136-
" \n",
137-
" # current model will be overwritten\n",
138-
" save_dir_path = os.path.join(base_dir_meta_models,\"meta_\"+meta_model)\n",
139-
" # to save the new model elsewhere, uncomment the below line\n",
140-
" #save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
134+
"\n",
135+
" save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
136+
" #Ideally this should replace the meta_models inside the modelpack\n",
141137
"\n",
142138
" # train the meta_model\n",
143139
" results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n",
@@ -151,8 +147,7 @@
151147
"id": "ab23e424",
152148
"metadata": {},
153149
"source": [
154-
"## If you don't have the model packs and are training from scratch\n",
155-
"<b>This is very rare; it is recommended to always use the model packs and then fine-tune them.</b>"
150+
"## If you don't have the model packs and are training from scratch"
156151
]
157152
},
158153
{
@@ -172,7 +167,8 @@
172167
"\n",
173168
"tokenizer = TokenizerWrapperBERT.load(\"\", config.model['model_variant'])\n",
174169
"\n",
175-
"save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
170+
"save_dir_path= \"test_meta\" # Where to save the meta_model and results. \n",
171+
"#Ideally this should replace the meta_models inside the modelpack\n",
176172
"\n",
177173
"# Initialise and train meta_model\n",
178174
"mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n",

medcat/2_train_model/2_supervised_training/meta_annotation_training_advanced.ipynb

Lines changed: 16 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,19 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": null,
13+
"execution_count": 1,
1414
"id": "d58c720d",
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
1818
"import json\n",
1919
"import os\n",
2020
"from datetime import date\n",
21+
"from medcat.cat import CAT\n",
2122
"from medcat.meta_cat import MetaCAT\n",
22-
"from medcat.config_meta_cat import ConfigMetaCAT"
23+
"from medcat.config_meta_cat import ConfigMetaCAT\n",
24+
"from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBPE, TokenizerWrapperBERT\n",
25+
"from tokenizers import ByteLevelBPETokenizer"
2326
]
2427
},
2528
{
@@ -85,24 +88,11 @@
8588
},
8689
{
8790
"cell_type": "markdown",
88-
"id": "d4a3632b",
89-
"metadata": {},
90-
"source": [
91-
"<b> Note: </b> \n",
92-
" The name for the classification task can vary. <br> E.g: Task name for 'Experiencer' can be 'Subject'.\n",
93-
" <br><br>To accommodate this, we have a list that stores the alternative names for a task. This attribute can be found under `mc.config.general.alternative_category_names`\n",
94-
"<br> E.g. for Experiencer, it will be pre-loaded as alternative_category_names = ['Experiencer','Subject']"
95-
]
96-
},
97-
{
98-
"cell_type": "markdown",
99-
"id": "d8bdc404",
91+
"id": "35aa5605",
10092
"metadata": {},
10193
"source": [
102-
"<b> Note: </b> \n",
103-
" The name for the classes can vary too. <br> E.g: For Presence task, the class name can be 'Not present (False)' or 'False'\n",
104-
" <br><br>To accommodate this, we have a mapping that stores the alternative names for each class. This attribute can be found under `mc.config.general.alternative_class_names`\n",
105-
"<br> E.g. for Presence, it will be pre-loaded as alternative_class_names = [[\"Hypothetical (N/A)\",\"Hypothetical\"],[\"Not present (False)\",\"False\"],[\"Present (True)\",\"True\"]]"
94+
"Before you run the next section, please double-check that the model's meta_annotation names match those specified in the mct export.\n",
95+
"\n"
10696
]
10797
},
10898
{
@@ -193,14 +183,9 @@
193183
" if class_wt_phase1:\n",
194184
" mc.config.train['class_weights'] = class_wt_phase1\n",
195185
"\n",
196-
" #You can change the number of epochs, remember to keep them higher for phase 1\n",
197-
" mc.config.train['nepochs'] = 40 \n",
198-
"\n",
199-
" # current model will be overwritten\n",
200-
" save_dir_path = os.path.join(base_dir_meta_models,\"meta_\"+meta_model)\n",
201-
" # to save the new model elsewhere, uncomment the below line\n",
202-
" #save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
186+
" mc.config.train['nepochs'] = 30 #You can change the number of epochs, remember to keep them higher for phase 1\n",
203187
"\n",
188+
" save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
204189
" results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n",
205190
" # Save results\n",
206191
" json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results_phase1.json'), 'w'))\n",
@@ -217,21 +202,18 @@
217202
" if class_wt_phase2:\n",
218203
" mc.config.train['class_weights'] = class_wt_phase2\n",
219204
"\n",
220-
" #You can change the number of epochs\n",
221-
" mc.config.train['nepochs'] = 20\n",
205+
" mc.config.train['nepochs'] = 15\n",
222206
"\n",
223-
" # Where to save the meta_model and results. Ensure to keep this same as Phase 1\n",
224-
" save_dir_path = os.path.join(base_dir_meta_models,\"meta_\"+meta_model)\n",
225-
" \n",
207+
" save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. Ensure to keep this same as Phase 1\n",
226208
" results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n",
227209
" # Save results\n",
228210
" json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results_phase2.json'), 'w'))\n",
229211
"\n",
230212
"#--------------------------------Driver--------------------------------\n",
231213
"for meta_model in meta_model_names:\n",
232-
" #To use your own class weights instead of the pre-defined ones for the 2 phases, put the weights in the lists below\n",
233-
" class_wt_phase1 = [] # Example [0.4,0.4,0.2]\n",
234-
" class_wt_phase2 = [] # Example [0.4,0.3,0.3]\n",
214+
" #To use your own class weights instead of the pre-defined ones for the 2 phases, uncomment the below lines\n",
215+
" '''class_wt_phase1 = []\n",
216+
" class_wt_phase2 = []'''\n",
235217
"\n",
236218
" # Train 2 phase learning\n",
237219
" logger.info(\"\\n********************Beginning Phase 1********************\")\n",
@@ -275,7 +257,7 @@
275257
"# Follow all the same steps till initializing the metacat model\n",
276258
"\n",
277259
"# Initialise and train meta_model\n",
278-
"mc = MetaCAT.load(save_dir_path=os.path.join(base_dir_meta_models,\"meta_\"+meta_model))\n",
260+
"mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n",
279261
"\n",
280262
"# the format expected is [[['text','of','the','document'], [index of medical entity], \"label\" ],\n",
281263
"# ['text','of','the','document'], [index of medical entity], \"label\" ]]\n",

0 commit comments

Comments
 (0)