|
10 | 10 | }, |
11 | 11 | { |
12 | 12 | "cell_type": "code", |
13 | | - "execution_count": null, |
| 13 | + "execution_count": 1, |
14 | 14 | "id": "d58c720d", |
15 | 15 | "metadata": {}, |
16 | 16 | "outputs": [], |
17 | 17 | "source": [ |
18 | 18 | "import json\n", |
19 | 19 | "import os\n", |
20 | 20 | "from datetime import date\n", |
| 21 | + "from medcat.cat import CAT\n", |
21 | 22 | "from medcat.meta_cat import MetaCAT\n", |
22 | | - "from medcat.config_meta_cat import ConfigMetaCAT" |
| 23 | + "from medcat.config_meta_cat import ConfigMetaCAT\n", |
| 24 | + "from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBPE, TokenizerWrapperBERT\n", |
| 25 | + "from tokenizers import ByteLevelBPETokenizer" |
23 | 26 | ] |
24 | 27 | }, |
25 | 28 | { |
|
85 | 88 | }, |
86 | 89 | { |
87 | 90 | "cell_type": "markdown", |
88 | | - "id": "d4a3632b", |
89 | | - "metadata": {}, |
90 | | - "source": [ |
91 | | - "<b> Note: </b> \n", |
92 | | - " The name for the classification task can vary. <br> E.g: Task name for 'Experiencer' can be 'Subject'.\n", |
93 | | - " <br><br>To accommodate this, we have a list that stores the variations for the alternate names. This attribute can be found under `mc.config.general.alternative_category_names`\n", |
94 | | - "<br> E.g. for Experiencer, it will be pre-loaded as alternative_category_names = ['Experiencer','Subject']" |
95 | | - ] |
96 | | - }, |
97 | | - { |
98 | | - "cell_type": "markdown", |
99 | | - "id": "d8bdc404", |
| 91 | + "id": "35aa5605", |
100 | 92 | "metadata": {}, |
101 | 93 | "source": [ |
102 | | - "<b> Note: </b> \n", |
103 | | - " The name for the classes can vary too. <br> E.g: For Presence task, the class name can be 'Not present (False)' or 'False'\n", |
104 | | - " <br><br>To accommodate this, we have a mapping that stores the variations for the alternate names. This attribute can be found under `mc.config.general.alternative_class_names`\n", |
105 | | - "<br> E.g. for Presence, it will be pre-loaded as alternative_class_names = [[\"Hypothetical (N/A)\",\"Hypothetical\"],[\"Not present (False)\",\"False\"],[\"Present (True)\",\"True\"]]" |
| 94 | + "Before you run the next section, please double-check that the model's meta_annotation names match those specified in the mct export.\n", |
| 95 | + "\n" |
106 | 96 | ] |
107 | 97 | }, |
108 | 98 | { |
|
193 | 183 | " if class_wt_phase1:\n", |
194 | 184 | " mc.config.train['class_weights'] = class_wt_phase1\n", |
195 | 185 | "\n", |
196 | | - " #You can change the number of epochs, remember to keep them higher for phase 1\n", |
197 | | - " mc.config.train['nepochs'] = 40 \n", |
198 | | - "\n", |
199 | | - " # current model will be overwritten\n", |
200 | | - " save_dir_path = os.path.join(base_dir_meta_models,\"meta_\"+meta_model)\n", |
201 | | - " # to save the new model elsewhere, uncomment the below line\n", |
202 | | - " #save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n", |
| 186 | + " mc.config.train['nepochs'] = 30 #You can change the number of epochs, remember to keep them higher for phase 1\n", |
203 | 187 | "\n", |
| 188 | + " save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n", |
204 | 189 | " results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n", |
205 | 190 | " # Save results\n", |
206 | 191 | " json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results_phase1.json'), 'w'))\n", |
|
217 | 202 | " if class_wt_phase2:\n", |
218 | 203 | " mc.config.train['class_weights'] = class_wt_phase2\n", |
219 | 204 | "\n", |
220 | | - " #You can change the number of epochs\n", |
221 | | - " mc.config.train['nepochs'] = 20\n", |
| 205 | + " mc.config.train['nepochs'] = 15\n", |
222 | 206 | "\n", |
223 | | - " # Where to save the meta_model and results. Ensure to keep this same as Phase 1\n", |
224 | | - " save_dir_path = os.path.join(base_dir_meta_models,\"meta_\"+meta_model)\n", |
225 | | - " \n", |
| 207 | + " save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. Ensure to keep this same as Phase 1\n", |
226 | 208 | " results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n", |
227 | 209 | " # Save results\n", |
228 | 210 | " json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results_phase2.json'), 'w'))\n", |
229 | 211 | "\n", |
230 | 212 | "#--------------------------------Driver--------------------------------\n", |
231 | 213 | "for meta_model in meta_model_names:\n", |
232 | | - " #To use your own class weights instead of the pre-defined ones for the 2 phases, put the weights in the lists below\n", |
233 | | - " class_wt_phase1 = [] # Example [0.4,0.4,0.2]\n", |
234 | | - " class_wt_phase2 = [] # Example [0.4,0.3,0.3]\n", |
| 214 | + " #To use your own class weights instead of the pre-defined ones for the 2 phases, uncomment the below lines\n", |
| 215 | + " '''class_wt_phase1 = []\n", |
| 216 | + " class_wt_phase2 = []'''\n", |
235 | 217 | "\n", |
236 | 218 | " # Train 2 phase learning\n", |
237 | 219 | " logger.info(\"\\n********************Beginning Phase 1********************\")\n", |
|
275 | 257 | "# Follow all the same steps till initializing the metacat model\n", |
276 | 258 | "\n", |
277 | 259 | "# Initialise and train meta_model\n", |
278 | | - "mc = MetaCAT.load(save_dir_path=os.path.join(base_dir_meta_models,\"meta_\"+meta_model))\n", |
| 260 | + "mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n", |
279 | 261 | "\n", |
280 | 262 | "# the format expected is [[['text','of','the','document'], [index of medical entity], \"label\" ],\n", |
281 | 263 | "# ['text','of','the','document'], [index of medical entity], \"label\" ]]\n", |
|
0 commit comments