@@ -78,7 +78,7 @@ def progress_bar_metrics(self, progress_bar_metrics: Dict) -> None:
 
     @property
     def cached_results(self) -> Union[EpochResultStore, None]:
-        return self._cached_results.get(self.trainer._running_stage)  # type: ignore
+        return self._cached_results.get(self.trainer._running_stage)
 
     def get_metrics(self, key: str) -> Dict:
         metrics_holder: MetricsHolder = getattr(self, f"_{key}")
@@ -121,8 +121,6 @@ def cache_logged_metrics(self):
     def on_trainer_init(self, logger, flush_logs_every_n_steps: int, log_every_n_steps: int, move_metrics_to_cpu: bool):
         # logging
         self.configure_logger(logger)
-        # todo: IDE is complaining, these shall be initialized in the Trainer init at leas as placeholders
-        # and assign here the desired value
         self.trainer.flush_logs_every_n_steps = flush_logs_every_n_steps
         self.trainer.log_every_n_steps = log_every_n_steps
         self.trainer.move_metrics_to_cpu = move_metrics_to_cpu
@@ -185,9 +183,6 @@ def cache_training_step_metrics(self, opt_closure_result):
             batch_log_metrics = opt_closure_result.training_step_output.log_metrics
             logged_metrics_tmp.update(batch_log_metrics)
 
-            callback_metrics = opt_closure_result.training_step_output.callback_metrics
-            callback_metrics_tmp.update(callback_metrics)
-
             batch_pbar_metrics = opt_closure_result.training_step_output.pbar_on_batch_end
             pbar_metrics_tmp.update(batch_pbar_metrics)
 
@@ -210,9 +205,6 @@ def log_metrics(self, metrics, grad_norm_dic, step=None):
             metrics (dict): Metric values
             grad_norm_dic (dict): Gradient norms
             step (int): Step for which metrics should be logged. Default value corresponds to `self.global_step`
-            log_train_step_metrics (bool): Used to track if `log_metrics` function is being called in during training
-                steps. In training steps, we will log metrics on step: `total_nb_idx` (for accumulated gradients)
-                and global_step for the rest.
         """
         # add gpu memory
         if self.trainer._device_type == DeviceType.GPU and self.log_gpu_memory:
@@ -348,27 +340,6 @@ def _track_callback_metrics(self, eval_results):
         if self.trainer.state in (TrainerState.TESTING, TrainerState.VALIDATING):
             self.trainer.logger_connector.evaluation_callback_metrics.update(flat)
 
-    def __process_eval_epoch_end_results_and_log_legacy_update(self, prog_bar_metrics, log_metrics, callback_metrics):
-        # eval loop returns all metrics
-        dataloader_result_metrics = {**prog_bar_metrics, **log_metrics, **callback_metrics}
-
-        # add metrics to prog bar
-        self.trainer.logger_connector.add_progress_bar_metrics(prog_bar_metrics)
-
-        # log metrics
-        if len(log_metrics) > 0:
-            self.trainer.logger_connector.log_metrics(log_metrics, {})
-
-        # track metrics for callbacks (all prog bar, logged and callback metrics)
-        callback_metrics.update(log_metrics)
-        callback_metrics.update(prog_bar_metrics)
-        self.trainer.logger_connector.callback_metrics.update(callback_metrics)
-        if self.trainer.state in (TrainerState.TESTING, TrainerState.VALIDATING):
-            self.trainer.logger_connector.evaluation_callback_metrics.update(callback_metrics)
-
-        if len(dataloader_result_metrics) > 0:
-            self.eval_loop_results.append(dataloader_result_metrics)
-
     def __process_eval_epoch_end_results_and_log_legacy(self, eval_results):
         if self.trainer.sanity_checking:
             return
@@ -379,21 +350,21 @@ def __process_eval_epoch_end_results_and_log_legacy(self, eval_results):
             if not isinstance(eval_results, list):
                 eval_results = [eval_results]
 
-            num_loaders: int = self.trainer.evaluation_loop.num_dataloaders
-            prog_bar_metrics, log_metrics, callback_metrics = {}, {}, {}
-
             for result_idx, result in enumerate(eval_results):
-                _, prog_bar_metrics, log_metrics, callback_metrics, _ = self.trainer.process_dict_result(result)
+                _, prog_bar_metrics, log_metrics, _ = self.trainer.process_dict_result(result)
+
+                # eval loop returns all metrics
+                dataloader_result_metrics = {**prog_bar_metrics, **log_metrics}
+
+                # add metrics to prog bar
+                self.trainer.logger_connector.add_progress_bar_metrics(prog_bar_metrics)
 
-                if num_loaders > 1:
-                    self.__process_eval_epoch_end_results_and_log_legacy_update(
-                        prog_bar_metrics, log_metrics, callback_metrics
-                    )
+                # log metrics
+                if len(log_metrics) > 0:
+                    self.trainer.logger_connector.log_metrics(log_metrics, {})
 
-            if num_loaders == 1:
-                self.__process_eval_epoch_end_results_and_log_legacy_update(
-                    prog_bar_metrics, log_metrics, callback_metrics
-                )
+                if len(dataloader_result_metrics) > 0:
+                    self.eval_loop_results.append(dataloader_result_metrics)
 
     def on_train_epoch_end(self):
         # inform cached logger connector epoch finished
@@ -446,10 +417,9 @@ def log_train_epoch_end_metrics(
 
         # TODO: deprecate 1.0
         else:
-            out = self.__run_legacy_training_epoch_end(
-                num_optimizers, epoch_output, model, is_result_obj, epoch_callback_metrics
+            epoch_log_metrics, epoch_progress_bar_metrics = self.__run_legacy_training_epoch_end(
+                num_optimizers, epoch_output, model, is_result_obj
             )
-            epoch_log_metrics, epoch_progress_bar_metrics, epoch_callback_metrics = out
 
         # it will perform reduction over epoch and return log metrics
         cached_epoch_log_metrics = self.cached_results.get_epoch_log_metrics()
@@ -501,9 +471,7 @@ def training_epoch_end(self, model, epoch_output, num_optimizers):
         # capture logging
         self.trainer.logger_connector.cache_logged_metrics()
 
-    def __run_legacy_training_epoch_end(
-        self, num_optimizers, epoch_output, model, is_result_obj, epoch_callback_metrics
-    ):
+    def __run_legacy_training_epoch_end(self, num_optimizers, epoch_output, model, is_result_obj):
 
         epoch_log_metrics = {}
         epoch_progress_bar_metrics = {}
@@ -534,15 +502,14 @@ def __run_legacy_training_epoch_end(
                 _processed_outputs = self.trainer.process_dict_result(epoch_output)
                 epoch_progress_bar_metrics = _processed_outputs[1]
                 epoch_log_metrics = _processed_outputs[2]
-                epoch_callback_metrics = _processed_outputs[3]
 
         # --------------------------
         # Structured Result (auto epoch end)
         # --------------------------
         elif is_result_obj:
             epoch_log_metrics, epoch_progress_bar_metrics = self.__auto_reduce_results_on_epoch_end(epoch_output)
 
-        return epoch_log_metrics, epoch_progress_bar_metrics, epoch_callback_metrics
+        return epoch_log_metrics, epoch_progress_bar_metrics
 
     def __auto_reduce_results_on_epoch_end(self, epoch_output):
         epoch_log_metrics = {}