Trainer Configuration Overview
Correspondence from Legacy to Trainer
If you are already familiar with the Legacy YAML specification and just want to find out how to set a particular parameter in the Trainer YAML specification, please refer to the table below.
Parameter | Legacy Specification | Trainer Specification |
---|---|---|
eval_input | eval_input: ... | trainer: fit: val_dataloader: ... trainer: validate: val_dataloader: ... trainer: validate_all: val_dataloaders: ... |
eval_input.micro_batch_size | eval_input: micro_batch_size: ... | trainer: init: callbacks: - ScopedValidateFlags: csx.performance.micro_batch_size: ... |
model | model: ... | trainer: init: model: ... |
model.fp16_type | model: fp16_type: ... | trainer: init: precision: fp16_type: ... |
model.mixed_precision | model: mixed_precision: ... | trainer: init: precision: enabled: ... |
optimizer | optimizer: ... | trainer: init: optimizer: ... |
optimizer.grad_accum_steps | optimizer: grad_accum_steps: ... | trainer: init: loop: grad_accum_steps: ... |
optimizer.initial_loss_scale | optimizer: initial_loss_scale: ... | trainer: init: precision: initial_loss_scale: ... |
optimizer.learning_rate | optimizer: learning_rate: ... | trainer: init: schedulers: ... |
optimizer.log_summaries | optimizer: log_summaries: ... | trainer: init: callbacks: - ComputeNorm: ... |
optimizer.loss_scaling_factor | optimizer: loss_scaling_factor: ... | trainer: init: precision: loss_scaling_factor: ... |
optimizer.max_gradient_norm | optimizer: max_gradient_norm: ... | trainer: init: precision: max_gradient_norm: ... |
optimizer.max_gradient_value | optimizer: max_gradient_value: ... | trainer: init: precision: max_gradient_value: ... |
optimizer.max_loss_scale | optimizer: max_loss_scale: ... | trainer: init: precision: max_loss_scale: ... |
optimizer.min_loss_scale | optimizer: min_loss_scale: ... | trainer: init: precision: min_loss_scale: ... |
optimizer.steps_per_increase | optimizer: steps_per_increase: ... | trainer: init: precision: steps_per_increase: ... |
runconfig.act_memory_gi | runconfig: act_memory_gi: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.act_memory_gi: ... |
runconfig.autoload_last_checkpoint | runconfig: autoload_last_checkpoint: ... | trainer: init: checkpoint: autoload_last_checkpoint: ... |
runconfig.check_loss_values | runconfig: check_loss_values: ... | trainer: init: callbacks: - CheckLoss: ... |
runconfig.checkpoint_path | runconfig: checkpoint_path: ... | trainer: fit: ckpt_path: ... trainer: validate: ckpt_path: ... |
runconfig.checkpoint_steps | runconfig: checkpoint_steps: ... | trainer: init: checkpoint: steps: ... |
runconfig.cmd_memory_gi | runconfig: cmd_memory_gi: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.cmd_memory_gi: ... |
runconfig.compile_crd_memory_gi | runconfig: compile_crd_memory_gi: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.compile_crd_memory_gi: ... |
runconfig.compile_dir | runconfig: compile_dir: ... | trainer: init: backend: compile_dir: ... |
runconfig.compile_only | runconfig: compile_only: ... | trainer: init: backend: compile_only: ... |
runconfig.credentials_path | runconfig: credentials_path: ... | trainer: init: backend: cluster_config: credentials_path: ... |
runconfig.debug_args | runconfig: debug_args: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.debug_args: ... |
runconfig.debug_args_path | runconfig: debug_args_path: ... | trainer: init: callbacks: - DebugArgsPath: debug_args_path: ... |
runconfig.disable_strict_checkpoint_loading | runconfig: disable_strict_checkpoint_loading: ... | trainer: init: checkpoint: disable_strict_checkpoint_loading: ... |
runconfig.disable_version_check | runconfig: disable_version_check: ... | trainer: init: backend: cluster_config: disable_version_check: ... |
runconfig.dist_backend | runconfig: dist_backend: ... | trainer: init: backend: dist_backend: ... |
runconfig.drop_data | runconfig: drop_data: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.drop_data: ... |
runconfig.dump_activations | runconfig: dump_activations: ... | trainer: init: callbacks: - DumpActivations: ... |
runconfig.enable_act_frequency | runconfig: enable_act_frequency: ... | trainer: init: logging: enable_act_frequency: ... |
runconfig.enable_distributed | runconfig: enable_distributed: ... | trainer: init: backend: enable_distributed: ... |
runconfig.eval_frequency | runconfig: eval_frequency: ... | trainer: init: loop: eval_frequency: ... |
runconfig.eval_steps | runconfig: eval_steps: ... | trainer: init: loop: eval_steps: ... |
runconfig.execute_crd_memory_gi | runconfig: execute_crd_memory_gi: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.execute_crd_memory_gi: ... |
runconfig.experimental.listeners | runconfig: experimental: listeners: ... | trainer: init: callbacks: - Listener: listeners: ... |
runconfig.init_method | runconfig: init_method: ... | trainer: init: backend: init_method: ... |
runconfig.job_labels | runconfig: job_labels: ... | trainer: init: backend: cluster_config: job_labels: ... |
runconfig.job_priority | runconfig: job_priority: ... | trainer: init: backend: cluster_config: job_priority: ... |
runconfig.job_time_sec | runconfig: job_time_sec: ... | trainer: init: backend: cluster_config: job_time_sec: ... |
runconfig.lazy_initialization | runconfig: lazy_initialization: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.lazy_initialization: ... |
runconfig.load_checkpoint_states | runconfig: load_checkpoint_states: ... | trainer: init: callbacks: - LoadCheckpointStates: load_checkpoint_states: ... |
runconfig.log_initialization | runconfig: log_initialization: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.log_initialization: ... |
runconfig.log_input_summaries | runconfig: log_input_summaries: ... | trainer: init: callbacks: - LogInputSummaries: ... |
runconfig.log_steps | runconfig: log_steps: ... | trainer: init: logging: log_steps: ... |
runconfig.logging | runconfig: logging: ... | trainer: init: logging: log_level: ... |
runconfig.main_process_id | runconfig: main_process_id: ... | trainer: init: backend: main_process_id: ... |
runconfig.max_checkpoints | runconfig: max_checkpoints: ... | trainer: init: callbacks: - KeepNCheckpoints: n: ... |
runconfig.max_steps | runconfig: max_steps: ... | trainer: init: loop: max_steps: ... |
runconfig.mgmt_address | runconfig: mgmt_address: ... | trainer: init: backend: cluster_config: mgmt_address: ... |
runconfig.mgmt_namespace | runconfig: mgmt_namespace: ... | trainer: init: backend: cluster_config: mgmt_namespace: ... |
runconfig.model_dir | runconfig: model_dir: ... | trainer: init: model_dir: ... |
runconfig.mount_dirs | runconfig: mount_dirs: ... | trainer: init: backend: cluster_config: mount_dirs: ... |
runconfig.num_act_servers | runconfig: num_act_servers: ... | trainer: init: backend: cluster_config: max_act_per_csx: ... |
runconfig.num_csx | runconfig: num_csx: ... | trainer: init: backend: cluster_config: num_csx: ... |
runconfig.num_epochs | runconfig: num_epochs: ... | trainer: init: loop: num_epochs: ... |
runconfig.num_steps | runconfig: num_steps: ... | trainer: init: loop: num_steps: ... |
runconfig.num_wgt_servers | runconfig: num_wgt_servers: ... | trainer: init: backend: cluster_config: max_wgt_servers: ... |
runconfig.num_workers_per_csx | runconfig: num_workers_per_csx: ... | trainer: init: backend: cluster_config: num_workers_per_csx: ... |
runconfig.op_profiler_config | runconfig: op_profiler_config: ... | trainer: init: callbacks: - OpProfiler: ... |
runconfig.precision_opt_level | runconfig: precision_opt_level: ... | trainer: init: precision: precision_opt_level: ... |
runconfig.python_paths | runconfig: python_paths: ... | trainer: init: backend: cluster_config: python_paths: ... |
runconfig.retrace_every_iteration | runconfig: retrace_every_iteration: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.retrace_every_iteration: ... |
runconfig.save_initial_checkpoint | runconfig: save_initial_checkpoint: ... | trainer: init: checkpoint: save_initial_checkpoint: ... |
runconfig.seed | runconfig: seed: ... | trainer: init: seed: ... |
runconfig.steps_per_epoch | runconfig: steps_per_epoch: ... | trainer: init: loop: steps_per_epoch: ... |
runconfig.sync_batchnorm | runconfig: sync_batchnorm: ... | trainer: init: backend: sync_batchnorm: ... |
runconfig.target_device | runconfig: target_device: ... | trainer: init: backend: backend_type: ... |
runconfig.transfer_processes | runconfig: transfer_processes: ... | trainer: init: callbacks: - GlobalFlags: csx.performance.transfer_processes: ... |
runconfig.validate_only | runconfig: validate_only: ... | trainer: init: backend: validate_only: ... |
runconfig.wgt_memory_gi | runconfig: wgt_memory_gi: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.wgt_memory_gi: ... |
runconfig.wrk_memory_gi | runconfig: wrk_memory_gi: ... | trainer: init: callbacks: - GlobalFlags: csx.debug.wrk_memory_gi: ... |
runconfig.wsc_log_level | runconfig: wsc_log_level: ... | trainer: init: logging: wsc_log_level: ... |
sparsity | sparsity: ... | trainer: init: sparsity: ... |