Skip to content

Commit

Permalink
avoid crashing if file can't be saved, add repo name
Browse files Browse the repository at this point in the history
  • Loading branch information
tcapelle committed Apr 16, 2024
1 parent dde16be commit f19d4d6
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions torchtune/utils/metric_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,15 @@ def __init__(
_, self.rank = get_world_size_and_rank()

if self.rank == 0:
self._wandb.init(
run = self._wandb.init(
project=project,
entity=entity,
group=group,
reinit=True,
resume="allow",
**kwargs,
)
run._label(repo="torchtune")

def log_config(self, config: DictConfig) -> None:
"""Saves the config locally and also logs the config to W&B. The config is
Expand All @@ -197,22 +198,25 @@ def log_config(self, config: DictConfig) -> None:
if self._wandb.run:
resolved = OmegaConf.to_container(config, resolve=True)
self._wandb.config.update(resolved)

output_config_fname = Path(
os.path.join(
config.checkpointer.checkpoint_dir,
f"torchtune_config_{self._wandb.run.id}.yaml",
)
)
OmegaConf.save(config, output_config_fname)
try:
output_config_fname = Path(
os.path.join(
config.checkpointer.checkpoint_dir,
f"torchtune_config_{self._wandb.run.id}.yaml",
)
)
OmegaConf.save(config, output_config_fname)

log.info(f"Logging {output_config_fname} to W&B under Files")
self._wandb.save(
output_config_fname, base_path=output_config_fname.parent
)

except Exception as e:
log.warning(f"Error saving {output_config_fname} to W&B.\nError: \n{e}")
log.warning(
f"Error saving {output_config_fname} to W&B.\nError: \n{e}."
"Don't worry the config will be logged the W&B workspace"
)

def log(self, name: str, data: Scalar, step: int) -> None:
if self._wandb.run:
Expand Down

0 comments on commit f19d4d6

Please sign in to comment.