Reverting Gemma checkpointing logic #1168

Merged 1 commit on Jul 12, 2024
tests/torchtune/utils/test_checkpointer.py (4 changes: 1 addition, 3 deletions)
@@ -628,7 +628,6 @@ def state_dict(self, weight_dtype):
),
"model.norm.weight": randn(_DIM, dtype=weight_dtype),
}
state_dict["lm_head.weight"] = state_dict["model.embed_tokens.weight"]
return state_dict

@pytest.fixture
@@ -704,8 +703,7 @@ def test_load_save_checkpoint_single_file(
# Converted state dict from the checkpointer

state_dict = single_file_checkpointer.load_checkpoint()
- # Check that we've loaded all the keys - we're loading one less key in: lm_head.weight
- assert len(state_dict["model"].keys()) == (len(orig_state_dict.keys()) - 1)
+ assert len(state_dict["model"].keys()) == len(orig_state_dict.keys())

# the keys in original state dict should match up with the keys in the weight_map
for key in orig_state_dict.keys():
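The deleted fixture line tied lm_head.weight to model.embed_tokens.weight, and the deleted assertion expected the Gemma-specific converter to drop that tied key (the old "one less key" comment). With the Gemma path reverted, the fixture no longer injects the tied weight, so the converted and original key counts match exactly. A minimal sketch of that bookkeeping, with placeholder shapes rather than Gemma's real dimensions:

```python
import torch

# Fixture-style HF state dict with a tied output head. Shapes here are small
# placeholders, not Gemma's real dimensions.
hf_state_dict = {"model.embed_tokens.weight": torch.randn(8, 4)}
hf_state_dict["lm_head.weight"] = hf_state_dict["model.embed_tokens.weight"]  # tied: same tensor

# Old behavior (sketch): the Gemma-specific converter dropped the tied key,
# which is what the removed "one less key" assertion was checking.
converted = {k: v for k, v in hf_state_dict.items() if k != "lm_head.weight"}
assert len(converted) == len(hf_state_dict) - 1

# After this revert the fixture never adds lm_head.weight, so the converted
# and original state dicts have the same number of keys.
```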
torchtune/models/gemma/__init__.py (1 change: 1 deletion)
@@ -5,7 +5,6 @@
# LICENSE file in the root directory of this source tree.

from ._component_builders import gemma # noqa
- from ._convert_weights import gemma_hf_to_tune, gemma_tune_to_hf # noqa
from ._model_builders import ( # noqa
gemma_2b,
gemma_7b,
torchtune/models/gemma/_convert_weights.py (108 deletions)

This file was deleted.
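The deleted module's contents are not shown in this diff, but its two exports and their keyword arguments are visible at the call sites in _checkpointer.py below. The following is a hypothetical reconstruction of those signatures only; the bodies are illustrative stand-ins that defer to the generic converter (assuming convert_weights.hf_to_tune and tune_to_hf accept a head_dim argument), not the deleted 108-line implementation:

```python
from typing import Dict

import torch

from torchtune.models import convert_weights


def gemma_hf_to_tune(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int,
    num_kv_heads: int,
    dim: int,
    head_dim: int,
) -> Dict[str, torch.Tensor]:
    # Sketch: drop the tied lm_head.weight (see the test change above), then
    # reuse the generic HF -> torchtune key mapping.
    state_dict = {k: v for k, v in state_dict.items() if k != "lm_head.weight"}
    return convert_weights.hf_to_tune(
        state_dict,
        num_heads=num_heads,
        num_kv_heads=num_kv_heads,
        dim=dim,
        head_dim=head_dim,
    )


def gemma_tune_to_hf(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int,
    num_kv_heads: int,
    dim: int,
    head_dim: int,
) -> Dict[str, torch.Tensor]:
    # Sketch: reverse direction, torchtune -> HF key mapping.
    return convert_weights.tune_to_hf(
        state_dict,
        num_heads=num_heads,
        num_kv_heads=num_kv_heads,
        dim=dim,
        head_dim=head_dim,
    )
```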

torchtune/utils/_checkpointing/_checkpointer.py (17 changes: 17 deletions)
@@ -16,7 +16,6 @@
from torchtune import utils

from torchtune.models import convert_weights
- from torchtune.models.gemma import gemma_hf_to_tune, gemma_tune_to_hf
from torchtune.models.mistral import (
mistral_reward_hf_to_tune,
mistral_reward_tune_to_hf,
@@ -427,14 +426,6 @@ def load_checkpoint(self) -> Dict[str, Any]:
num_kv_heads=self._config["num_key_value_heads"],
dim=self._config["hidden_size"],
)
- elif self._model_type == ModelType.GEMMA:
-     converted_state_dict[utils.MODEL_KEY] = gemma_hf_to_tune(
-         merged_state_dict,
-         num_heads=self._config["num_attention_heads"],
-         num_kv_heads=self._config["num_key_value_heads"],
-         dim=self._config["hidden_size"],
-         head_dim=self._config["head_dim"],
-     )
else:
converted_state_dict[utils.MODEL_KEY] = convert_weights.hf_to_tune(
merged_state_dict,
@@ -485,14 +476,6 @@ def save_checkpoint(
num_kv_heads=self._config["num_key_value_heads"],
dim=self._config["hidden_size"],
)
- elif self._model_type == ModelType.GEMMA:
-     state_dict[utils.MODEL_KEY] = gemma_tune_to_hf(
-         state_dict[utils.MODEL_KEY],
-         num_heads=self._config["num_attention_heads"],
-         num_kv_heads=self._config["num_key_value_heads"],
-         dim=self._config["hidden_size"],
-         head_dim=self._config["head_dim"],
-     )
else:
state_dict[utils.MODEL_KEY] = convert_weights.tune_to_hf(
state_dict[utils.MODEL_KEY],
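With both GEMMA branches removed, Gemma checkpoints fall through to the generic convert_weights path like most other models, so no model-specific code is needed to load or save them. A hedged usage sketch of the HF checkpointer after this revert; the directory and file names are placeholders, and only arguments implied by this diff are shown:

```python
from torchtune import utils

# Placeholder paths and checkpoint file names; substitute a real Gemma download.
checkpointer = utils.FullModelHFCheckpointer(
    checkpoint_dir="/tmp/gemma-2b",
    checkpoint_files=["pytorch_model.bin"],
    model_type="GEMMA",
    output_dir="/tmp/gemma-2b-finetuned",
)

# load_checkpoint() now routes Gemma through convert_weights.hf_to_tune; the
# converted weights live under the "model" key, as exercised by the test above.
state_dict = checkpointer.load_checkpoint()
print(len(state_dict["model"]))
```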