Refactor OV weight compression call inside from_pretrained (huggingface#683)

* Move calibration dataset construction to WC function

* Tweak tokenizer

* Remove unused import

* ruff

* ruff 2

* Refactor through OVQuantizer call
nikita-savelyevv authored Apr 29, 2024
1 parent 4869104 commit c235ae1
Showing 2 changed files with 41 additions and 37 deletions.
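To ground the diff below, here is a minimal sketch of the user-facing call this refactor touches; the checkpoint id and config values are placeholders, not part of the commit:

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# 4-bit weight-only compression with a calibration set named by a string id.
# Before this commit, from_pretrained built the calibration dataset itself;
# after it, the whole step is delegated to OVQuantizer.
model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # placeholder checkpoint
    export=True,
    quantization_config=OVWeightQuantizationConfig(bits=4, dataset="wikitext2"),
)
```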
23 changes: 7 additions & 16 deletions optimum/intel/openvino/modeling_decoder.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

+import copy
import logging
import os
import warnings
@@ -25,7 +25,7 @@
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from openvino.preprocess import PrePostProcessor
from openvino.runtime import Core, Tensor, Type
-from transformers import AutoModelForCausalLM, AutoTokenizer, PretrainedConfig
+from transformers import AutoModelForCausalLM, PretrainedConfig
from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
from transformers.generation import GenerationMixin
from transformers.modeling_outputs import CausalLMOutputWithPast
@@ -646,9 +646,8 @@ def _from_pretrained(
raise ImportError(
"Quantization of the weights requires nncf, please install it with `pip install nncf`"
)
-            import nncf
-
-            from .quantization import _weight_only_quantization
+            from optimum.intel.openvino.quantization import OVQuantizer

default_config = _check_default_4bit_configs(config)

Expand All @@ -657,18 +656,10 @@ def _from_pretrained(
f"For the given model, we recommend the following `quantization_config` : {default_config}"
)

-        calibration_dataset = None
-        if isinstance(quantization_config.dataset, str):
-            tokenizer = quantization_config.tokenizer or AutoTokenizer.from_pretrained(model_id)
-
-            from optimum.gptq.data import get_dataset, prepare_dataset
-
-            nsamples = quantization_config.num_samples or 128
-            dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
-            dataset = prepare_dataset(dataset)
-            calibration_dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
-
-        _weight_only_quantization(model, quantization_config, calibration_dataset)
+        quantizer = OVQuantizer(causal_model)
+        quantization_config_copy = copy.deepcopy(quantization_config)
+        quantization_config_copy.tokenizer = quantization_config.tokenizer or model_id
+        quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy))

return causal_model
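
The refactored path, written out as standalone usage of the `OVQuantizer` API that `_from_pretrained` now delegates to (a sketch; the checkpoint id and config values are assumptions):

```python
import copy

from optimum.intel import OVConfig, OVModelForCausalLM, OVQuantizer, OVWeightQuantizationConfig

# Load the exported OpenVINO model without compression first.
model = OVModelForCausalLM.from_pretrained("gpt2", export=True)

# Mirror what _from_pretrained now does: deep-copy the config, make sure a
# tokenizer id is set, then hand the compression to OVQuantizer.
quantization_config = OVWeightQuantizationConfig(bits=4, dataset="wikitext2")
config_copy = copy.deepcopy(quantization_config)
config_copy.tokenizer = config_copy.tokenizer or "gpt2"

quantizer = OVQuantizer(model)
quantizer.quantize(ov_config=OVConfig(quantization_config=config_copy))
```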

55 changes: 34 additions & 21 deletions optimum/intel/openvino/quantization.py
@@ -201,7 +201,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
def quantize(
self,
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
-        save_directory: Union[str, Path] = None,
+        save_directory: Optional[Union[str, Path]] = None,
ov_config: OVConfig = None,
file_name: Optional[str] = None,
batch_size: int = 1,
@@ -217,7 +217,7 @@ def quantize(
calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*):
A collection of data samples to use for quantization calibration. Is optional for weight-only
quantization and is required for full quantization.
-            save_directory (`Union[str, Path]`):
+            save_directory (`Union[str, Path]`, *optional*):
The directory where the quantized model should be saved.
ov_config (`OVConfig`, *optional*):
The configuration containing the parameters related to quantization. If not provided, 8-bit symmetric
@@ -265,10 +265,6 @@ def quantize(
"as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
)

-        if save_directory is None:
-            # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
-            raise ValueError("`save_directory` needs to be specified")

if ov_config is None:
ov_config = OVConfig()
if not isinstance(ov_config, OVConfig):
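Since `save_directory` is now optional, compression of an already-loaded OpenVINO model can run entirely in memory, with saving as a separate explicit step. A sketch under the same assumptions as above:

```python
from optimum.intel import OVConfig, OVModelForCausalLM, OVQuantizer, OVWeightQuantizationConfig

model = OVModelForCausalLM.from_pretrained("gpt2", export=True)
quantizer = OVQuantizer(model)

# No save_directory: weights are compressed in place on the loaded model.
quantizer.quantize(ov_config=OVConfig(quantization_config=OVWeightQuantizationConfig(bits=8)))

# Saving is now decoupled and optional.
model.save_pretrained("gpt2-ov-int8")
```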
@@ -321,21 +317,41 @@ def _quantize_ovbasemodel(
def _quantize_ovbasemodel(
self,
ov_config: OVConfig,
-        save_directory: Union[str, Path],
+        save_directory: Union[str, Path] = None,
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
batch_size: int = 1,
data_collator: Optional[DataCollator] = None,
remove_unused_columns: bool = True,
**kwargs,
):
-        save_directory = Path(save_directory)
-        save_directory.mkdir(parents=True, exist_ok=True)
+        if save_directory is not None:
+            save_directory = Path(save_directory)
+            save_directory.mkdir(parents=True, exist_ok=True)

quantization_config = ov_config.quantization_config
if isinstance(quantization_config, OVWeightQuantizationConfig):
+            if calibration_dataset is None and isinstance(quantization_config.dataset, str):
+                from optimum.intel import OVModelForCausalLM
+
+                if isinstance(self.model, OVModelForCausalLM):
+                    from optimum.gptq.data import get_dataset, prepare_dataset
+
+                    tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer)
+                    nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
+                    calibration_dataset = get_dataset(
+                        quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples
+                    )
+                    calibration_dataset = prepare_dataset(calibration_dataset)
+                    calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x))
+                else:
+                    raise ValueError(
+                        f"Can't create weight compression calibration dataset from string for {type(self.model)}"
+                    )

_weight_only_quantization(self.model.model, quantization_config, calibration_dataset)
-            self.model.save_pretrained(save_directory)
-            ov_config.save_pretrained(save_directory)
+            if save_directory is not None:
+                self.model.save_pretrained(save_directory)
+                ov_config.save_pretrained(save_directory)
            return
if not isinstance(quantization_config, OVQuantizationConfig):
raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
@@ -387,8 +403,9 @@ def _quantize_ovbasemodel(
**kwargs,
)
self.model.model = quantized_model
-        self.model.save_pretrained(save_directory)
-        ov_config.save_pretrained(save_directory)
+        if save_directory is not None:
+            self.model.save_pretrained(save_directory)
+            ov_config.save_pretrained(save_directory)

def _quantize_torchmodel(
self,
@@ -401,6 +418,10 @@
remove_unused_columns: bool = True,
**kwargs,
):
+        if save_directory is None:
+            # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
+            raise ValueError("`save_directory` needs to be specified")

self._set_task()
save_directory = Path(save_directory)
save_directory.mkdir(parents=True, exist_ok=True)
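Torch models, by contrast, still require an explicit save location, since that flow exports the quantized model to disk; the check simply moved from `quantize()` into `_quantize_torchmodel`. A sketch of that path (the model id, dataset, and preprocessing are assumptions):

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.intel import OVQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

quantizer = OVQuantizer.from_pretrained(model)
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=lambda ex: tokenizer(ex["sentence"], padding="max_length", truncation=True),
    num_samples=40,
    dataset_split="train",
)

# save_directory is still mandatory on the torch-model path.
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory="ov-int8-sst2")
```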
@@ -660,14 +681,6 @@ def _weight_only_quantization(
dataset = calibration_dataset
else:
dataset = nncf.Dataset(calibration_dataset)
-    elif config.dataset is not None and isinstance(config.dataset, str):
-        tokenizer = AutoTokenizer.from_pretrained(config.tokenizer)
-
-        from optimum.gptq.data import get_dataset, prepare_dataset
-
-        nsamples = config.num_samples if config.num_samples else 128
-        dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
-        dataset = prepare_dataset(dataset)

sensitivity_metric = None
if isinstance(config.sensitivity_metric, str):
