# Source code for megengine.quantization.utils

# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import abc
from enum import Enum
from functools import partial, update_wrapper, wraps
from typing import Union

import numpy as np

from .. import functional as F
from ..autodiff import Function
from ..core._imperative_rt.core2 import apply
from ..core.ops import builtin
from ..core.tensor.dtype import (
    QuantDtypeMeta,
    _builtin_quant_dtypes,
    create_quantized_dtype,
)
from ..tensor import Tensor


class Round(Function):
    """
    Rounding op with a pass-through gradient.

    The functional round has no gradient, so it cannot be used directly for
    quantization-aware training. This ``Function`` rounds in the forward pass
    and applies an STE (Straight-Through Estimator) in the backward pass.
    """

    def forward(self, x):
        """Return ``F.round(x)``."""
        rounded = F.round(x)
        return rounded

    def backward(self, output_grads):
        """Return the incoming gradients unchanged (straight-through)."""
        # STE: rounding is treated as the identity for gradient purposes.
        return output_grads


def tqt_forward(qmin, qmax, inp, scale):
    """
    Apply the builtin ``TQT`` op to ``inp`` and ``scale``.

    :param qmin: passed to ``builtin.TQT`` as ``qmin``.
    :param qmax: passed to ``builtin.TQT`` as ``qmax``.
    :param inp: first operand handed to the op.
    :param scale: second operand handed to the op.
    :return: the single output produced by the op.
    """
    results = apply(builtin.TQT(qmin=qmin, qmax=qmax), inp, scale)
    # Unpacking (rather than indexing) insists on exactly one output.
    (result,) = results
    return result


def register_method_to_class(cls):
    """
    Build a decorator that installs the decorated callable on ``cls`` as a method.

    The callable is wrapped in a plain function taking ``self`` first, and that
    wrapper is stored on ``cls`` under the callable's ``__name__``. A
    ``functools.partial`` is supported: it first receives the metadata of its
    underlying function, so the method name is that function's name.

    :param cls: the class that receives the new attribute.
    :return: a decorator which registers its argument on ``cls`` and returns
        the argument itself (not the wrapper).
    """

    def decorator(func):
        if isinstance(func, partial):
            # A partial has no ``__name__`` of its own. Copy the metadata from
            # the wrapped function *before* building the wrapper, so that
            # ``wraps`` below propagates the real name/doc instead of leaving
            # the method named "wrapper".
            update_wrapper(func, func.func)

        @wraps(func)
        def wrapper(self, *args, **kwargs):
            return func(self, *args, **kwargs)

        setattr(cls, func.__name__, wrapper)
        return func

    return decorator


class QuantMode(Enum):
    """
    Quantization mode enumeration.

    Note that the members are spelled ``SYMMERTIC`` / ``ASYMMERTIC``; the rest
    of this module refers to them by exactly these names.
    """

    # Every mode other than ASYMMERTIC makes ``fake_quant_tensor`` use a
    # zero-valued zero point.
    SYMMERTIC = 1
    # ``fake_quant_tensor`` takes the zero point from the qparams in this mode.
    ASYMMERTIC = 2


class QParams:
    """
    To standardize FakeQuant, Observer and Tensor's qparams format. If custom
    qparams is needed, inherit this class and add custom ``__slots__``.

    ``update``, ``__eq__`` and ``__repr__`` all iterate over ``self.__slots__``,
    so only the names listed there take part in copying, comparison and
    printing.
    """

    __slots__ = "mode", "dtype_meta", "scale", "zero_point"

    def __init__(
        self,
        mode: "QuantMode",
        dtype_meta: "QuantDtypeMeta",
        scale: "Tensor",
        zero_point: "Tensor",
    ):
        self.mode = mode
        self.dtype_meta = dtype_meta
        self.scale = scale
        self.zero_point = zero_point

    def update(self, qparams: "QParams"):
        """Copy every slot named in ``self.__slots__`` from ``qparams``."""
        for key in self.__slots__:
            setattr(self, key, getattr(qparams, key))

    def __eq__(self, other):
        """
        Compare slot by slot.

        Returns ``NotImplemented`` for objects that declare no ``__slots__``
        (for example ``None``), so that ``qparams == None`` evaluates to
        ``False`` instead of raising ``AttributeError``.
        """
        other_slots = getattr(other, "__slots__", None)
        if other_slots is None:
            # Not a slotted object: let Python fall back to its default.
            return NotImplemented
        if len(self.__slots__) != len(other_slots):
            return False
        for key in self.__slots__:
            # The result of ``!=`` is used in a boolean context here.
            if not hasattr(other, key) or getattr(self, key) != getattr(other, key):
                return False
        return True

    def __repr__(self):
        content = ", ".join(
            "{}={}".format(key, getattr(self, key)) for key in self.__slots__
        )
        return "QParams({})".format(content)


class QParamsModuleMixin(abc.ABC):
    """
    Mixin for objects that expose quantization parameters.

    Subclasses implement :meth:`get_qparams`; :meth:`get_quantized_dtype`
    turns those parameters into a quantized dtype.
    """

    def get_quantized_dtype(self):
        """
        Build a quantized dtype from the result of :meth:`get_qparams`.

        ``scale`` is converted to ``float`` and ``zero_point`` to ``int`` via
        their ``numpy()`` values; either one is passed on as ``None`` when it
        is ``None`` in the qparams.
        """
        params = self.get_qparams()

        scale = None
        if params.scale is not None:
            scale = float(params.scale.numpy())

        zero_point = None
        if params.zero_point is not None:
            zero_point = int(params.zero_point.numpy())

        return create_quantized_dtype(params.dtype_meta, scale, zero_point)

    @abc.abstractmethod
    def get_qparams(self) -> QParams:
        """Return the :class:`QParams` describing this object."""
        pass


# QParams constructors with ``mode`` pre-bound, keyed by that mode; used by
# ``create_qparams`` to build the qparams for a given QuantMode.
_builtin_qparams = {
    quant_mode: partial(QParams, mode=quant_mode)
    for quant_mode in (QuantMode.SYMMERTIC, QuantMode.ASYMMERTIC)
}


def create_qparams(
    mode: QuantMode = QuantMode.SYMMERTIC,
    dtype_meta: Union[str, QuantDtypeMeta] = None,
    scale: Tensor = None,
    zero_point: Tensor = None,
):
    """
    Return :class:`~.QParams` according to the mode.

    :param mode: a :class:`QuantMode`, or ``None`` to build a plain
        :class:`~.QParams` whose mode is ``None``.
    :param dtype_meta: a dtype meta object, or a string key that is looked up
        in the builtin quant dtypes.
    :param scale: stored as the ``scale`` of the result.
    :param zero_point: stored as the ``zero_point`` of the result.
    """
    # A string names one of the builtin quantized dtypes.
    resolved_meta = (
        _builtin_quant_dtypes[dtype_meta]
        if isinstance(dtype_meta, str)
        else dtype_meta
    )

    if mode is not None:
        assert isinstance(mode, QuantMode)
        factory = _builtin_qparams[mode]
        return factory(dtype_meta=resolved_meta, scale=scale, zero_point=zero_point)

    return QParams(mode, resolved_meta, scale, zero_point)


def fake_quant_tensor(inp: Tensor, qparams: QParams) -> Tensor:
    """
    Apply fake quantization to the inp tensor.

    :param inp: the input tensor which need to be faked.
    :param qparams: to get mode, qmin, qmax, scale and zero_point from.
    """
    scale = qparams.scale
    # Only the asymmetric mode carries its own zero point; every other mode
    # quantizes around a zero-valued one.
    zero_point = (
        qparams.zero_point
        if qparams.mode == QuantMode.ASYMMERTIC
        else Tensor([0.0], dtype=np.float32)
    )

    meta = qparams.dtype_meta
    fake_quant_op = builtin.FakeQuant(qmin=meta.qmin, qmax=meta.qmax)
    outputs = apply(fake_quant_op, inp, scale, zero_point)
    return outputs[0]


def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor:
    """
    Apply fake quantization to bias, with the special scale from input tensor
    and weight tensor, the quantized type set to qint32 also.

    The bias is returned untouched when it is ``None``, when either tensor has
    no ``qparams``, or when either qparams has no scale.

    :param bias: the bias tensor which need to be faked.
    :param inp: the input tensor which contain the quantization parameters.
    :param w_qat: the weight tensor which contain the quantization parameters.

    .. warning::
        Only work for symmetric quantization method now.
    """
    # Guard clauses: bail out with the original bias when there is nothing to
    # quantize against.
    if getattr(inp, "qparams", None) is None:
        return bias
    if getattr(w_qat, "qparams", None) is None:
        return bias
    if bias is None:
        return bias

    inp_params = inp.qparams
    w_params = w_qat.qparams
    if inp_params.scale is None or w_params.scale is None:
        return bias

    assert inp_params.mode == w_params.mode, "incompatible QuantMode"
    # TODO: support quint8 dtype.
    assert (
        inp_params.dtype_meta.np_dtype_str == "int8"
        and w_params.dtype_meta.np_dtype_str == "int8"
    ), "fake_quant_bias only support int8 like dtype now"

    # use the same mode with weight.
    # TODO: avoid hardcode
    bias_params = create_qparams(
        w_params.mode,
        _builtin_quant_dtypes["qint32"],
        scale=inp_params.scale * w_params.scale,
    )
    quantized_bias = fake_quant_tensor(bias, bias_params)
    # Record the parameters that were used on the resulting tensor.
    quantized_bias.qparams.update(bias_params)
    return quantized_bias