文章目录
量化前
量化后
#Dynamic quantization 动态量化
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType
model_fp32 = 'path/to/the/model.onnx'
model_quant = 'path/to/the/model.quant.onnx'
quantized_model = quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QUInt8)
--------------------------
# QAT quantization QAT量化
import onnx
from onnxruntime.quantization import quantize_qat, QuantType
model_fp32 = 'path/to/the/model.onnx'
model_quant = 'path/to/the/model.quant.onnx'
quantized_model = quantize_qat(model_fp32, model_quant)