Quantization Explorer

LAB03: TensorFlow Lite and Quantization

Interactive Quantization Visualization

Explore how different bit widths affect model accuracy and size.

Concept from LAB03

See Section 3.2: Quantization Types in the PDF book.

Interactive Quantization Simulator

Code

// Slider for the quantization bit width: 2 to 32 in steps of 1, starting at 8.
// NOTE(review): `description` is not among the options documented for
// Inputs.range (label, step, value, format, ...) - confirm it is rendered.
viewof bitWidth = Inputs.range([2, 32], {
  value: 8,
  step: 1,
  label: "Bit Width",
  description: "Number of bits for quantized values"
})

// Slider for the size of the unquantized (FP32) model in MB: 1 to 100, starting at 10.
viewof modelSizeMB = Inputs.range([1, 100], {
  value: 10,
  step: 1,
  label: "Base Model Size (MB)",
  description: "Original FP32 model size"
})

// Slider for the parameter count: 1,000 to 1,000,000 in steps of 1,000,
// starting at 100,000. Used for the memory-usage estimate.
viewof numWeights = Inputs.range([1000, 1000000], {
  value: 100000,
  step: 1000,
  label: "Number of Weights",
  description: "Total parameters in the model"
})

Code

// Model size after quantization: storage scales linearly with bits per
// weight, relative to the 32-bit baseline.
quantizedSizeMB = (modelSizeMB * bitWidth) / 32

// Estimated accuracy based on typical results
baseAccuracy = 97.5

// Accuracy drop (in percentage points) bucketed by bit width.
// 32-bit is the unquantized FP32 baseline, so it loses nothing; without this
// tier the baseline itself was reported as 0.1 below baseAccuracy.
accuracyLoss = bitWidth >= 32 ? 0 :
               bitWidth >= 16 ? 0.1 :
               bitWidth >= 8 ? 0.3 :
               bitWidth >= 4 ? 1.7 :
               8.2
quantizedAccuracy = baseAccuracy - accuracyLoss

// Percentage saved relative to the FP32 model size.
sizeReduction = ((1 - quantizedSizeMB / modelSizeMB) * 100)

// Bytes needed to store numWeights values at bitWidth bits each.
memoryBytes = (numWeights * bitWidth) / 8

// Quantization error simulation: reciprocal of the number of
// representable levels (2^bitWidth).
quantizationError = 1 / Math.pow(2, bitWidth)

// Summary card with four tiles: quantized model size (with % reduction),
// estimated accuracy (with loss), weight memory in KB (memoryBytes / 1024),
// and the per-value quantization error in exponential notation.
html`<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 25px; border-radius: 10px; color: white; margin: 20px 0;">
  <h3 style="margin-top: 0;">Quantization Results: ${bitWidth}-bit</h3>
  <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 20px; margin-top: 15px;">
    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
      <div style="font-size: 1.8em; font-weight: bold;">${quantizedSizeMB.toFixed(2)} MB</div>
      <div style="opacity: 0.9;">Model Size</div>
      <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">↓ ${sizeReduction.toFixed(1)}% reduction</div>
    </div>
    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
      <div style="font-size: 1.8em; font-weight: bold;">${quantizedAccuracy.toFixed(2)}%</div>
      <div style="opacity: 0.9;">Accuracy</div>
      <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">↓ ${accuracyLoss.toFixed(2)}% loss</div>
    </div>
    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
      <div style="font-size: 1.8em; font-weight: bold;">${(memoryBytes / 1024).toFixed(0)} KB</div>
      <div style="opacity: 0.9;">Memory Usage</div>
      <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">${numWeights.toLocaleString()} weights</div>
    </div>
    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;">
      <div style="font-size: 1.8em; font-weight: bold;">${quantizationError.toExponential(2)}</div>
      <div style="opacity: 0.9;">Quant. Error</div>
      <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">Per value</div>
    </div>
  </div>
</div>`

Bit Width Comparison

Code

// Bit widths shown in the comparison charts; 32 is the unquantized FP32 baseline.
bitWidthOptions = [2, 4, 8, 16, 32]

// One row per bit width: projected model size, estimated accuracy, size as a
// percentage of the FP32 model, and whether it matches the slider selection.
comparisonData = bitWidthOptions.map(bits => {
  const sizeMB = (modelSizeMB * bits) / 32
  // 32-bit is the FP32 baseline itself, so it incurs no accuracy loss;
  // without this tier the baseline point sat 0.1 below baseAccuracy.
  const accLoss = bits >= 32 ? 0 : bits >= 16 ? 0.1 : bits >= 8 ? 0.3 : bits >= 4 ? 1.7 : 8.2
  const acc = baseAccuracy - accLoss
  return {
    bits: `${bits}-bit`,
    bitsNum: bits,
    size: sizeMB,
    accuracy: acc,
    sizePercent: (sizeMB / modelSizeMB) * 100,
    isCurrent: bits === bitWidth
  }
})

// Bar chart of model size per bit width; the bar matching the slider's
// bit width is drawn in red, the others in grey.
Plot.plot({
  height: 400,
  marginLeft: 60,
  marginBottom: 60,
  // The x values are strings ("2-bit", "16-bit", ...). Plot sorts ordinal
  // domains in ascending order by default, which for strings is lexicographic
  // ("16-bit" < "2-bit" < "32-bit" ...). Pin the domain to the data order so
  // the bars run from the smallest to the largest bit width.
  x: {label: "Bit Width", domain: comparisonData.map(d => d.bits)},
  y: {label: "Model Size (MB)", grid: true},
  color: {domain: [false, true], range: ["#94a3b8", "#ef4444"]},
  marks: [
    Plot.barY(comparisonData, {
      x: "bits",
      y: "size",
      fill: "isCurrent",
      title: d => `${d.bits}: ${d.size.toFixed(2)} MB (${d.sizePercent.toFixed(0)}% of FP32)`
    }),
    // Size label above each bar.
    Plot.text(comparisonData, {
      x: "bits",
      y: "size",
      text: d => `${d.size.toFixed(1)} MB`,
      dy: -10,
      fontSize: 11
    }),
    Plot.ruleY([0])
  ]
})

Accuracy vs Model Size Trade-off

Code

// Accuracy plotted against model size, one point per entry of comparisonData.
Plot.plot({
  height: 400,
  marginLeft: 60,
  marginRight: 60,
  x: {label: "Model Size (MB)"},
  // Fixed y range of 85-100 %.
  y: {label: "Accuracy (%)", domain: [85, 100]},
  // Maps isCurrent (false/true) to blue/red for the dots below.
  color: {domain: [false, true], range: ["#0ea5e9", "#ef4444"]},
  marks: [
    // Smooth curve through the points.
    Plot.line(comparisonData, {
      x: "size",
      y: "accuracy",
      stroke: "#0ea5e9",
      strokeWidth: 3,
      curve: "natural"
    }),
    // One dot per bit width; the currently selected one is larger and red.
    Plot.dot(comparisonData, {
      x: "size",
      y: "accuracy",
      fill: "isCurrent",
      r: d => d.isCurrent ? 10 : 6,
      title: d => `${d.bits}: ${d.accuracy.toFixed(2)}% accuracy, ${d.size.toFixed(2)} MB`
    }),
    // Bit-width label above each dot.
    Plot.text(comparisonData, {
      x: "size",
      y: "accuracy",
      text: "bits",
      dy: -15,
      fontSize: 10
    }),
    Plot.gridY({stroke: "#e5e7eb", strokeOpacity: 0.5}),
    // Dashed reference lines at 90 % and 95 % accuracy.
    Plot.ruleY([90, 95], {stroke: "#94a3b8", strokeDasharray: "4,4"})
  ]
})

Quantization Levels Visualization

Code

// Number of synthetic weights to draw.
numSamples = 200
// Half-width of the sampled interval: weights fall in [-weightRange, weightRange).
weightRange = 3

// Uniform random weights: Math.random() is in [0, 1), so the shifted and
// scaled value is in [-weightRange, weightRange). Unseeded, so the sample
// differs each time this cell is evaluated.
sampleWeights = Array.from({length: numSamples}, () =>
  (Math.random() - 0.5) * 2 * weightRange
)

/**
 * Snap a value onto a uniform grid of 2^bits levels spanning [minVal, maxVal].
 *
 * The value is rounded to the nearest grid level and the result is clamped to
 * [minVal, maxVal], so inputs outside the range map to the nearest end point.
 *
 * @param {number} value - The value to quantize.
 * @param {number} bits - Bit width; the grid has 2^bits levels.
 * @param {number} minVal - Lower end of the representable range.
 * @param {number} maxVal - Upper end of the representable range.
 * @returns {number} The quantized value. When the grid collapses to a single
 *   level (bits <= 0 or minVal === maxVal) this is minVal.
 */
function quantizeValue(value, bits, minVal, maxVal) {
  const steps = Math.pow(2, bits) - 1
  const span = maxVal - minVal
  // A grid with a single level has no step size: the division below would
  // produce 0 * Infinity or x / 0 and the result would be NaN.
  if (steps <= 0 || span === 0) {
    return minVal
  }
  const scale = span / steps
  const quantized = Math.round((value - minVal) / scale) * scale + minVal
  return Math.max(minVal, Math.min(maxVal, quantized))
}

// Quantization range: the same symmetric interval the samples are drawn from.
minWeight = -weightRange
maxWeight = weightRange

// Each sample weight snapped to the grid for the selected bit width;
// index-aligned with sampleWeights.
quantizedWeights = sampleWeights.map(w =>
  quantizeValue(w, bitWidth, minWeight, maxWeight)
)

// Long-format rows for the faceted histogram: every sample contributes its
// original value followed by its quantized value, each tagged with a `type`.
histogramData = sampleWeights.flatMap((weight, idx) => [
  {type: "Original (FP32)", value: weight},
  {type: `Quantized (${bitWidth}-bit)`, value: quantizedWeights[idx]}
])

// Histograms of the original and the quantized weights, stacked as two
// facet rows (one per `type`).
Plot.plot({
  height: 350,
  x: {label: "Weight Value", domain: [-weightRange, weightRange]},
  y: {label: "Count"},
  // Blue for the original weights, amber for the quantized ones.
  color: {domain: ["Original (FP32)", `Quantized (${bitWidth}-bit)`], range: ["#3b82f6", "#f59e0b"]},
  marks: [
    Plot.rectY(
      histogramData,
      // Bin the values along x and count the rows in each bin.
      Plot.binX(
        {y: "count"},
        {
          x: "value",
          fill: "type",
          thresholds: 40,
          mixBlendMode: "multiply"
        }
      )
    ),
    Plot.ruleY([0])
  ],
  // One facet row per series; the wide left margin leaves room for the row labels.
  facet: {
    data: histogramData,
    y: "type",
    marginLeft: 150
  }
})

Quantization Error Distribution

Code

// One record per sample: its position, the original weight, the quantized
// weight at the same position, and the absolute difference between the two.
errorData = sampleWeights.map((weight, idx) => {
  const snapped = quantizedWeights[idx]
  return {
    index: idx,
    original: weight,
    quantized: snapped,
    error: Math.abs(weight - snapped)
  }
})

// Scatter of the absolute quantization error per sample on a log y axis,
// with a dashed reference line at quantizationError.
Plot.plot({
  height: 300,
  x: {label: "Sample Index"},
  y: {label: "Absolute Error", type: "log"},
  marks: [
    Plot.dot(errorData, {
      x: "index",
      // Offset by 1e-6 so an error of exactly 0 still has a position on the log axis.
      y: d => d.error + 1e-6,
      fill: "#ef4444",
      r: 2,
      opacity: 0.6
    }),
    // NOTE(review): quantizationError is 1 / 2^bitWidth, which does not take
    // the sampled range (minWeight..maxWeight) into account - confirm this is
    // the intended "expected" level for these samples.
    Plot.ruleY([quantizationError], {
      stroke: "#0ea5e9",
      strokeWidth: 2,
      strokeDasharray: "4,4"
    }),
    // Label for the reference line, right-aligned at 95 % of the x extent.
    Plot.text([{x: numSamples * 0.95, y: quantizationError, label: "Expected Error"}], {
      x: "x",
      y: "y",
      text: "label",
      textAnchor: "end",
      dy: -8,
      fill: "#0ea5e9",
      fontSize: 11
    })
  ]
})

Weight Distribution Comparison

Code

import numpy as np
import matplotlib.pyplot as plt

# Simulate FP32 weights (normal distribution)
np.random.seed(42)  # fixed seed so the figures are reproducible
fp32_weights = np.random.randn(10000) * 0.5


# Quantize to different bit widths
def quantize(weights, bits):
    """Round weights onto a uniform grid of 2**bits levels.

    The grid spans the minimum to the maximum of ``weights``; each value is
    replaced by its nearest grid level.

    Args:
        weights: NumPy array of values to quantize.
        bits: Bit width; the grid has ``2**bits`` levels.

    Returns:
        Array of the same shape holding the quantized values. If every
        element of ``weights`` is identical, a copy of the input is returned.
    """
    min_val, max_val = weights.min(), weights.max()
    # All values identical: the step size would be 0 and the division below
    # would turn every element into NaN. The values already sit on the grid.
    if max_val == min_val:
        return weights.copy()
    scale = (max_val - min_val) / (2**bits - 1)
    quantized = np.round((weights - min_val) / scale) * scale + min_val
    return quantized

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Quantized copies of the simulated weights at 8, 4 and 2 bits.
int8_weights = quantize(fp32_weights, 8)
int4_weights = quantize(fp32_weights, 4)
int2_weights = quantize(fp32_weights, 2)

# One row per panel: (axes, data, number of bins, bar color, title).
panels = [
    (axes[0, 0], fp32_weights, 100, 'blue', 'FP32 (32-bit float)'),
    (axes[0, 1], int8_weights, 100, 'green', 'INT8 (8-bit integer)'),
    (axes[1, 0], int4_weights, 16, 'orange', 'INT4 (4-bit integer)'),
    (axes[1, 1], int2_weights, 4, 'red', 'INT2 (2-bit integer)'),
]

for panel, values, n_bins, shade, heading in panels:
    panel.hist(values, bins=n_bins, alpha=0.7, color=shade)
    panel.set_title(heading)
    panel.set_xlabel('Weight Value')

plt.tight_layout()
plt.show()

Figure 23.1: Effect of quantization on weight distribution

Size vs Accuracy Trade-off

Code

# Simulated data based on typical results
bit_widths = [32, 16, 8, 4, 2]
model_sizes = [100, 50, 25, 12.5, 6.25]  # Relative size (%)
accuracies = [97.5, 97.4, 97.2, 95.8, 89.3]  # Typical MNIST accuracy

# Shared x positions: one slot per bit width, in list order.
positions = range(len(bit_widths))
tick_names = ['{}-bit'.format(width) for width in bit_widths]

fig, ax1 = plt.subplots(figsize=(10, 5))

# Left axis: relative model size as bars.
bars = ax1.bar(positions, model_sizes, color='steelblue', alpha=0.7)
ax1.set_xticks(positions)
ax1.set_xticklabels(tick_names)
ax1.set_ylabel('Model Size (%)', color='steelblue')
ax1.tick_params(axis='y', labelcolor='steelblue')

# Right axis (shares x with the bars): accuracy as a red line with markers.
ax2 = ax1.twinx()
ax2.plot(positions, accuracies, 'ro-', linewidth=2, markersize=10)
ax2.set_ylabel('Accuracy (%)', color='red')
ax2.tick_params(axis='y', labelcolor='red')
ax2.set_ylim(85, 100)

plt.title('Quantization: Size vs Accuracy Trade-off')
plt.tight_layout()
plt.show()

Figure 23.2: Model size vs accuracy for different quantization levels

Quantization Error Analysis

Code

# Absolute quantization error of the simulated weights at each bit width.
error_bit_widths = [2, 4, 8, 16]
errors = []
for bits in error_bit_widths:
    quantized = quantize(fp32_weights, bits)
    error = np.abs(fp32_weights - quantized)
    errors.append(error)

fig, ax = plt.subplots(figsize=(10, 5))
bp = ax.boxplot(errors)
# Label the boxes through the tick API instead of boxplot's `labels` keyword:
# Matplotlib 3.9 renamed it to `tick_labels` and deprecated the old name.
# boxplot places the boxes at x = 1..N, so this works on old and new versions.
ax.set_xticks(range(1, len(errors) + 1))
ax.set_xticklabels([f'{b}-bit' for b in error_bit_widths])
ax.set_ylabel('Absolute Error')
ax.set_xlabel('Quantization Level')
ax.set_title('Quantization Error Distribution')
# Errors span several orders of magnitude across bit widths.
ax.set_yscale('log')
plt.grid(True, alpha=0.3)
plt.show()

/tmp/ipykernel_9954/781551588.py:8: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  bp = ax.boxplot(errors, labels=['2-bit', '4-bit', '8-bit', '16-bit'])

Figure 23.3: Quantization error by bit width

Key Insights

Bit Width	Size Reduction	Typical Accuracy Loss	Use Case
16-bit	2x	< 0.1%	GPU inference
8-bit	4x	< 0.5%	Edge devices (recommended)
4-bit	8x	1-3%	Extreme resource constraints
2-bit	16x	5-10%	Research only

Recommendation

INT8 quantization provides the best balance for edge deployment: a 4x size reduction, minimal accuracy loss, and hardware acceleration on most devices.

Try It Yourself

Open the LAB03 notebook
Train a model on CIFAR-10
Apply different quantization methods
Compare file sizes and accuracy

--- title: "Quantization Explorer" subtitle: "LAB03: TensorFlow Lite and Quantization" --- ## Interactive Quantization Visualization Explore how different bit widths affect model accuracy and size. ::: {.callout-note} ## Concept from LAB03 See **Section 3.2: Quantization Types** in the [PDF book](../downloads/Edge-Analytics-Lab-Book-v1.0.0.pdf). ::: ## Interactive Quantization Simulator ```{ojs} //| echo: false // Input controls viewof bitWidth = Inputs.range([2, 32], { value: 8, step: 1, label: "Bit Width", description: "Number of bits for quantized values" }) viewof modelSizeMB = Inputs.range([1, 100], { value: 10, step: 1, label: "Base Model Size (MB)", description: "Original FP32 model size" }) viewof numWeights = Inputs.range([1000, 1000000], { value: 100000, step: 1000, label: "Number of Weights", description: "Total parameters in the model" }) ``` ```{ojs} //| echo: false // Calculate quantization metrics quantizedSizeMB = (modelSizeMB * bitWidth) / 32 // Estimated accuracy based on typical results baseAccuracy = 97.5 accuracyLoss = bitWidth >= 16 ? 0.1 : bitWidth >= 8 ? 0.3 : bitWidth >= 4 ? 
1.7 : 8.2 quantizedAccuracy = baseAccuracy - accuracyLoss sizeReduction = ((1 - quantizedSizeMB / modelSizeMB) * 100) memoryBytes = (numWeights * bitWidth) / 8 // Quantization error simulation quantizationError = 1 / Math.pow(2, bitWidth) html`<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 25px; border-radius: 10px; color: white; margin: 20px 0;"> <h3 style="margin-top: 0;">Quantization Results: ${bitWidth}-bit</h3> <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 20px; margin-top: 15px;"> <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;"> <div style="font-size: 1.8em; font-weight: bold;">${quantizedSizeMB.toFixed(2)} MB</div> <div style="opacity: 0.9;">Model Size</div> <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">↓ ${sizeReduction.toFixed(1)}% reduction</div> </div> <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;"> <div style="font-size: 1.8em; font-weight: bold;">${quantizedAccuracy.toFixed(2)}%</div> <div style="opacity: 0.9;">Accuracy</div> <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">↓ ${accuracyLoss.toFixed(2)}% loss</div> </div> <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;"> <div style="font-size: 1.8em; font-weight: bold;">${(memoryBytes / 1024).toFixed(0)} KB</div> <div style="opacity: 0.9;">Memory Usage</div> <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">${numWeights.toLocaleString()} weights</div> </div> <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px;"> <div style="font-size: 1.8em; font-weight: bold;">${quantizationError.toExponential(2)}</div> <div style="opacity: 0.9;">Quant. 
Error</div> <div style="font-size: 0.9em; margin-top: 5px; opacity: 0.8;">Per value</div> </div> </div> </div>` ``` ## Bit Width Comparison ```{ojs} //| echo: false bitWidthOptions = [2, 4, 8, 16, 32] comparisonData = bitWidthOptions.map(bits => { const sizeMB = (modelSizeMB * bits) / 32 const accLoss = bits >= 16 ? 0.1 : bits >= 8 ? 0.3 : bits >= 4 ? 1.7 : 8.2 const acc = baseAccuracy - accLoss return { bits: `${bits}-bit`, bitsNum: bits, size: sizeMB, accuracy: acc, sizePercent: (sizeMB / modelSizeMB) * 100, isCurrent: bits === bitWidth } }) Plot.plot({ height: 400, marginLeft: 60, marginBottom: 60, x: {label: "Bit Width"}, y: {label: "Model Size (MB)", grid: true}, color: {domain: [false, true], range: ["#94a3b8", "#ef4444"]}, marks: [ Plot.barY(comparisonData, { x: "bits", y: "size", fill: "isCurrent", title: d => `${d.bits}: ${d.size.toFixed(2)} MB (${d.sizePercent.toFixed(0)}% of FP32)` }), Plot.text(comparisonData, { x: "bits", y: "size", text: d => `${d.size.toFixed(1)} MB`, dy: -10, fontSize: 11 }), Plot.ruleY([0]) ] }) ``` ## Accuracy vs Model Size Trade-off ```{ojs} //| echo: false Plot.plot({ height: 400, marginLeft: 60, marginRight: 60, x: {label: "Model Size (MB)"}, y: {label: "Accuracy (%)", domain: [85, 100]}, color: {domain: [false, true], range: ["#0ea5e9", "#ef4444"]}, marks: [ Plot.line(comparisonData, { x: "size", y: "accuracy", stroke: "#0ea5e9", strokeWidth: 3, curve: "natural" }), Plot.dot(comparisonData, { x: "size", y: "accuracy", fill: "isCurrent", r: d => d.isCurrent ? 
10 : 6, title: d => `${d.bits}: ${d.accuracy.toFixed(2)}% accuracy, ${d.size.toFixed(2)} MB` }), Plot.text(comparisonData, { x: "size", y: "accuracy", text: "bits", dy: -15, fontSize: 10 }), Plot.gridY({stroke: "#e5e7eb", strokeOpacity: 0.5}), Plot.ruleY([90, 95], {stroke: "#94a3b8", strokeDasharray: "4,4"}) ] }) ``` ## Quantization Levels Visualization ```{ojs} //| echo: false // Generate sample weight distribution numSamples = 200 weightRange = 3 sampleWeights = Array.from({length: numSamples}, () => (Math.random() - 0.5) * 2 * weightRange ) // Quantize weights function quantizeValue(value, bits, minVal, maxVal) { const levels = Math.pow(2, bits) const scale = (maxVal - minVal) / (levels - 1) const quantized = Math.round((value - minVal) / scale) * scale + minVal return Math.max(minVal, Math.min(maxVal, quantized)) } minWeight = -weightRange maxWeight = weightRange quantizedWeights = sampleWeights.map(w => quantizeValue(w, bitWidth, minWeight, maxWeight) ) // Create histogram data histogramData = { const data = [] for (let i = 0; i < numSamples; i++) { data.push({ type: "Original (FP32)", value: sampleWeights[i] }) data.push({ type: `Quantized (${bitWidth}-bit)`, value: quantizedWeights[i] }) } return data } Plot.plot({ height: 350, x: {label: "Weight Value", domain: [-weightRange, weightRange]}, y: {label: "Count"}, color: {domain: ["Original (FP32)", `Quantized (${bitWidth}-bit)`], range: ["#3b82f6", "#f59e0b"]}, marks: [ Plot.rectY( histogramData, Plot.binX( {y: "count"}, { x: "value", fill: "type", thresholds: 40, mixBlendMode: "multiply" } ) ), Plot.ruleY([0]) ], facet: { data: histogramData, y: "type", marginLeft: 150 } }) ``` ## Quantization Error Distribution ```{ojs} //| echo: false errorData = sampleWeights.map((original, i) => ({ index: i, original: original, quantized: quantizedWeights[i], error: Math.abs(original - quantizedWeights[i]) })) Plot.plot({ height: 300, x: {label: "Sample Index"}, y: {label: "Absolute Error", type: "log"}, marks: [ 
Plot.dot(errorData, { x: "index", y: d => d.error + 1e-6, fill: "#ef4444", r: 2, opacity: 0.6 }), Plot.ruleY([quantizationError], { stroke: "#0ea5e9", strokeWidth: 2, strokeDasharray: "4,4" }), Plot.text([{x: numSamples * 0.95, y: quantizationError, label: "Expected Error"}], { x: "x", y: "y", text: "label", textAnchor: "end", dy: -8, fill: "#0ea5e9", fontSize: 11 }) ] }) ``` ## Weight Distribution Comparison ```{python} #| label: fig-quantization #| fig-cap: "Effect of quantization on weight distribution" import numpy as np import matplotlib.pyplot as plt # Simulate FP32 weights (normal distribution) np.random.seed(42) fp32_weights = np.random.randn(10000) * 0.5 # Quantize to different bit widths def quantize(weights, bits): min_val, max_val = weights.min(), weights.max() scale = (max_val - min_val) / (2**bits - 1) quantized = np.round((weights - min_val) / scale) * scale + min_val return quantized fig, axes = plt.subplots(2, 2, figsize=(12, 8)) # FP32 axes[0, 0].hist(fp32_weights, bins=100, alpha=0.7, color='blue') axes[0, 0].set_title('FP32 (32-bit float)') axes[0, 0].set_xlabel('Weight Value') # INT8 int8_weights = quantize(fp32_weights, 8) axes[0, 1].hist(int8_weights, bins=100, alpha=0.7, color='green') axes[0, 1].set_title('INT8 (8-bit integer)') axes[0, 1].set_xlabel('Weight Value') # INT4 int4_weights = quantize(fp32_weights, 4) axes[1, 0].hist(int4_weights, bins=16, alpha=0.7, color='orange') axes[1, 0].set_title('INT4 (4-bit integer)') axes[1, 0].set_xlabel('Weight Value') # INT2 int2_weights = quantize(fp32_weights, 2) axes[1, 1].hist(int2_weights, bins=4, alpha=0.7, color='red') axes[1, 1].set_title('INT2 (2-bit integer)') axes[1, 1].set_xlabel('Weight Value') plt.tight_layout() plt.show() ``` ## Size vs Accuracy Trade-off ```{python} #| label: fig-tradeoff #| fig-cap: "Model size vs accuracy for different quantization levels" # Simulated data based on typical results bit_widths = [32, 16, 8, 4, 2] model_sizes = [100, 50, 25, 12.5, 6.25] # Relative 
size (%) accuracies = [97.5, 97.4, 97.2, 95.8, 89.3] # Typical MNIST accuracy fig, ax1 = plt.subplots(figsize=(10, 5)) # Model size bars bars = ax1.bar(range(len(bit_widths)), model_sizes, color='steelblue', alpha=0.7) ax1.set_xticks(range(len(bit_widths))) ax1.set_xticklabels([f'{b}-bit' for b in bit_widths]) ax1.set_ylabel('Model Size (%)', color='steelblue') ax1.tick_params(axis='y', labelcolor='steelblue') # Accuracy line ax2 = ax1.twinx() ax2.plot(range(len(bit_widths)), accuracies, 'ro-', linewidth=2, markersize=10) ax2.set_ylabel('Accuracy (%)', color='red') ax2.tick_params(axis='y', labelcolor='red') ax2.set_ylim(85, 100) plt.title('Quantization: Size vs Accuracy Trade-off') plt.tight_layout() plt.show() ``` ## Quantization Error Analysis ```{python} #| label: fig-error #| fig-cap: "Quantization error by bit width" errors = [] for bits in [2, 4, 8, 16]: quantized = quantize(fp32_weights, bits) error = np.abs(fp32_weights - quantized) errors.append(error) fig, ax = plt.subplots(figsize=(10, 5)) bp = ax.boxplot(errors, labels=['2-bit', '4-bit', '8-bit', '16-bit']) ax.set_ylabel('Absolute Error') ax.set_xlabel('Quantization Level') ax.set_title('Quantization Error Distribution') ax.set_yscale('log') plt.grid(True, alpha=0.3) plt.show() ``` ## Key Insights | Bit Width | Size Reduction | Typical Accuracy Loss | Use Case | |-----------|----------------|----------------------|----------| | 16-bit | 2x | < 0.1% | GPU inference | | 8-bit | 4x | < 0.5% | **Edge devices (recommended)** | | 4-bit | 8x | 1-3% | Extreme resource constraints | | 2-bit | 16x | 5-10% | Research only | ::: {.callout-tip} ## Recommendation **INT8 quantization** provides the best balance for edge deployment: - 4x size reduction - Minimal accuracy loss - Hardware acceleration on most devices ::: ## Try It Yourself 1. Open the [LAB03 notebook](https://github.com/ngcharithperera/edge-analytics-lab-book/blob/main/notebooks/LAB03_tflite_quantization.ipynb) 2. Train a model on CIFAR-10 3. 
Apply different quantization methods 4. Compare file sizes and accuracy ## Related Sections in PDF Book - Section 3.1: Why Quantization? - Section 3.2: Quantization Types - Section 3.3: Post-Training Quantization - Section 3.4: Quantization-Aware Training