Principal Component Analysis (PCA) is a powerful technique widely used in machine learning and data analysis to reduce the dimensionality of datasets while retaining the most important information. But how exactly does it work—and what does it look like?
In this post, I introduce an interactive web-based PCA visualizer that takes randomly generated 3D data and reduces it to 2D using PCA. You can tweak parameters like the number of data points, the noise level, and the strength of correlation to see how PCA behaves in real time.
PCA transforms a dataset into a new coordinate system where the greatest variance comes to lie on the first coordinate (the first principal component), the second greatest on the second coordinate, and so on. This is done by:
Subtracting the mean of each feature to center the dataset.
Calculating the covariance matrix, which reveals how the variables relate to one another.
Deriving the eigenvalues and eigenvectors from the covariance matrix.
Measuring how much variance each principal component retains, then projecting the centered data onto the top components (here, the first two).
Below is the complete HTML + JS source. You can copy and paste it into any .html file and run it locally in your browser.
<div id="pca-visualization">
<style>
#pca-visualization {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
max-width: 100%;
margin: 0 auto;
padding: 15px;
}
#pca-visualization .plot-container {
margin: 20px 0;
height: 500px;
width: 100%;
}
#pca-visualization .formula {
background-color: #f8f9fa;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
font-family: 'Courier New', monospace;
}
#pca-visualization .step-explanation {
margin: 20px 0;
padding: 15px;
border-left: 3px solid #007bff;
background-color: #f8f9fa;
}
#pca-visualization .control-panel {
background-color: #f8f9fa;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
#pca-visualization .form-group {
margin-bottom: 15px;
}
#pca-visualization .form-label {
display: block;
margin-bottom: 5px;
font-weight: 500;
}
#pca-visualization .form-control {
width: 100%;
padding: 8px;
border: 1px solid #ced4da;
border-radius: 4px;
}
#pca-visualization .form-range {
width: 100%;
}
#pca-visualization .btn {
background-color: #007bff;
color: white;
padding: 8px 16px;
border: none;
border-radius: 4px;
cursor: pointer;
}
#pca-visualization .btn:hover {
background-color: #0056b3;
}
#pca-visualization .row {
display: flex;
flex-wrap: wrap;
margin: -10px;
}
#pca-visualization .col {
flex: 1;
padding: 10px;
min-width: 300px;
}
</style>
<h2>Principal Component Analysis (PCA) Visualization</h2>
<div class="control-panel">
<div class="form-group">
<label class="form-label" for="numPoints">Number of Points</label>
<input type="number" class="form-control" id="numPoints" value="100">
</div>
<div class="form-group">
<label class="form-label" for="noise">Noise Level</label>
<input type="range" class="form-range" id="noise" min="0" max="1" step="0.1" value="0.2">
</div>
<div class="form-group">
<label class="form-label" for="correlation">Correlation Strength</label>
<input type="range" class="form-range" id="correlation" min="0" max="1" step="0.1" value="0.7">
</div>
<button class="btn" onclick="generateAndAnalyze()">Generate & Analyze</button>
</div>
<div class="row">
<div class="col">
<h3>Original 3D Data</h3>
<div id="plot3d" class="plot-container"></div>
</div>
<div class="col">
<h3>PCA Result (2D Projection)</h3>
<div id="plot2d" class="plot-container"></div>
</div>
</div>
<div class="explanation-section">
<h3>PCA Step-by-Step Explanation</h3>
<div id="explanation"></div>
</div>
<!-- Load required libraries -->
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjs/9.4.4/math.js"></script>
<script>
/**
 * Generates a random 3D point cloud for the PCA demo.
 *
 * Each point starts on a circle in the x/y plane whose radius is
 * `correlationStrength`; z is derived from x + y (scaled again by
 * `correlationStrength`), and every coordinate then receives independent
 * uniform jitter scaled by `noiseLevel`.
 *
 * @param {number} numPoints - How many points to generate.
 * @param {number} noiseLevel - Width of the uniform jitter added per coordinate.
 * @param {number} correlationStrength - Scale of the circle and of the z coupling.
 * @returns {number[][]} Array of [x, y, z] triples.
 */
function generateData(numPoints, noiseLevel, correlationStrength) {
  // Uniform jitter centred on zero, spanning `noiseLevel` in total.
  const jitter = () => (Math.random() - 0.5) * noiseLevel;

  const points = [];
  while (points.length < numPoints) {
    const angle = Math.random() * 2 * Math.PI;
    const x = Math.cos(angle) * correlationStrength;
    const y = Math.sin(angle) * correlationStrength;
    const z = (x + y) * correlationStrength;
    points.push([x + jitter(), y + jitter(), z + jitter()]);
  }
  return points;
}
/**
 * Centers a dataset by subtracting each column's mean from that column.
 *
 * @param {number[][]} data - Rows of observations; the column count is taken
 *   from the first row.
 * @returns {{centeredData: number[][], mean: number[]}} The mean-subtracted
 *   rows and the per-column mean vector. Both are empty arrays when `data`
 *   is empty. The input is not modified.
 */
function centerData(data) {
  // An empty dataset has no first row to take the column layout from;
  // return empty results instead of throwing on data[0].
  if (data.length === 0) {
    return { centeredData: [], mean: [] };
  }

  const n = data.length;
  const mean = data[0].map((_, col) =>
    data.reduce((sum, row) => sum + row[col], 0) / n
  );
  const centeredData = data.map((row) =>
    row.map((val, col) => val - mean[col])
  );

  return { centeredData, mean };
}
/**
 * Computes the sample covariance matrix of already-centered data.
 *
 * Entry [a][b] is the sum over all rows of row[a] * row[b], divided by
 * (number of rows - 1). The feature count is taken from the first row.
 *
 * @param {number[][]} centeredData - Mean-subtracted observations, one per row.
 * @returns {number[][]} Square matrix with one row/column per feature.
 */
function calculateCovarianceMatrix(centeredData) {
  const sampleCount = centeredData.length;
  const featureCount = centeredData[0].length;
  const denominator = sampleCount - 1;
  const featureIndices = Array.from({ length: featureCount }, (_, idx) => idx);

  return featureIndices.map((a) =>
    featureIndices.map((b) => {
      let crossSum = 0;
      for (const row of centeredData) {
        crossSum += row[a] * row[b];
      }
      return crossSum / denominator;
    })
  );
}
/**
 * Runs PCA on the given rows and projects them onto the two leading components.
 *
 * Pipeline: center the data, build the covariance matrix, decompose it with
 * math.eigs, order the eigenpairs by descending absolute eigenvalue, then take
 * the dot product of every centered row with the first two eigenvectors.
 *
 * @param {number[][]} data - Observations, one per row.
 * @returns {{originalData: number[][], centeredData: number[][], mean: number[],
 *   covMatrix: number[][], eigenvalues: number[], eigenvectors: number[][],
 *   projectedData: number[][]}} Every intermediate result plus the 2D projection.
 */
function performPCA(data) {
  const centered = centerData(data);
  const centeredData = centered.centeredData;
  const mean = centered.mean;

  const covMatrix = calculateCovarianceMatrix(centeredData);
  const decomposition = math.eigs(covMatrix);
  const rawValues = decomposition.values;
  const rawVectors = decomposition.vectors;

  // Positions of the eigenvalues, largest magnitude first.
  const byDescendingMagnitude = (a, b) =>
    Math.abs(rawValues[b]) - Math.abs(rawValues[a]);
  const order = rawValues.map((_, position) => position).sort(byDescendingMagnitude);

  const eigenvalues = order.map((position) => rawValues[position]);
  // Each eigenvector is read as a column of the matrix returned by math.eigs.
  const eigenvectors = order.map((position) =>
    math.squeeze(math.column(rawVectors, position))
  );

  const projectedData = centeredData.map((point) => [
    math.dot(point, eigenvectors[0]),
    math.dot(point, eigenvectors[1]),
  ]);

  return {
    originalData: data,
    centeredData,
    mean,
    covMatrix,
    eigenvalues,
    eigenvectors,
    projectedData,
  };
}
/**
 * Renders the step-by-step PCA explanation into the #explanation element.
 *
 * @param {{covMatrix: number[][], eigenvalues: number[], eigenvectors: number[][]}} pcaResults
 *   Result object from performPCA; only these three fields are read, and only
 *   the first two eigenvalues/eigenvectors are displayed individually.
 * @returns {void}
 */
function updateExplanation(pcaResults) {
  const explanation = document.getElementById('explanation');
  const { covMatrix, eigenvalues, eigenvectors } = pcaResults;

  const formatVector = (values) => values.map((v) => v.toFixed(3)).join(', ');

  // Total variance is computed once. When it is zero (e.g. noise and
  // correlation both set to 0, so every point coincides) the ratio would be
  // 0 / 0; report 0% instead of rendering "NaN%".
  const totalVariance = math.sum(eigenvalues);
  const explainedPercent = (eigenvalue) =>
    (totalVariance > 0 ? (eigenvalue / totalVariance) * 100 : 0).toFixed(2);

  const steps = [
    {
      title: "Step 1: Data Centering",
      content: [
        'First, we center the data by subtracting the mean from each feature. This ensures that the first principal component passes through the center of the data cloud.',
        '<div class="formula">X_centered = X - μ</div>',
        'where μ is the mean vector of the original data.',
      ].join('\n'),
    },
    {
      title: "Step 2: Covariance Matrix",
      content: [
        'We calculate the covariance matrix to understand the relationships between variables:',
        `<div class="formula">Σ = ${math.format(covMatrix, { precision: 3 })}</div>`,
      ].join('\n'),
    },
    {
      title: "Step 3: Eigendecomposition",
      content: [
        'We find the eigenvalues and eigenvectors of the covariance matrix. The eigenvalues represent the amount of variance explained by each principal component:',
        '<div class="formula">',
        `Eigenvalues: [${formatVector(eigenvalues)}]<br>`,
        'Principal Components (Eigenvectors):<br>',
        `PC1: [${formatVector(eigenvectors[0])}]<br>`,
        `PC2: [${formatVector(eigenvectors[1])}]`,
        '</div>',
      ].join('\n'),
    },
    {
      title: "Step 4: Explained Variance",
      content: [
        'The proportion of variance explained by each principal component:',
        '<div class="formula">',
        `PC1: ${explainedPercent(eigenvalues[0])}%<br>`,
        `PC2: ${explainedPercent(eigenvalues[1])}%`,
        '</div>',
      ].join('\n'),
    },
  ];

  // All interpolated values are numbers formatted by this function, so
  // assigning through innerHTML does not insert user-supplied markup.
  explanation.innerHTML = steps
    .map((step) =>
      [
        '',
        '<div class="step-explanation">',
        `<h4>${step.title}</h4>`,
        `<p>${step.content}</p>`,
        '</div>',
        '',
      ].join('\n')
    )
    .join('');
}
/**
 * Draws the two Plotly charts: the original points as a 3D scatter in
 * #plot3d and their 2D projection as a scatter in #plot2d.
 *
 * Markers in both charts are coloured by the point's index in the original
 * data, so the same point has the same colour in each view.
 *
 * @param {{originalData: number[][], projectedData: number[][]}} pcaResults
 *   Result object from performPCA; only these two fields are read.
 * @returns {void}
 */
function plotData(pcaResults) {
  const points3d = pcaResults.originalData;
  const points2d = pcaResults.projectedData;

  // Pulls one coordinate out of every row.
  const pluck = (rows, axis) => rows.map((row) => row[axis]);
  // Fresh index array per trace: [0, 1, 2, ...] over the original points.
  const indexColors = () => points3d.map((_, position) => position);

  const scatter3d = {
    type: 'scatter3d',
    mode: 'markers',
    marker: { size: 5, color: indexColors(), colorscale: 'Viridis' },
    x: pluck(points3d, 0),
    y: pluck(points3d, 1),
    z: pluck(points3d, 2),
  };
  const layout3d = {
    title: 'Original 3D Data',
    margin: { l: 0, r: 0, b: 0, t: 30 },
  };
  Plotly.newPlot('plot3d', [scatter3d], layout3d);

  const scatter2d = {
    type: 'scatter',
    mode: 'markers',
    marker: { size: 8, color: indexColors(), colorscale: 'Viridis' },
    x: pluck(points2d, 0),
    y: pluck(points2d, 1),
  };
  const layout2d = {
    title: 'PCA Projected Data (2D)',
    xaxis: { title: 'First Principal Component' },
    yaxis: { title: 'Second Principal Component' },
    margin: { l: 50, r: 50, b: 50, t: 30 },
  };
  Plotly.newPlot('plot2d', [scatter2d], layout2d);
}
/**
 * Reads the control-panel inputs, generates a fresh dataset, runs PCA on it,
 * and refreshes both plots and the step-by-step explanation.
 *
 * The point count is sanitised before use: a blank or non-numeric entry falls
 * back to 100, and anything below 2 is raised to 2, because the sample
 * covariance divides by (n - 1) and the pipeline cannot handle fewer points.
 * The value actually used is written back to the #numPoints input.
 *
 * @returns {void}
 */
function generateAndAnalyze() {
  const DEFAULT_NUM_POINTS = 100; // mirrors the input's initial value attribute
  const MIN_NUM_POINTS = 2;

  const numPointsInput = document.getElementById('numPoints');
  const requestedPoints = Number.parseInt(numPointsInput.value, 10);
  const numPoints = Number.isNaN(requestedPoints)
    ? DEFAULT_NUM_POINTS
    : Math.max(MIN_NUM_POINTS, requestedPoints);
  // Show the user the count that is actually being plotted.
  numPointsInput.value = String(numPoints);

  const noiseLevel = parseFloat(document.getElementById('noise').value);
  const correlationStrength = parseFloat(document.getElementById('correlation').value);

  const data = generateData(numPoints, noiseLevel, correlationStrength);
  const pcaResults = performPCA(data);
  plotData(pcaResults);
  updateExplanation(pcaResults);
}
// Initial generation on page load. The handler is registered with
// addEventListener rather than assigned to window.onload so that it neither
// replaces nor is replaced by another load handler on the host page.
window.addEventListener('load', generateAndAnalyze);
</script>
</div>
Comments
Post a Comment