1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-30 13:48:57 +08:00
golearn/pca/pca.go

84 lines
2.0 KiB
Go
Raw Normal View History

2015-03-12 02:51:31 +05:00
// Package pca implements Principal Component Analysis (PCA) using singular value decomposition (SVD).
package pca
import (
"github.com/gonum/matrix"
2015-03-12 02:51:31 +05:00
"github.com/gonum/matrix/mat64"
)
// PCA holds the configuration for a principal component analysis.
type PCA struct {
	// Num_components is the number of components Transform keeps.
	// Transform treats 0 as "keep everything" and panics on a negative value.
	Num_components int
}
// Number of components. 0 - by default, use number of features as number of components
func NewPCA(num_components int) *PCA {
return &PCA{Num_components: num_components}
2015-03-12 02:51:31 +05:00
}
//Need return is base.FixedDataGrid
func (pca *PCA) Transform(X *mat64.Dense) *mat64.Dense {
2015-03-12 02:51:31 +05:00
//Prepare before PCA
num_samples, num_features := X.Dims()
//Mean to input data
M := mean(X)
X = matrixSubVector(X, M)
//Get SVD decomposition from data
var svd mat64.SVD
ok := svd.Factorize(X, matrix.SVDThin)
if !ok {
panic("Unable to factorize")
}
2015-03-12 02:51:31 +05:00
if pca.Num_components < 0 {
panic("Number of components can't be less than zero")
}
vTemp := new(mat64.Dense)
vTemp.VFromSVD(&svd)
2015-03-12 02:51:31 +05:00
//Compute to full data
if pca.Num_components == 0 || pca.Num_components > num_features {
return compute(X, vTemp)
2015-03-12 02:51:31 +05:00
}
X = compute(X, vTemp)
2015-03-12 02:51:31 +05:00
result := mat64.NewDense(num_samples, pca.Num_components, nil)
result.Copy(X.View(0, 0, num_samples, pca.Num_components))
2015-03-12 02:51:31 +05:00
return result
}
// Private helper functions.
//Compute mean of the columns of input matrix
func mean(matrix *mat64.Dense) *mat64.Dense {
2015-03-12 02:51:31 +05:00
rows, cols := matrix.Dims()
meanVector := make([]float64, cols)
for i := 0; i < cols; i++ {
sum := mat64.Sum(matrix.ColView(i))
meanVector[i] = sum / float64(rows)
2015-03-12 02:51:31 +05:00
}
return mat64.NewDense(1, cols, meanVector)
}
// After computing of mean, compute: X(input matrix) - X(mean vector)
func matrixSubVector(mat, vec *mat64.Dense) *mat64.Dense {
2015-03-12 02:51:31 +05:00
rowsm, colsm := mat.Dims()
_, colsv := vec.Dims()
if colsv != colsm {
panic("Error in dimension")
}
for i := 0; i < rowsm; i++ {
for j := 0; j < colsm; j++ {
mat.Set(i, j, (mat.At(i, j) - vec.At(0, j)))
2015-03-12 02:51:31 +05:00
}
}
return mat
}
// compute returns the matrix product X·Y in a newly allocated dense matrix.
// In this file X is the input data and Y is the V matrix taken from the SVD.
func compute(X, Y mat64.Matrix) *mat64.Dense {
	product := new(mat64.Dense)
	product.Mul(X, Y)
	return product
}