mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-28 13:48:56 +08:00
Implementation of PCA with mat64.Dense
This commit is contained in:
parent
2b6533e659
commit
0217fef0c2
83
pca/pca.go
Normal file
83
pca/pca.go
Normal file
@ -0,0 +1,83 @@
|
||||
//Implementation of Principal Component Analysis(PCA) with SVD
|
||||
package pca
|
||||
|
||||
import
|
||||
(
|
||||
"github.com/gonum/matrix/mat64"
|
||||
"math"
|
||||
)
|
||||
|
||||
type PCA struct {
|
||||
Num_components int
|
||||
}
|
||||
|
||||
|
||||
// Number of components. 0 - by default, use number of features as number of components
|
||||
func NewPCA(num_components int)*PCA {
|
||||
return &PCA {Num_components: num_components}
|
||||
}
|
||||
|
||||
|
||||
//Need return is base.FixedDataGrid
|
||||
func (pca*PCA) Transform(X *mat64.Dense) (*mat64.Dense){
|
||||
//Prepare before PCA
|
||||
|
||||
num_samples, num_features := X.Dims()
|
||||
//Mean to input data
|
||||
M := mean(X)
|
||||
X = matrixSubVector(X, M)
|
||||
|
||||
//Get SVD decomposition from data
|
||||
svd := mat64.SVD(mat64.DenseCopyOf(X),math.Pow(2, -52.0),math.Pow(2, -966.0),false, true)
|
||||
if pca.Num_components < 0 {
|
||||
panic("Number of components can't be less than zero")
|
||||
}
|
||||
|
||||
//Compute to full data
|
||||
if pca.Num_components == 0 || pca.Num_components > num_features{
|
||||
return compute(X,svd.V)
|
||||
}
|
||||
|
||||
X = compute(X,svd.V)
|
||||
result := mat64.NewDense(num_samples, pca.Num_components, nil)
|
||||
result.Copy(X.View(0,0,num_samples,pca.Num_components))
|
||||
return result
|
||||
}
|
||||
|
||||
|
||||
//Helpful private functions
|
||||
|
||||
//Compute mean of the columns of input matrix
|
||||
func mean(matrix *mat64.Dense)*mat64.Dense{
|
||||
rows, cols := matrix.Dims()
|
||||
meanVector := make([]float64, cols)
|
||||
for i := 0; i < cols; i++ {
|
||||
sum := 0.0
|
||||
for _, it := range matrix.Col(nil,i){
|
||||
sum += it
|
||||
}
|
||||
meanVector[i] = sum/float64(rows)
|
||||
}
|
||||
return mat64.NewDense(1, cols, meanVector)
|
||||
}
|
||||
|
||||
// After computing of mean, compute: X(input matrix) - X(mean vector)
|
||||
func matrixSubVector(mat, vec *mat64.Dense)*mat64.Dense {
|
||||
rowsm, colsm := mat.Dims()
|
||||
_, colsv := vec.Dims()
|
||||
if colsv != colsm {
|
||||
panic("Error in dimension")
|
||||
}
|
||||
for i := 0; i < rowsm; i++ {
|
||||
for j := 0; j < colsm; j++ {
|
||||
mat.Set(i,j, (mat.At(i,j) - vec.At(0,j)))
|
||||
}
|
||||
}
|
||||
return mat
|
||||
}
|
||||
|
||||
//Multiplication of X(input data) and V(from SVD)
|
||||
func compute (X, Y *mat64.Dense) *mat64.Dense{
|
||||
X.Mul(X,Y)
|
||||
return X
|
||||
}
|
62
pca/pca_test.go
Normal file
62
pca/pca_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
package pca
|
||||
|
||||
import (
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
"github.com/gonum/matrix/mat64"
|
||||
|
||||
)
|
||||
|
||||
func TestPCAWithZeroComponents(t *testing.T){
|
||||
Convey("Set to pca 0 components with first matrix", t, func(){
|
||||
X1 := mat64.NewDense(3,7, []float64{6,5,4,3,8,2,9,5,1,10,2,3,8,7,5,14,2,3,6,3,2})
|
||||
pca := NewPCA(0)
|
||||
rows, cols := pca.Transform(X1).Dims()
|
||||
So(rows, ShouldEqual, 3)
|
||||
So(cols, ShouldEqual, 3)
|
||||
})
|
||||
|
||||
Convey("Set to pca 0 components with second matrix", t, func(){
|
||||
X1 := mat64.NewDense(10,5, []float64{
|
||||
0.52984892, 0.1141001 , 0.91599294, 0.9574267 , 0.15361222,
|
||||
0.07057588, 0.46371013, 0.73091854, 0.84641034, 0.08122213,
|
||||
0.96221946, 0.60367214, 0.69851546, 0.91965564, 0.27040597,
|
||||
0.03152856, 0.97912403, 0.39487038, 0.12232594, 0.18474705,
|
||||
0.77061953, 0.35898551, 0.78684562, 0.11638404, 0.88908044,
|
||||
0.35828086, 0.47214831, 0.95781755, 0.74762736, 0.59850757,
|
||||
0.07806127, 0.96940955, 0.15751804, 0.00973325, 0.85041635,
|
||||
0.02663938, 0.49755131, 0.57984119, 0.12233871, 0.47967853,
|
||||
0.63903222, 0.88556565, 0.79797963, 0.13345186, 0.37415535,
|
||||
0.60605207, 0.52067165, 0.91217494, 0.57148943, 0.92210331})
|
||||
pca := NewPCA(0)
|
||||
rows, cols := pca.Transform(X1).Dims()
|
||||
So(rows, ShouldEqual, 10)
|
||||
So(cols, ShouldEqual, 5)
|
||||
})
|
||||
}
|
||||
|
||||
func TestPCAWithNComponents(t *testing.T){
|
||||
Convey("Set to pca 3 components with 5x5 matrix", t, func(){
|
||||
X := mat64.NewDense(5,5, [] float64{
|
||||
0.23030838, 0.05669317, 0.3187813 , 0.34455114, 0.98062806,
|
||||
0.38995469, 0.2996771 , 0.99043575, 0.04443827, 0.99527955,
|
||||
0.27266308, 0.14068906, 0.46999473, 0.03296131, 0.90855405,
|
||||
0.28360708, 0.8839966 , 0.81107014, 0.52673877, 0.59432817,
|
||||
0.64107253, 0.56165215, 0.79811756, 0.48845398, 0.20506649})
|
||||
pca := NewPCA(3)
|
||||
rows, cols := pca.Transform(X).Dims()
|
||||
So(rows, ShouldEqual, 5)
|
||||
So(cols, ShouldEqual, 3)
|
||||
})
|
||||
|
||||
Convey("Set to pca 2 components with 3x5 matrix",t, func(){
|
||||
X := mat64.NewDense(3,5, [] float64{
|
||||
0.12294845, 0.55170713, 0.67572832, 0.60615516, 0.38184551,
|
||||
0.93486821, 0.15120374, 0.89760169, 0.74715672, 0.81373931,
|
||||
0.42821569, 0.47457753, 0.18960954, 0.42466159, 0.34166049})
|
||||
pca := NewPCA(2)
|
||||
rows, cols := pca.Transform(X).Dims()
|
||||
So(rows, ShouldEqual,3)
|
||||
So(cols, ShouldEqual,2)
|
||||
})
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user