1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-28 13:48:56 +08:00

PCA fit method

This commit is contained in:
Aleksei Kiselev 2018-02-27 11:39:57 +02:00
parent 99626ce479
commit 8e81a4b6d2
2 changed files with 78 additions and 46 deletions

View File

@ -8,6 +8,7 @@ import (
type PCA struct { type PCA struct {
Num_components int Num_components int
svd *mat64.SVD
} }
// Number of components. 0 - by default, use number of features as number of components // Number of components. 0 - by default, use number of features as number of components
@ -15,18 +16,21 @@ func NewPCA(num_components int) *PCA {
return &PCA{Num_components: num_components} return &PCA{Num_components: num_components}
} }
//Need return is base.FixedDataGrid // Fit PCA model and transform data
func (pca *PCA) Transform(X *mat64.Dense) *mat64.Dense { // Need return is base.FixedDataGrid
//Prepare before PCA func (pca *PCA) FitTransform(X *mat64.Dense) *mat64.Dense {
return pca.Fit(X).Transform(X)
}
num_samples, num_features := X.Dims() // Fit PCA model
//Mean to input data func (pca *PCA) Fit(X *mat64.Dense) *PCA {
// Subtract the mean from the input data
M := mean(X) M := mean(X)
X = matrixSubVector(X, M) X = matrixSubVector(X, M)
//Get SVD decomposition from data // Get SVD decomposition from data
var svd mat64.SVD pca.svd = &mat64.SVD{}
ok := svd.Factorize(X, matrix.SVDThin) ok := pca.svd.Factorize(X, matrix.SVDThin)
if !ok { if !ok {
panic("Unable to factorize") panic("Unable to factorize")
} }
@ -34,8 +38,19 @@ func (pca *PCA) Transform(X *mat64.Dense) *mat64.Dense {
panic("Number of components can't be less than zero") panic("Number of components can't be less than zero")
} }
return pca
}
// TODO: the return type should presumably be base.FixedDataGrid
func (pca *PCA) Transform(X *mat64.Dense) *mat64.Dense {
if pca.svd == nil {
panic("You should to fit PCA model first")
}
num_samples, num_features := X.Dims()
vTemp := new(mat64.Dense) vTemp := new(mat64.Dense)
vTemp.VFromSVD(&svd) vTemp.VFromSVD(pca.svd)
//Compute to full data //Compute to full data
if pca.Num_components == 0 || pca.Num_components > num_features { if pca.Num_components == 0 || pca.Num_components > num_features {
return compute(X, vTemp) return compute(X, vTemp)

View File

@ -1,62 +1,79 @@
package pca package pca
import ( import (
. "github.com/smartystreets/goconvey/convey"
"testing" "testing"
"github.com/gonum/matrix/mat64"
"github.com/gonum/matrix/mat64"
. "github.com/smartystreets/goconvey/convey"
) )
func TestPCAWithZeroComponents(t *testing.T){ func TestPCAWithZeroComponents(t *testing.T) {
Convey("Set to pca 0 components with first matrix", t, func(){ Convey("Set to pca 0 components with first matrix", t, func() {
X1 := mat64.NewDense(3,7, []float64{6,5,4,3,8,2,9,5,1,10,2,3,8,7,5,14,2,3,6,3,2}) X1 := mat64.NewDense(3, 7, []float64{6, 5, 4, 3, 8, 2, 9, 5, 1, 10, 2, 3, 8, 7, 5, 14, 2, 3, 6, 3, 2})
pca := NewPCA(0) pca := NewPCA(0)
rows, cols := pca.Transform(X1).Dims() rows, cols := pca.FitTransform(X1).Dims()
So(rows, ShouldEqual, 3) So(rows, ShouldEqual, 3)
So(cols, ShouldEqual, 3) So(cols, ShouldEqual, 3)
}) })
Convey("Set to pca 0 components with second matrix", t, func(){ Convey("Set to pca 0 components with second matrix", t, func() {
X1 := mat64.NewDense(10,5, []float64{ X1 := mat64.NewDense(10, 5, []float64{
0.52984892, 0.1141001 , 0.91599294, 0.9574267 , 0.15361222, 0.52984892, 0.1141001, 0.91599294, 0.9574267, 0.15361222,
0.07057588, 0.46371013, 0.73091854, 0.84641034, 0.08122213, 0.07057588, 0.46371013, 0.73091854, 0.84641034, 0.08122213,
0.96221946, 0.60367214, 0.69851546, 0.91965564, 0.27040597, 0.96221946, 0.60367214, 0.69851546, 0.91965564, 0.27040597,
0.03152856, 0.97912403, 0.39487038, 0.12232594, 0.18474705, 0.03152856, 0.97912403, 0.39487038, 0.12232594, 0.18474705,
0.77061953, 0.35898551, 0.78684562, 0.11638404, 0.88908044, 0.77061953, 0.35898551, 0.78684562, 0.11638404, 0.88908044,
0.35828086, 0.47214831, 0.95781755, 0.74762736, 0.59850757, 0.35828086, 0.47214831, 0.95781755, 0.74762736, 0.59850757,
0.07806127, 0.96940955, 0.15751804, 0.00973325, 0.85041635, 0.07806127, 0.96940955, 0.15751804, 0.00973325, 0.85041635,
0.02663938, 0.49755131, 0.57984119, 0.12233871, 0.47967853, 0.02663938, 0.49755131, 0.57984119, 0.12233871, 0.47967853,
0.63903222, 0.88556565, 0.79797963, 0.13345186, 0.37415535, 0.63903222, 0.88556565, 0.79797963, 0.13345186, 0.37415535,
0.60605207, 0.52067165, 0.91217494, 0.57148943, 0.92210331}) 0.60605207, 0.52067165, 0.91217494, 0.57148943, 0.92210331})
pca := NewPCA(0) pca := NewPCA(0)
rows, cols := pca.Transform(X1).Dims() rows, cols := pca.FitTransform(X1).Dims()
So(rows, ShouldEqual, 10) So(rows, ShouldEqual, 10)
So(cols, ShouldEqual, 5) So(cols, ShouldEqual, 5)
}) })
} }
func TestPCAWithNComponents(t *testing.T){ func TestPCAWithNComponents(t *testing.T) {
Convey("Set to pca 3 components with 5x5 matrix", t, func(){ Convey("Set to pca 3 components with 5x5 matrix", t, func() {
X := mat64.NewDense(5,5, [] float64{ X := mat64.NewDense(5, 5, []float64{
0.23030838, 0.05669317, 0.3187813 , 0.34455114, 0.98062806, 0.23030838, 0.05669317, 0.3187813, 0.34455114, 0.98062806,
0.38995469, 0.2996771 , 0.99043575, 0.04443827, 0.99527955, 0.38995469, 0.2996771, 0.99043575, 0.04443827, 0.99527955,
0.27266308, 0.14068906, 0.46999473, 0.03296131, 0.90855405, 0.27266308, 0.14068906, 0.46999473, 0.03296131, 0.90855405,
0.28360708, 0.8839966 , 0.81107014, 0.52673877, 0.59432817, 0.28360708, 0.8839966, 0.81107014, 0.52673877, 0.59432817,
0.64107253, 0.56165215, 0.79811756, 0.48845398, 0.20506649}) 0.64107253, 0.56165215, 0.79811756, 0.48845398, 0.20506649})
pca := NewPCA(3) pca := NewPCA(3)
rows, cols := pca.Transform(X).Dims() rows, cols := pca.FitTransform(X).Dims()
So(rows, ShouldEqual, 5) So(rows, ShouldEqual, 5)
So(cols, ShouldEqual, 3) So(cols, ShouldEqual, 3)
}) })
Convey("Set to pca 2 components with 3x5 matrix",t, func(){ Convey("Set to pca 2 components with 3x5 matrix", t, func() {
X := mat64.NewDense(3,5, [] float64{ X := mat64.NewDense(3, 5, []float64{
0.12294845, 0.55170713, 0.67572832, 0.60615516, 0.38184551, 0.12294845, 0.55170713, 0.67572832, 0.60615516, 0.38184551,
0.93486821, 0.15120374, 0.89760169, 0.74715672, 0.81373931, 0.93486821, 0.15120374, 0.89760169, 0.74715672, 0.81373931,
0.42821569, 0.47457753, 0.18960954, 0.42466159, 0.34166049}) 0.42821569, 0.47457753, 0.18960954, 0.42466159, 0.34166049})
pca := NewPCA(2) pca := NewPCA(2)
rows, cols := pca.Transform(X).Dims() rows, cols := pca.FitTransform(X).Dims()
So(rows, ShouldEqual,3) So(rows, ShouldEqual, 3)
So(cols, ShouldEqual,2) So(cols, ShouldEqual, 2)
}) })
} }
func TestPCAFitAndTransformSeparately(t *testing.T) {
Convey("Set to pca 3 components with 5x5 matrix", t, func() {
X := mat64.NewDense(5, 5, []float64{
0.23030838, 0.05669317, 0.3187813, 0.34455114, 0.98062806,
0.38995469, 0.2996771, 0.99043575, 0.04443827, 0.99527955,
0.27266308, 0.14068906, 0.46999473, 0.03296131, 0.90855405,
0.28360708, 0.8839966, 0.81107014, 0.52673877, 0.59432817,
0.64107253, 0.56165215, 0.79811756, 0.48845398, 0.20506649})
pca := NewPCA(3)
pca.Fit(X)
rows, cols := pca.Transform(X).Dims()
So(rows, ShouldEqual, 5)
So(cols, ShouldEqual, 3)
})
}