From 8e81a4b6d2fe3bcb9a4d2abb7af20c4664517f9a Mon Sep 17 00:00:00 2001 From: Aleksei Kiselev Date: Tue, 27 Feb 2018 11:39:57 +0200 Subject: [PATCH] PCA fit method --- pca/pca.go | 33 +++++++++++++----- pca/pca_test.go | 91 +++++++++++++++++++++++++++++-------------------- 2 files changed, 78 insertions(+), 46 deletions(-) diff --git a/pca/pca.go b/pca/pca.go index 6c3c3ab..c268157 100644 --- a/pca/pca.go +++ b/pca/pca.go @@ -8,6 +8,7 @@ import ( type PCA struct { Num_components int + svd *mat64.SVD } // Number of components. 0 - by default, use number of features as number of components @@ -15,18 +16,21 @@ func NewPCA(num_components int) *PCA { return &PCA{Num_components: num_components} } -//Need return is base.FixedDataGrid -func (pca *PCA) Transform(X *mat64.Dense) *mat64.Dense { - //Prepare before PCA +// Fit PCA model and transform data +// Need return is base.FixedDataGrid +func (pca *PCA) FitTransform(X *mat64.Dense) *mat64.Dense { + return pca.Fit(X).Transform(X) +} - num_samples, num_features := X.Dims() - //Mean to input data +// Fit PCA model +func (pca *PCA) Fit(X *mat64.Dense) *PCA { + // Mean to input data M := mean(X) X = matrixSubVector(X, M) - //Get SVD decomposition from data - var svd mat64.SVD - ok := svd.Factorize(X, matrix.SVDThin) + // Get SVD decomposition from data + pca.svd = &mat64.SVD{} + ok := pca.svd.Factorize(X, matrix.SVDThin) if !ok { panic("Unable to factorize") } @@ -34,8 +38,19 @@ func (pca *PCA) Transform(X *mat64.Dense) *mat64.Dense { panic("Number of components can't be less than zero") } + return pca +} + +// Need return is base.FixedDataGrid +func (pca *PCA) Transform(X *mat64.Dense) *mat64.Dense { + if pca.svd == nil { + panic("You should to fit PCA model first") + } + + num_samples, num_features := X.Dims() + vTemp := new(mat64.Dense) - vTemp.VFromSVD(&svd) + vTemp.VFromSVD(pca.svd) //Compute to full data if pca.Num_components == 0 || pca.Num_components > num_features { return compute(X, vTemp) diff --git a/pca/pca_test.go b/pca/pca_test.go index 14dedf8..20595d1 100644 --- a/pca/pca_test.go +++ b/pca/pca_test.go @@ -1,62 +1,79 @@ package pca import ( - . "github.com/smartystreets/goconvey/convey" "testing" - "github.com/gonum/matrix/mat64" + "github.com/gonum/matrix/mat64" + . "github.com/smartystreets/goconvey/convey" ) -func TestPCAWithZeroComponents(t *testing.T){ - Convey("Set to pca 0 components with first matrix", t, func(){ - X1 := mat64.NewDense(3,7, []float64{6,5,4,3,8,2,9,5,1,10,2,3,8,7,5,14,2,3,6,3,2}) +func TestPCAWithZeroComponents(t *testing.T) { + Convey("Set to pca 0 components with first matrix", t, func() { + X1 := mat64.NewDense(3, 7, []float64{6, 5, 4, 3, 8, 2, 9, 5, 1, 10, 2, 3, 8, 7, 5, 14, 2, 3, 6, 3, 2}) pca := NewPCA(0) - rows, cols := pca.Transform(X1).Dims() + rows, cols := pca.FitTransform(X1).Dims() So(rows, ShouldEqual, 3) So(cols, ShouldEqual, 3) }) - Convey("Set to pca 0 components with second matrix", t, func(){ - X1 := mat64.NewDense(10,5, []float64{ - 0.52984892, 0.1141001 , 0.91599294, 0.9574267 , 0.15361222, - 0.07057588, 0.46371013, 0.73091854, 0.84641034, 0.08122213, - 0.96221946, 0.60367214, 0.69851546, 0.91965564, 0.27040597, - 0.03152856, 0.97912403, 0.39487038, 0.12232594, 0.18474705, - 0.77061953, 0.35898551, 0.78684562, 0.11638404, 0.88908044, - 0.35828086, 0.47214831, 0.95781755, 0.74762736, 0.59850757, - 0.07806127, 0.96940955, 0.15751804, 0.00973325, 0.85041635, - 0.02663938, 0.49755131, 0.57984119, 0.12233871, 0.47967853, - 0.63903222, 0.88556565, 0.79797963, 0.13345186, 0.37415535, - 0.60605207, 0.52067165, 0.91217494, 0.57148943, 0.92210331}) + Convey("Set to pca 0 components with second matrix", t, func() { + X1 := mat64.NewDense(10, 5, []float64{ + 0.52984892, 0.1141001, 0.91599294, 0.9574267, 0.15361222, + 0.07057588, 0.46371013, 0.73091854, 0.84641034, 0.08122213, + 0.96221946, 0.60367214, 0.69851546, 0.91965564, 0.27040597, + 0.03152856, 0.97912403, 0.39487038, 0.12232594, 0.18474705, + 0.77061953, 0.35898551, 0.78684562, 0.11638404, 0.88908044, + 0.35828086, 0.47214831, 0.95781755, 0.74762736, 0.59850757, + 0.07806127, 0.96940955, 0.15751804, 0.00973325, 0.85041635, + 0.02663938, 0.49755131, 0.57984119, 0.12233871, 0.47967853, + 0.63903222, 0.88556565, 0.79797963, 0.13345186, 0.37415535, + 0.60605207, 0.52067165, 0.91217494, 0.57148943, 0.92210331}) pca := NewPCA(0) - rows, cols := pca.Transform(X1).Dims() + rows, cols := pca.FitTransform(X1).Dims() So(rows, ShouldEqual, 10) So(cols, ShouldEqual, 5) }) } -func TestPCAWithNComponents(t *testing.T){ - Convey("Set to pca 3 components with 5x5 matrix", t, func(){ - X := mat64.NewDense(5,5, [] float64{ - 0.23030838, 0.05669317, 0.3187813 , 0.34455114, 0.98062806, - 0.38995469, 0.2996771 , 0.99043575, 0.04443827, 0.99527955, - 0.27266308, 0.14068906, 0.46999473, 0.03296131, 0.90855405, - 0.28360708, 0.8839966 , 0.81107014, 0.52673877, 0.59432817, - 0.64107253, 0.56165215, 0.79811756, 0.48845398, 0.20506649}) +func TestPCAWithNComponents(t *testing.T) { + Convey("Set to pca 3 components with 5x5 matrix", t, func() { + X := mat64.NewDense(5, 5, []float64{ + 0.23030838, 0.05669317, 0.3187813, 0.34455114, 0.98062806, + 0.38995469, 0.2996771, 0.99043575, 0.04443827, 0.99527955, + 0.27266308, 0.14068906, 0.46999473, 0.03296131, 0.90855405, + 0.28360708, 0.8839966, 0.81107014, 0.52673877, 0.59432817, + 0.64107253, 0.56165215, 0.79811756, 0.48845398, 0.20506649}) pca := NewPCA(3) - rows, cols := pca.Transform(X).Dims() + rows, cols := pca.FitTransform(X).Dims() So(rows, ShouldEqual, 5) So(cols, ShouldEqual, 3) }) - Convey("Set to pca 2 components with 3x5 matrix",t, func(){ - X := mat64.NewDense(3,5, [] float64{ - 0.12294845, 0.55170713, 0.67572832, 0.60615516, 0.38184551, - 0.93486821, 0.15120374, 0.89760169, 0.74715672, 0.81373931, - 0.42821569, 0.47457753, 0.18960954, 0.42466159, 0.34166049}) + Convey("Set to pca 2 components with 3x5 matrix", t, func() { + X := mat64.NewDense(3, 5, []float64{ + 0.12294845, 0.55170713, 0.67572832, 0.60615516, 0.38184551, + 0.93486821, 0.15120374, 0.89760169, 0.74715672, 0.81373931, + 0.42821569, 0.47457753, 0.18960954, 0.42466159, 0.34166049}) pca := NewPCA(2) - rows, cols := pca.Transform(X).Dims() - So(rows, ShouldEqual,3) - So(cols, ShouldEqual,2) + rows, cols := pca.FitTransform(X).Dims() + So(rows, ShouldEqual, 3) + So(cols, ShouldEqual, 2) }) -} \ No newline at end of file +} + +func TestPCAFitAndTransformSeparately(t *testing.T) { + Convey("Set to pca 3 components with 5x5 matrix", t, func() { + X := mat64.NewDense(5, 5, []float64{ + 0.23030838, 0.05669317, 0.3187813, 0.34455114, 0.98062806, + 0.38995469, 0.2996771, 0.99043575, 0.04443827, 0.99527955, + 0.27266308, 0.14068906, 0.46999473, 0.03296131, 0.90855405, + 0.28360708, 0.8839966, 0.81107014, 0.52673877, 0.59432817, + 0.64107253, 0.56165215, 0.79811756, 0.48845398, 0.20506649}) + pca := NewPCA(3) + pca.Fit(X) + + rows, cols := pca.Transform(X).Dims() + So(rows, ShouldEqual, 5) + So(cols, ShouldEqual, 3) + }) +}