diff --git a/README.md b/README.md
index 1e687c7..3a01e95 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@ GoLearn
 A small start on a machine learning library in Go.
+[Doc](http://godoc.org/github.com/sjwhitworth/golearn).
 Install
 =======
diff --git a/knn/knn.go b/knn/knn.go
index 6e4009f..9b5c675 100644
--- a/knn/knn.go
+++ b/knn/knn.go
@@ -2,6 +2,7 @@ package knn
 import (
 	base "github.com/sjwhitworth/golearn/base"
+	pairwiseMetrics "github.com/sjwhitworth/golearn/metrics/pairwise"
 	util "github.com/sjwhitworth/golearn/utilities"
 	mat "github.com/skelterjohn/go.matrix"
 )
@@ -34,7 +35,8 @@ func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int
 		row := KNN.Data.GetRowVector(i)
 		//Will put code in to check errs later
-		eucdistance, _ := util.ComputeDistance(KNN.DistanceFunc, row, vector)
+		euclidean := pairwiseMetrics.NewEuclidean()
+		eucdistance := euclidean.Distance(row, vector) // NOTE(review): Distance returns one float64 and takes *mat64.Dense; vector is a go.matrix *mat.DenseMatrix, so a conversion is still needed here.
 		rownumbers[i] = eucdistance
 	}
diff --git a/lm/linear_regression.go b/lm/linear_regression.go
index 2900b03..7bae4be 100644
--- a/lm/linear_regression.go
+++ b/lm/linear_regression.go
@@ -1,13 +1,5 @@
 package lm
-import (
-	"fmt"
-	mat "github.com/skelterjohn/go.matrix"
-	base "golearn/base"
-	util "golearn/utilities"
-	"math"
-)
-
 type LinearModel struct {
-	base.BaseRegressor
+	// base.BaseRegressor
 }
diff --git a/metrics/pairwise/euclidean.go b/metrics/pairwise/euclidean.go
new file mode 100644
index 0000000..e61eae6
--- /dev/null
+++ b/metrics/pairwise/euclidean.go
@@ -0,0 +1,31 @@
+package pairwise
+
+import (
+	"math"
+
+	"github.com/gonum/matrix/mat64"
+)
+
+type Euclidean struct{}
+
+func NewEuclidean() *Euclidean {
+	return &Euclidean{}
+}
+
+// InnerProduct returns the standard Euclidean inner product (dot product) of vectorX and vectorY.
+func (e *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+	result := vectorX.Dot(vectorY)
+
+	return result
+}
+
+// Distance returns the Euclidean distance between vectorX and vectorY,
+// also known as the L2 distance.
+func (e *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+	subVector := mat64.NewDense(0, 0, nil)
+	subVector.Sub(vectorX, vectorY)
+
+	result := e.InnerProduct(subVector, subVector)
+
+	return math.Sqrt(result)
+}
diff --git a/metrics/pairwise/euclidean_test.go b/metrics/pairwise/euclidean_test.go
new file mode 100644
index 0000000..d022ae6
--- /dev/null
+++ b/metrics/pairwise/euclidean_test.go
@@ -0,0 +1,36 @@
+package pairwise
+
+import (
+	"testing"
+
+	"github.com/gonum/matrix/mat64"
+	. "github.com/smartystreets/goconvey/convey"
+)
+
+func TestEuclidean(t *testing.T) {
+	var vectorX, vectorY *mat64.Dense
+	euclidean := NewEuclidean()
+
+	Convey("Given two vectors", t, func() {
+		vectorX = mat64.NewDense(3, 1, []float64{1, 2, 3})
+		vectorY = mat64.NewDense(3, 1, []float64{2, 4, 5})
+
+		Convey("When doing inner product", func() {
+			result := euclidean.InnerProduct(vectorX, vectorY)
+
+			Convey("The result should be 25", func() {
+				So(result, ShouldEqual, 25)
+			})
+		})
+
+		Convey("When calculating distance", func() {
+			result := euclidean.Distance(vectorX, vectorY)
+
+			Convey("The result should be 3", func() {
+				So(result, ShouldEqual, 3)
+			})
+
+		})
+
+	})
+}
diff --git a/metrics/pairwise/manhattan.go b/metrics/pairwise/manhattan.go
new file mode 100644
index 0000000..20b319a
--- /dev/null
+++ b/metrics/pairwise/manhattan.go
@@ -0,0 +1,40 @@
+package pairwise
+
+import (
+	"math"
+
+	"github.com/gonum/matrix/mat64"
+)
+
+type Manhattan struct{}
+
+func NewManhattan() *Manhattan {
+	return &Manhattan{}
+}
+
+// Distance returns the Manhattan distance, also known as the L1 distance:
+// the sum of the absolute values of the elements of vectorX - vectorY.
+func (m *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+	var length int
+	subVector := mat64.NewDense(0, 0, nil)
+	subVector.Sub(vectorX, vectorY)
+
+	r, c := subVector.Dims()
+
+	if r == 1 {
+		// Transpose the row vector into a column vector so the At(i, 0) loop below serves both shapes.
+		subVector.TCopy(subVector)
+		length = c
+	} else if c == 1 {
+		length = r
+	} else {
+		panic(mat64.ErrShape)
+	}
+
+	result := 0.0
+	for i := 0; i < length; i++ {
+		result += math.Abs(subVector.At(i, 0))
+	}
+
+	return result
+}
diff --git a/metrics/pairwise/manhattan_test.go b/metrics/pairwise/manhattan_test.go
new file mode 100644
index 0000000..e6361e4
--- /dev/null
+++ b/metrics/pairwise/manhattan_test.go
@@ -0,0 +1,42 @@
+package pairwise
+
+import (
+	"testing"
+
+	"github.com/gonum/matrix/mat64"
+	. "github.com/smartystreets/goconvey/convey"
+)
+
+func TestManhattan(t *testing.T) {
+	var vectorX, vectorY *mat64.Dense
+	manhattan := NewManhattan()
+
+	Convey("Given two vectors", t, func() {
+		vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3})
+		vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5})
+
+		Convey("When calculating distance with column vectors", func() {
+			result := manhattan.Distance(vectorX, vectorY)
+
+			Convey("The result should be 5", func() {
+				So(result, ShouldEqual, 5)
+			})
+		})
+
+		Convey("When calculating distance with row vectors", func() {
+			vectorX.TCopy(vectorX)
+			vectorY.TCopy(vectorY)
+			result := manhattan.Distance(vectorX, vectorY)
+
+			Convey("The result should be 5", func() {
+				So(result, ShouldEqual, 5)
+			})
+		})
+
+		Convey("When calculating distance with row and column vectors", func() {
+			vectorX.TCopy(vectorX)
+			So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape)
+		})
+
+	})
+}
diff --git a/metrics/pairwise/pairwise.go b/metrics/pairwise/pairwise.go
new file mode 100644
index 0000000..6c6a5dc
--- /dev/null
+++ b/metrics/pairwise/pairwise.go
@@ -0,0 +1,2 @@
+// Package pairwise implements utilities to evaluate pairwise distances or inner products (via kernels).
+package pairwise
diff --git a/metrics/pairwise/poly_kernel.go b/metrics/pairwise/poly_kernel.go
new file mode 100644
index 0000000..03e9ef3
--- /dev/null
+++ b/metrics/pairwise/poly_kernel.go
@@ -0,0 +1,34 @@
+package pairwise
+
+import (
+	"math"
+
+	"github.com/gonum/matrix/mat64"
+)
+
+type PolyKernel struct {
+	degree int
+}
+
+// NewPolyKernel returns a polynomial kernel of the given degree.
+func NewPolyKernel(degree int) *PolyKernel {
+	return &PolyKernel{degree: degree}
+}
+
+// InnerProduct computes the inner product through the kernel trick:
+// K(x, y) = (x^T y + 1)^d
+func (p *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+	result := vectorX.Dot(vectorY)
+	result = math.Pow(result+1, float64(p.degree))
+
+	return result
+}
+
+// Distance returns sqrt(K(x-y, x-y)), which is 1 (not 0) for identical inputs. NOTE(review): the kernel-induced distance is usually sqrt(K(x,x) - 2K(x,y) + K(y,y)); confirm which is intended.
+func (p *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+	subVector := mat64.NewDense(0, 0, nil)
+	subVector.Sub(vectorX, vectorY)
+	result := p.InnerProduct(subVector, subVector)
+
+	return math.Sqrt(result)
+}
diff --git a/metrics/pairwise/poly_kernel_test.go b/metrics/pairwise/poly_kernel_test.go
new file mode 100644
index 0000000..31f508f
--- /dev/null
+++ b/metrics/pairwise/poly_kernel_test.go
@@ -0,0 +1,36 @@
+package pairwise
+
+import (
+	"testing"
+
+	"github.com/gonum/matrix/mat64"
+	. "github.com/smartystreets/goconvey/convey"
+)
+
+func TestPolyKernel(t *testing.T) {
+	var vectorX, vectorY *mat64.Dense
+	polyKernel := NewPolyKernel(3)
+
+	Convey("Given two vectors", t, func() {
+		vectorX = mat64.NewDense(3, 1, []float64{1, 2, 3})
+		vectorY = mat64.NewDense(3, 1, []float64{2, 4, 5})
+
+		Convey("When doing inner product", func() {
+			result := polyKernel.InnerProduct(vectorX, vectorY)
+
+			Convey("The result should be 17576", func() {
+				So(result, ShouldEqual, 17576) // (25 + 1)^3
+			})
+		})
+
+		Convey("When calculating distance", func() {
+			result := polyKernel.Distance(vectorX, vectorY)
+
+			Convey("The result should be 31.622776601683793", func() {
+				So(result, ShouldEqual, 31.622776601683793) // sqrt((9 + 1)^3)
+			})
+
+		})
+
+	})
+}
diff --git a/metrics/pairwise/rbf_kernel.go b/metrics/pairwise/rbf_kernel.go
new file mode 100644
index 0000000..af02463
--- /dev/null
+++ b/metrics/pairwise/rbf_kernel.go
@@ -0,0 +1,27 @@
+package pairwise
+
+import (
+	"math"
+
+	"github.com/gonum/matrix/mat64"
+)
+
+type RBFKernel struct {
+	gamma float64
+}
+
+// NewRBFKernel returns a radial basis function (RBF) kernel with the given gamma.
+func NewRBFKernel(gamma float64) *RBFKernel {
+	return &RBFKernel{gamma: gamma}
+}
+
+// InnerProduct computes the inner product through the kernel trick:
+// K(x, y) = exp(-gamma * ||x - y||^2)
+func (r *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+	euclidean := NewEuclidean()
+	distance := euclidean.Distance(vectorX, vectorY)
+
+	result := math.Exp(-r.gamma * math.Pow(distance, 2))
+
+	return result
+}
diff --git a/metrics/pairwise/rbf_kernel_test.go b/metrics/pairwise/rbf_kernel_test.go
new file mode 100644
index 0000000..5cb6f48
--- /dev/null
+++ b/metrics/pairwise/rbf_kernel_test.go
@@ -0,0 +1,28 @@
+package pairwise
+
+import (
+	"testing"
+
+	"github.com/gonum/matrix/mat64"
+	. "github.com/smartystreets/goconvey/convey"
+)
+
+func TestRBFKernel(t *testing.T) {
+	var vectorX, vectorY *mat64.Dense
+	rbfKernel := NewRBFKernel(0.1)
+
+	Convey("Given two vectors", t, func() {
+		vectorX = mat64.NewDense(3, 1, []float64{1, 2, 3})
+		vectorY = mat64.NewDense(3, 1, []float64{2, 4, 5})
+
+		Convey("When doing inner product", func() {
+			result := rbfKernel.InnerProduct(vectorX, vectorY)
+
+			Convey("The result should be 0.4065696597405991", func() {
+				So(result, ShouldEqual, 0.4065696597405991) // exp(-0.1 * 9)
+
+			})
+		})
+
+	})
+}
diff --git a/utilities/distance.go b/utilities/distance.go
deleted file mode 100644
index d044fd2..0000000
--- a/utilities/distance.go
+++ /dev/null
@@ -1,36 +0,0 @@
-package utilities
-
-import (
-	"fmt"
-	"math"
-
-	mat "github.com/skelterjohn/go.matrix"
-)
-
-// Computes the 'distance' between two vectors, where the distance is one of the following methods -
-// euclidean (more to come)
-func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) (float64, error) {
-	var sum float64
-
-	switch metric {
-	case "euclidean":
-		{
-			difference, err := testrow.MinusDense(vector)
-			flat := difference.Array()
-
-			if err != nil {
-				fmt.Println(err)
-			}
-
-			for _, i := range flat {
-				squared := math.Pow(i, 2)
-				sum += squared
-			}
-
-			eucdistance := math.Sqrt(sum)
-			return eucdistance, nil
-		}
-	default:
-		return 0.0, fmt.Errorf("ValueError: %s is not an implemented distance method", metric)
-	}
-}