mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-26 13:49:14 +08:00
Finished an implementation of KNN
This commit is contained in:
commit
822b4c389f
@ -4,6 +4,7 @@ GoLearn
|
|||||||
<img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125>
|
<img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125>
|
||||||
|
|
||||||
A small start on a machine learning library in Go.
|
A small start on a machine learning library in Go.
|
||||||
|
[Doc](http://godoc.org/github.com/sjwhitworth/golearn).
|
||||||
|
|
||||||
Install
|
Install
|
||||||
=======
|
=======
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
package base
|
package base
|
||||||
|
|
||||||
import (
|
import (
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
mat64 "github.com/gonum/matrix/mat64"
|
||||||
)
|
)
|
||||||
|
|
||||||
// An object that can ingest some data and train on it.
|
// An object that can ingest some data and train on it.
|
||||||
@ -25,5 +25,6 @@ type Model interface {
|
|||||||
|
|
||||||
// @todo: Implement BaseEstimator setters and getters.
|
// @todo: Implement BaseEstimator setters and getters.
|
||||||
type BaseEstimator struct {
|
type BaseEstimator struct {
|
||||||
Data *mat.DenseMatrix
|
Estimator
|
||||||
|
Data *mat64.Dense
|
||||||
}
|
}
|
||||||
|
@ -3,10 +3,10 @@ package main
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
mat64 "github.com/gonum/matrix/mat64"
|
||||||
data "github.com/sjwhitworth/golearn/data"
|
data "github.com/sjwhitworth/golearn/data"
|
||||||
knn "github.com/sjwhitworth/golearn/knn"
|
knn "github.com/sjwhitworth/golearn/knn"
|
||||||
util "github.com/sjwhitworth/golearn/utilities"
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@ -21,10 +21,10 @@ func main() {
|
|||||||
randArray := util.RandomArray(3, 7)
|
randArray := util.RandomArray(3, 7)
|
||||||
|
|
||||||
//Initialises a vector with this array
|
//Initialises a vector with this array
|
||||||
random := mat.MakeDenseMatrix(randArray, 1, 3)
|
random := mat64.NewDense(1, 3, randArray)
|
||||||
|
|
||||||
//Calculates the Euclidean distance and returns the most popular label
|
//Calculates the Euclidean distance and returns the most popular label
|
||||||
labels, _ := cls.Predict(random, 3)
|
labels := cls.Predict(random, 3)
|
||||||
fmt.Println(labels)
|
fmt.Println(labels)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,10 +3,10 @@ package main
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
data "github.com/sjwhitworth/golearn/data"
|
data "github.com/sjwhitworth/golearn/data"
|
||||||
knn "github.com/sjwhitworth/golearn/knn"
|
knn "github.com/sjwhitworth/golearn/knn"
|
||||||
util "github.com/sjwhitworth/golearn/utilities"
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@ -22,10 +22,10 @@ func main() {
|
|||||||
randArray := util.RandomArray(2, 100)
|
randArray := util.RandomArray(2, 100)
|
||||||
|
|
||||||
//Initialises a vector with this array
|
//Initialises a vector with this array
|
||||||
random := mat.MakeDenseMatrix(randArray, 1, 2)
|
random := mat64.NewDense(1, 2, randArray)
|
||||||
|
|
||||||
//Calculates the Euclidean distance and returns the most popular label
|
//Calculates the Euclidean distance and returns the most popular label
|
||||||
outcome, _ := cls.Predict(random, 3)
|
outcome := cls.Predict(random, 3)
|
||||||
fmt.Println(outcome)
|
fmt.Println(outcome)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
57
knn/knn.go
57
knn/knn.go
@ -1,52 +1,77 @@
|
|||||||
/* Package KNN implements a K Nearest Neighbors object. It is capable of both classification
|
/* Package KNN implements a K Nearest Neighbors object. It is capable of both classification
|
||||||
and regression. It accepts data in the form of a list of float64s, which are then reshaped
|
and regression. It accepts data in the form of a slice of float64s, which are then reshaped
|
||||||
into a X by Y matrix. */
|
into an X by Y matrix. */
|
||||||
|
|
||||||
package knn
|
package knn
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
base "github.com/sjwhitworth/golearn/base"
|
base "github.com/sjwhitworth/golearn/base"
|
||||||
|
pairwiseMetrics "github.com/sjwhitworth/golearn/metrics/pairwise"
|
||||||
util "github.com/sjwhitworth/golearn/utilities"
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
//A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a name.
|
// A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
|
||||||
|
// The accepted distance functions at this time are 'euclidean' and 'manhattan'.
|
||||||
type KNNClassifier struct {
|
type KNNClassifier struct {
|
||||||
base.BaseEstimator
|
base.BaseEstimator
|
||||||
Labels []string
|
Labels []string
|
||||||
DistanceFunc string
|
DistanceFunc string
|
||||||
}
|
}
|
||||||
|
|
||||||
//Mints a new classifier.
|
// Returns a new classifier
|
||||||
func NewKnnClassifier(labels []string, numbers []float64, x int, y int, distfunc string) *KNNClassifier {
|
func NewKnnClassifier(labels []string, numbers []float64, rows int, cols int, distfunc string) *KNNClassifier {
|
||||||
|
if rows != len(labels) {
|
||||||
|
panic("Number of rows must equal number of labels")
|
||||||
|
}
|
||||||
|
|
||||||
KNN := KNNClassifier{}
|
KNN := KNNClassifier{}
|
||||||
KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
|
KNN.Data = mat64.NewDense(rows, cols, numbers)
|
||||||
KNN.Labels = labels
|
KNN.Labels = labels
|
||||||
KNN.DistanceFunc = distfunc
|
KNN.DistanceFunc = distfunc
|
||||||
return &KNN
|
return &KNN
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
|
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
|
||||||
// @todo: Lots of room to improve this. V messy.
|
// See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
|
||||||
func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int) {
|
func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
|
||||||
|
|
||||||
rows := KNN.Data.Rows()
|
convertedVector := util.FloatsToMatrix(vector)
|
||||||
|
// Get the number of rows
|
||||||
|
rows, _ := KNN.Data.Dims()
|
||||||
rownumbers := make(map[int]float64)
|
rownumbers := make(map[int]float64)
|
||||||
labels := make([]string, 0)
|
labels := make([]string, 0)
|
||||||
maxmap := make(map[string]int)
|
maxmap := make(map[string]int)
|
||||||
|
|
||||||
for i := 0; i < rows; i++ {
|
// Check what distance function we are using
|
||||||
row := KNN.Data.GetRowVector(i)
|
switch KNN.DistanceFunc {
|
||||||
|
case "euclidean":
|
||||||
//Will put code in to check errs later
|
{
|
||||||
eucdistance, _ := util.ComputeDistance(KNN.DistanceFunc, row, vector)
|
euclidean := pairwiseMetrics.NewEuclidean()
|
||||||
rownumbers[i] = eucdistance
|
for i := 0; i < rows; i++ {
|
||||||
|
row := KNN.Data.RowView(i)
|
||||||
|
rowMat := util.FloatsToMatrix(row)
|
||||||
|
distance := euclidean.Distance(rowMat, convertedVector)
|
||||||
|
rownumbers[i] = distance
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case "manhattan":
|
||||||
|
{
|
||||||
|
manhattan := pairwiseMetrics.NewEuclidean()
|
||||||
|
for i := 0; i < rows; i++ {
|
||||||
|
row := KNN.Data.RowView(i)
|
||||||
|
rowMat := util.FloatsToMatrix(row)
|
||||||
|
distance := manhattan.Distance(rowMat, convertedVector)
|
||||||
|
rownumbers[i] = distance
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sorted := util.SortIntMap(rownumbers)
|
sorted := util.SortIntMap(rownumbers)
|
||||||
values := sorted[:K]
|
values := sorted[:K]
|
||||||
|
|
||||||
for _, elem := range values {
|
for _, elem := range values {
|
||||||
|
// It's when we access this map
|
||||||
labels = append(labels, KNN.Labels[elem])
|
labels = append(labels, KNN.Labels[elem])
|
||||||
|
|
||||||
if _, ok := maxmap[KNN.Labels[elem]]; ok {
|
if _, ok := maxmap[KNN.Labels[elem]]; ok {
|
||||||
@ -59,5 +84,5 @@ func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int
|
|||||||
sortedlabels := util.SortStringMap(maxmap)
|
sortedlabels := util.SortStringMap(maxmap)
|
||||||
label := sortedlabels[0]
|
label := sortedlabels[0]
|
||||||
|
|
||||||
return label, values
|
return label
|
||||||
}
|
}
|
||||||
|
33
knn/knn_test.go
Normal file
33
knn/knn_test.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package knn
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/smartystreets/goconvey/convey"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestKnnClassifier(t *testing.T) {
|
||||||
|
Convey("Given labels, a classifier and data", t, func() {
|
||||||
|
labels := []string{"blue", "blue", "red", "red"}
|
||||||
|
data := []float64{1, 1, 1, 1, 1, 1, 3, 3, 3, 6, 6, 6}
|
||||||
|
cls := NewKnnClassifier(labels, data, 4, 3, "euclidean")
|
||||||
|
|
||||||
|
Convey("When predicting the label for our first vector", func() {
|
||||||
|
// The vector we're going to predict
|
||||||
|
vector := []float64{1.2, 1.2, 1.5}
|
||||||
|
result := cls.Predict(vector, 2)
|
||||||
|
Convey("The result should be 'blue", func() {
|
||||||
|
So(result, ShouldEqual, "blue")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Convey("When predicting the label for our first vector", func() {
|
||||||
|
// The vector we're going to predict
|
||||||
|
vector2 := []float64{5, 5, 5}
|
||||||
|
result2 := cls.Predict(vector2, 2)
|
||||||
|
Convey("The result should be 'red", func() {
|
||||||
|
So(result2, ShouldEqual, "red")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
@ -3,48 +3,70 @@
|
|||||||
package knn
|
package knn
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
"github.com/sjwhitworth/golearn/base"
|
||||||
|
pairwiseMetrics "github.com/sjwhitworth/golearn/metrics/pairwise"
|
||||||
util "github.com/sjwhitworth/golearn/utilities"
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
|
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
|
||||||
type KNNRegressor struct {
|
type KNNRegressor struct {
|
||||||
Data *mat.DenseMatrix
|
base.BaseEstimator
|
||||||
Labels []float64
|
Values []float64
|
||||||
DistanceFunc string
|
DistanceFunc string
|
||||||
}
|
}
|
||||||
|
|
||||||
//Mints a new classifier.
|
// Mints a new classifier.
|
||||||
func NewKnnRegressor(labels []float64, numbers []float64, x int, y int, distfunc string) *KNNRegressor {
|
func NewKnnRegressor(values []float64, numbers []float64, x int, y int, distfunc string) *KNNRegressor {
|
||||||
KNN := KNNRegressor{}
|
KNN := KNNRegressor{}
|
||||||
KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
|
KNN.Data = mat64.NewDense(x, y, numbers)
|
||||||
KNN.Labels = labels
|
KNN.Values = values
|
||||||
|
KNN.DistanceFunc = distfunc
|
||||||
return &KNN
|
return &KNN
|
||||||
}
|
}
|
||||||
|
|
||||||
//Returns an average of the K nearest labels/variables, based on a vector input.
|
//Returns an average of the K nearest labels/variables, based on a vector input.
|
||||||
func (KNN *KNNRegressor) Predict(vector *mat.DenseMatrix, K int) (float64, []int) {
|
func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
|
||||||
|
|
||||||
rows := KNN.Data.Rows()
|
// Get the number of rows
|
||||||
|
rows, _ := KNN.Data.Dims()
|
||||||
rownumbers := make(map[int]float64)
|
rownumbers := make(map[int]float64)
|
||||||
labels := make([]float64, 1)
|
labels := make([]float64, 0)
|
||||||
sum := 0.0
|
|
||||||
|
|
||||||
for i := 0; i < rows; i++ {
|
// Check what distance function we are using
|
||||||
row := KNN.Data.GetRowVector(i)
|
switch KNN.DistanceFunc {
|
||||||
eucdistance, _ := util.ComputeDistance(KNN.DistanceFunc, row, vector)
|
case "euclidean":
|
||||||
rownumbers[i] = eucdistance
|
{
|
||||||
|
euclidean := pairwiseMetrics.NewEuclidean()
|
||||||
|
for i := 0; i < rows; i++ {
|
||||||
|
row := KNN.Data.RowView(i)
|
||||||
|
rowMat := util.FloatsToMatrix(row)
|
||||||
|
distance := euclidean.Distance(rowMat, vector)
|
||||||
|
rownumbers[i] = distance
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case "manhattan":
|
||||||
|
{
|
||||||
|
manhattan := pairwiseMetrics.NewEuclidean()
|
||||||
|
for i := 0; i < rows; i++ {
|
||||||
|
row := KNN.Data.RowView(i)
|
||||||
|
rowMat := util.FloatsToMatrix(row)
|
||||||
|
distance := manhattan.Distance(rowMat, vector)
|
||||||
|
rownumbers[i] = distance
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sorted := util.SortIntMap(rownumbers)
|
sorted := util.SortIntMap(rownumbers)
|
||||||
values := sorted[:K]
|
values := sorted[:K]
|
||||||
|
|
||||||
|
var sum float64
|
||||||
for _, elem := range values {
|
for _, elem := range values {
|
||||||
value := KNN.Labels[elem]
|
value := KNN.Values[elem]
|
||||||
labels = append(labels, value)
|
labels = append(labels, value)
|
||||||
sum += value
|
sum += value
|
||||||
}
|
}
|
||||||
|
|
||||||
average := sum / float64(K)
|
average := sum / float64(K)
|
||||||
return average, values
|
return average
|
||||||
}
|
}
|
||||||
|
@ -1 +0,0 @@
|
|||||||
package knn
|
|
@ -1,7 +1,12 @@
|
|||||||
package lm
|
package lm
|
||||||
|
|
||||||
|
<<<<<<< HEAD
|
||||||
import base "golearn/base"
|
import base "golearn/base"
|
||||||
|
|
||||||
type LinearModel struct {
|
type LinearModel struct {
|
||||||
base.BaseEstimator
|
base.BaseEstimator
|
||||||
|
=======
|
||||||
|
type LinearModel struct {
|
||||||
|
// base.BaseRegressor
|
||||||
|
>>>>>>> 798751c839ea79f6dff13e790f80a4d5bcbfea68
|
||||||
}
|
}
|
||||||
|
31
metrics/pairwise/euclidean.go
Normal file
31
metrics/pairwise/euclidean.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Euclidean is a stateless pairwise metric providing the euclidean inner
// product and distance.
type Euclidean struct{}

// NewEuclidean returns a pointer to a new Euclidean metric.
func NewEuclidean() *Euclidean {
	return new(Euclidean)
}
|
||||||
|
|
||||||
|
// Compute usual inner product in the sense of euclidean.
|
||||||
|
func (self *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
|
||||||
|
result := vectorX.Dot(vectorY)
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute usual distance in the sense of euclidean.
|
||||||
|
// Also known as L2 distance.
|
||||||
|
func (self *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
|
||||||
|
subVector := mat64.NewDense(0, 0, nil)
|
||||||
|
subVector.Sub(vectorX, vectorY)
|
||||||
|
|
||||||
|
result := self.InnerProduct(subVector, subVector)
|
||||||
|
|
||||||
|
return math.Sqrt(result)
|
||||||
|
}
|
36
metrics/pairwise/euclidean_test.go
Normal file
36
metrics/pairwise/euclidean_test.go
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
. "github.com/smartystreets/goconvey/convey"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestEuclidean(t *testing.T) {
|
||||||
|
var vectorX, vectorY *mat64.Dense
|
||||||
|
euclidean := NewEuclidean()
|
||||||
|
|
||||||
|
Convey("Given two vectors", t, func() {
|
||||||
|
vectorX = mat64.NewDense(3, 1, []float64{1, 2, 3})
|
||||||
|
vectorY = mat64.NewDense(3, 1, []float64{2, 4, 5})
|
||||||
|
|
||||||
|
Convey("When doing inner product", func() {
|
||||||
|
result := euclidean.InnerProduct(vectorX, vectorY)
|
||||||
|
|
||||||
|
Convey("The result should be 25", func() {
|
||||||
|
So(result, ShouldEqual, 25)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Convey("When calculating distance", func() {
|
||||||
|
result := euclidean.Distance(vectorX, vectorY)
|
||||||
|
|
||||||
|
Convey("The result should be 3", func() {
|
||||||
|
So(result, ShouldEqual, 3)
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
40
metrics/pairwise/manhattan.go
Normal file
40
metrics/pairwise/manhattan.go
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Manhattan is a stateless pairwise metric providing the manhattan distance.
type Manhattan struct{}

// NewManhattan returns a pointer to a new Manhattan metric.
func NewManhattan() *Manhattan {
	return new(Manhattan)
}
|
||||||
|
|
||||||
|
// Manhattan distance, also known as L1 distance.
|
||||||
|
// Compute sum of absolute values of elements.
|
||||||
|
func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
|
||||||
|
var length int
|
||||||
|
subVector := mat64.NewDense(0, 0, nil)
|
||||||
|
subVector.Sub(vectorX, vectorY)
|
||||||
|
|
||||||
|
r, c := subVector.Dims()
|
||||||
|
|
||||||
|
if r == 1 {
|
||||||
|
// Force transpose to column vector
|
||||||
|
subVector.TCopy(subVector)
|
||||||
|
length = c
|
||||||
|
} else if c == 1 {
|
||||||
|
length = r
|
||||||
|
} else {
|
||||||
|
panic(mat64.ErrShape)
|
||||||
|
}
|
||||||
|
|
||||||
|
result := .0
|
||||||
|
for i := 0; i < length; i++ {
|
||||||
|
result += math.Abs(subVector.At(i, 0))
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
42
metrics/pairwise/manhattan_test.go
Normal file
42
metrics/pairwise/manhattan_test.go
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
. "github.com/smartystreets/goconvey/convey"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestManhattan(t *testing.T) {
|
||||||
|
var vectorX, vectorY *mat64.Dense
|
||||||
|
manhattan := NewManhattan()
|
||||||
|
|
||||||
|
Convey("Given two vectors", t, func() {
|
||||||
|
vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3})
|
||||||
|
vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5})
|
||||||
|
|
||||||
|
Convey("When calculating distance with column vectors", func() {
|
||||||
|
result := manhattan.Distance(vectorX, vectorY)
|
||||||
|
|
||||||
|
Convey("The result should be 5", func() {
|
||||||
|
So(result, ShouldEqual, 5)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Convey("When calculating distance with row vectors", func() {
|
||||||
|
vectorX.TCopy(vectorX)
|
||||||
|
vectorY.TCopy(vectorY)
|
||||||
|
result := manhattan.Distance(vectorX, vectorY)
|
||||||
|
|
||||||
|
Convey("The result should be 5", func() {
|
||||||
|
So(result, ShouldEqual, 5)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Convey("When calculating distance with row and column vectors", func() {
|
||||||
|
vectorX.TCopy(vectorX)
|
||||||
|
So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape)
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
2
metrics/pairwise/pairwise.go
Normal file
2
metrics/pairwise/pairwise.go
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
// Package pairwise implements utilities to evaluate pairwise distances or inner product (via kernel).
|
||||||
|
package pairwise
|
34
metrics/pairwise/poly_kernel.go
Normal file
34
metrics/pairwise/poly_kernel.go
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PolyKernel is a polynomial kernel; degree is the exponent applied by
// InnerProduct.
type PolyKernel struct {
	degree int
}

// NewPolyKernel returns a polynomial kernel of the given degree.
func NewPolyKernel(degree int) *PolyKernel {
	kernel := new(PolyKernel)
	kernel.degree = degree
	return kernel
}
|
||||||
|
|
||||||
|
// Compute inner product through kernel trick
|
||||||
|
// K(x, y) = (x^T y + 1)^d
|
||||||
|
func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
|
||||||
|
result := vectorX.Dot(vectorY)
|
||||||
|
result = math.Pow(result+1, float64(self.degree))
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute distance under the polynomial kernel, maybe no need.
|
||||||
|
func (self *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
|
||||||
|
subVector := mat64.NewDense(0, 0, nil)
|
||||||
|
subVector.Sub(vectorX, vectorY)
|
||||||
|
result := self.InnerProduct(subVector, subVector)
|
||||||
|
|
||||||
|
return math.Sqrt(result)
|
||||||
|
}
|
36
metrics/pairwise/poly_kernel_test.go
Normal file
36
metrics/pairwise/poly_kernel_test.go
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
. "github.com/smartystreets/goconvey/convey"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPolyKernel(t *testing.T) {
|
||||||
|
var vectorX, vectorY *mat64.Dense
|
||||||
|
polyKernel := NewPolyKernel(3)
|
||||||
|
|
||||||
|
Convey("Given two vectors", t, func() {
|
||||||
|
vectorX = mat64.NewDense(3, 1, []float64{1, 2, 3})
|
||||||
|
vectorY = mat64.NewDense(3, 1, []float64{2, 4, 5})
|
||||||
|
|
||||||
|
Convey("When doing inner product", func() {
|
||||||
|
result := polyKernel.InnerProduct(vectorX, vectorY)
|
||||||
|
|
||||||
|
Convey("The result should be 17576", func() {
|
||||||
|
So(result, ShouldEqual, 17576)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Convey("When calculating distance", func() {
|
||||||
|
result := polyKernel.Distance(vectorX, vectorY)
|
||||||
|
|
||||||
|
Convey("The result should be 31.622776601683793", func() {
|
||||||
|
So(result, ShouldEqual, 31.622776601683793)
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
27
metrics/pairwise/rbf_kernel.go
Normal file
27
metrics/pairwise/rbf_kernel.go
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RBFKernel is a radial basis function kernel; gamma is the factor applied
// to the squared distance by InnerProduct.
type RBFKernel struct {
	gamma float64
}

// NewRBFKernel returns a radial basis function kernel with the given gamma.
func NewRBFKernel(gamma float64) *RBFKernel {
	kernel := new(RBFKernel)
	kernel.gamma = gamma
	return kernel
}
|
||||||
|
|
||||||
|
// Compute inner product through kernel trick
|
||||||
|
// K(x, y) = exp(-gamma * ||x - y||^2)
|
||||||
|
func (self *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
|
||||||
|
euclidean := NewEuclidean()
|
||||||
|
distance := euclidean.Distance(vectorX, vectorY)
|
||||||
|
|
||||||
|
result := math.Exp(-self.gamma * math.Pow(distance, 2))
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
28
metrics/pairwise/rbf_kernel_test.go
Normal file
28
metrics/pairwise/rbf_kernel_test.go
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
package pairwise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gonum/matrix/mat64"
|
||||||
|
. "github.com/smartystreets/goconvey/convey"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRBFKernel(t *testing.T) {
|
||||||
|
var vectorX, vectorY *mat64.Dense
|
||||||
|
rbfKernel := NewRBFKernel(0.1)
|
||||||
|
|
||||||
|
Convey("Given two vectors", t, func() {
|
||||||
|
vectorX = mat64.NewDense(3, 1, []float64{1, 2, 3})
|
||||||
|
vectorY = mat64.NewDense(3, 1, []float64{2, 4, 5})
|
||||||
|
|
||||||
|
Convey("When doing inner product", func() {
|
||||||
|
result := rbfKernel.InnerProduct(vectorX, vectorY)
|
||||||
|
|
||||||
|
Convey("The result should be 0.4065696597405991", func() {
|
||||||
|
So(result, ShouldEqual, 0.4065696597405991)
|
||||||
|
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
@ -1,36 +0,0 @@
|
|||||||
package utilities
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
|
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Computes the 'distance' between two vectors, where the distance is one of the following methods -
|
|
||||||
// euclidean (more to come)
|
|
||||||
func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) (float64, error) {
|
|
||||||
var sum float64
|
|
||||||
|
|
||||||
switch metric {
|
|
||||||
case "euclidean":
|
|
||||||
{
|
|
||||||
difference, err := testrow.MinusDense(vector)
|
|
||||||
flat := difference.Array()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, i := range flat {
|
|
||||||
squared := math.Pow(i, 2)
|
|
||||||
sum += squared
|
|
||||||
}
|
|
||||||
|
|
||||||
eucdistance := math.Sqrt(sum)
|
|
||||||
return eucdistance, nil
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return 0.0, fmt.Errorf("ValueError: %s is not an implemented distance method", metric)
|
|
||||||
}
|
|
||||||
}
|
|
@ -5,6 +5,8 @@ import (
|
|||||||
rand "math/rand"
|
rand "math/rand"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
|
mat64 "github.com/gonum/matrix/mat64"
|
||||||
)
|
)
|
||||||
|
|
||||||
type sortedIntMap struct {
|
type sortedIntMap struct {
|
||||||
@ -88,3 +90,7 @@ func ConvertLabelsToFloat(labels []string) []float64 {
|
|||||||
}
|
}
|
||||||
return floats
|
return floats
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func FloatsToMatrix(floats []float64) *mat64.Dense {
|
||||||
|
return mat64.NewDense(1, len(floats), floats)
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user