1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-28 13:48:56 +08:00

Refactoring some stuff

This commit is contained in:
Stephen Whitworth 2014-05-01 19:56:30 +01:00
commit 8f1de1ba2c
9 changed files with 116 additions and 83 deletions

21
LICENSE.md Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) {{{year}}} {{{fullname}}}
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,10 +1,28 @@
GoLearn GoLearn
======= =======
A basic, but useful, machine learning library for Go. <img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125>
To do: A small start on a machine learning library in Go.
* Add more classifiers/regressors - random forests, logistic and linear regression Install
* Add useful tools - K fold parameter search =======
* Improve error handling
```
go get github.com/sjwhitworth/golearn
cd src/github.com/sjwhitworth/golearn
go get ./...
```
Examples
=======
```
cd examples/
go run knnclassifier_iris.go
```
Join the team
=============
If you'd like to contribute, please send me an email at stephen dot whitworth at hailocab dot com.

View File

@ -3,6 +3,10 @@
package base package base
import (
mat "github.com/skelterjohn/go.matrix"
)
// An object that can ingest some data and train on it. // An object that can ingest some data and train on it.
type Estimator interface { type Estimator interface {
Fit() Fit()
@ -19,10 +23,7 @@ type Model interface {
Score() Score()
} }
// @todo; // @todo: Implement BaseEstimator setters and getters.
type BaseClassifier struct { type BaseEstimator struct {
} Data *mat.DenseMatrix
// @todo;
type BaseRegressor struct {
} }

View File

@ -2,19 +2,20 @@ package main
import ( import (
"fmt" "fmt"
data "github.com/sjwhitworth/golearn/data"
knnclass "github.com/sjwhitworth/golearn/knn"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix" mat "github.com/skelterjohn/go.matrix"
"golearn/data"
util "golearn/utilities"
) )
func main() { func main() {
//Parses the infamous Iris data. //Parses the infamous Iris data.
cols, rows, _, labels, data := base.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2}) cols, rows, _, labels, data := data.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2})
//Initialises a new KNN classifier //Initialises a new KNN classifier
knn := knnclass.KNNClassifier{} knn := knnclass.KNNClassifier{}
knn.C knn.New(labels, data, rows, cols, "euclidean")
knn.New("Testing", labels, data, rows, cols)
for { for {
//Creates a random array of N float64s between 0 and 7 //Creates a random array of N float64s between 0 and 7

View File

@ -1,31 +1,32 @@
package main package main
import ( import (
mat "github.com/skelterjohn/go.matrix" "fmt"
base "golearn/base"
util "golearn/utilities"
knnclass "golearn/knn"
"fmt"
)
func main(){ data "github.com/sjwhitworth/golearn/data"
knnclass "github.com/sjwhitworth/golearn/knn"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix"
)
func main() {
//Parses the infamous Iris data. //Parses the infamous Iris data.
cols, rows, _, labels, data := base.ParseCsv("datasets/randomdata.csv", 2, []int{0,1}) cols, rows, _, labels, data := data.ParseCsv("datasets/randomdata.csv", 2, []int{0, 1})
newlabels := util.ConvertLabelsToFloat(labels) newlabels := util.ConvertLabelsToFloat(labels)
//Initialises a new KNN classifier //Initialises a new KNN classifier
knn := knnclass.KNNRegressor{} knn := knnclass.KNNRegressor{}
knn.New("Testing", newlabels, data, rows, cols) knn.New(newlabels, data, rows, cols, "euclidean")
for { for {
//Creates a random array of N float64s between 0 and Y //Creates a random array of N float64s between 0 and Y
randArray := util.RandomArray(2, 100) randArray := util.RandomArray(2, 100)
//Initialises a vector with this array //Initialises a vector with this array
random := mat.MakeDenseMatrix(randArray,1,2) random := mat.MakeDenseMatrix(randArray, 1, 2)
//Calculates the Euclidean distance and returns the most popular label //Calculates the Euclidean distance and returns the most popular label
outcome, _ := knn.Predict(random, 3) outcome, _ := knn.Predict(random, 3)
fmt.Println(outcome) fmt.Println(outcome)
} }
} }

1
golearn.go Normal file
View File

@ -0,0 +1 @@
package golearn

View File

@ -1,47 +1,28 @@
package knn package knn
import ( import (
"fmt" base "github.com/sjwhitworth/golearn/base"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix" mat "github.com/skelterjohn/go.matrix"
base "golearn/base"
util "golearn/utilities"
"math"
) )
//A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a name. //A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a name.
type KNNClassifier struct { type KNNClassifier struct {
base.BaseClassifier base.BaseEstimator
Labels []string
DistanceFunc string
} }
//Mints a new classifier. //Mints a new classifier.
func (KNN *KNNClassifier) New(name string, labels []string, numbers []float64, x int, y int) { func (KNN *KNNClassifier) New(labels []string, numbers []float64, x int, y int, distfunc string) {
KNN.Data = *mat.MakeDenseMatrix(numbers, x, y) KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
KNN.Name = name
KNN.Labels = labels KNN.Labels = labels
KNN.DistanceFunc = distfunc
} }
//Computes the Euclidean distance between two vectors. // Returns a classification for the vector, based on a vector input, using the KNN algorithm.
func (KNN *KNNClassifier) ComputeDistance(vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 { // @todo: Lots of room to improve this. V messy.
var sum float64
difference, err := testrow.MinusDense(vector)
flat := difference.Array()
if err != nil {
fmt.Println(err)
}
for _, i := range flat {
squared := math.Pow(i, 2)
sum += squared
}
eucdistance := math.Sqrt(sum)
return eucdistance
}
//Returns a classification for the vector, based on a vector input, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int) { func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int) {
rows := KNN.Data.Rows() rows := KNN.Data.Rows()
@ -51,7 +32,9 @@ func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int
for i := 0; i < rows; i++ { for i := 0; i < rows; i++ {
row := KNN.Data.GetRowVector(i) row := KNN.Data.GetRowVector(i)
eucdistance := KNN.ComputeDistance(row, vector)
//Will put code in to check errs later
eucdistance, _ := util.ComputeDistance(KNN.DistanceFunc, row, vector)
rownumbers[i] = eucdistance rownumbers[i] = eucdistance
} }

View File

@ -2,21 +2,23 @@ package knn
import ( import (
"fmt" "fmt"
mat "github.com/skelterjohn/go.matrix"
base "golearn/base"
util "golearn/utilities"
"math" "math"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix"
) )
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name. //A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
type KNNRegressor struct { type KNNRegressor struct {
base.BaseRegressor Data *mat.DenseMatrix
Name string
Labels []float64
} }
//Mints a new classifier. //Mints a new classifier.
func (KNN *KNNRegressor) New(name string, labels []float64, numbers []float64, x int, y int) { func (KNN *KNNRegressor) New(name string, labels []float64, numbers []float64, x int, y int) {
KNN.Data = *mat.MakeDenseMatrix(numbers, x, y) KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
KNN.Name = name KNN.Name = name
KNN.Labels = labels KNN.Labels = labels
} }

View File

@ -1,31 +1,36 @@
package utilities package utilities
import ( import (
util "../utilities"
"fmt" "fmt"
mat "github.com/skelterjohn/go.matrix"
"math" "math"
mat "github.com/skelterjohn/go.matrix"
) )
//Computes the Euclidean distance between two vectors. // Computes the 'distance' between two vectors, where the distance is one of the following methods -
func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 { // euclidean (more to come)
func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) (float64, error) {
var sum float64 var sum float64
// Compute a variety of distance metrics switch metric {
switch metric: case "euclidean":
case "euclidean": { {
difference, err := testrow.MinusDense(vector) difference, err := testrow.MinusDense(vector)
flat := difference.Array() flat := difference.Array()
if err != nil { if err != nil {
fmt.Println(err) fmt.Println(err)
}
for _, i := range flat {
squared := math.Pow(i, 2)
sum += squared
}
eucdistance := math.Sqrt(sum)
return eucdistance, nil
} }
default:
for _, i := range flat { return 0.0, fmt.Errorf("ValueError: %s is not an implemented distance method", metric)
squared := math.Pow(i, 2) }
sum += squared }
}
eucdistance := math.Sqrt(sum)
return eucdistance
}