1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-28 13:48:56 +08:00

Refactoring some stuff

This commit is contained in:
Stephen Whitworth 2014-05-01 19:56:30 +01:00
commit 8f1de1ba2c
9 changed files with 116 additions and 83 deletions

21
LICENSE.md Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2014 Stephen Whitworth
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,10 +1,28 @@
GoLearn
=======
A basic, but useful, machine learning library for Go.
<img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125>
To do:
A small start on a machine learning library in Go.
* Add more classifiers/regressors - random forests, logistic and linear regression
* Add useful tools - K-fold parameter search
* Improve error handling
Install
=======
```
go get github.com/sjwhitworth/golearn
cd $GOPATH/src/github.com/sjwhitworth/golearn
go get ./...
```
Examples
=======
```
cd examples/
go run knnclassifier_iris.go
```
Join the team
=============
If you'd like to contribute, please send me an email at stephen dot whitworth at hailocab dot com.

View File

@ -3,6 +3,10 @@
package base
import (
mat "github.com/skelterjohn/go.matrix"
)
// An object that can ingest some data and train on it.
type Estimator interface {
Fit()
@ -19,10 +23,7 @@ type Model interface {
Score()
}
// @todo;
type BaseClassifier struct {
}
// @todo;
type BaseRegressor struct {
// @todo: Implement BaseEstimator setters and getters.
type BaseEstimator struct {
Data *mat.DenseMatrix
}

View File

@ -2,19 +2,20 @@ package main
import (
"fmt"
data "github.com/sjwhitworth/golearn/data"
knnclass "github.com/sjwhitworth/golearn/knn"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix"
"golearn/data"
util "golearn/utilities"
)
func main() {
//Parses the infamous Iris data.
cols, rows, _, labels, data := base.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2})
cols, rows, _, labels, data := data.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2})
//Initialises a new KNN classifier
knn := knnclass.KNNClassifier{}
knn.C
knn.New("Testing", labels, data, rows, cols)
knn.New(labels, data, rows, cols, "euclidean")
for {
//Creates a random array of N float64s between 0 and 7

View File

@ -1,31 +1,32 @@
package main
import (
mat "github.com/skelterjohn/go.matrix"
base "golearn/base"
util "golearn/utilities"
knnclass "golearn/knn"
"fmt"
)
"fmt"
func main(){
data "github.com/sjwhitworth/golearn/data"
knnclass "github.com/sjwhitworth/golearn/knn"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix"
)
func main() {
//Parses the random example data.
cols, rows, _, labels, data := base.ParseCsv("datasets/randomdata.csv", 2, []int{0,1})
cols, rows, _, labels, data := data.ParseCsv("datasets/randomdata.csv", 2, []int{0, 1})
newlabels := util.ConvertLabelsToFloat(labels)
//Initialises a new KNN classifier
knn := knnclass.KNNRegressor{}
knn.New("Testing", newlabels, data, rows, cols)
knn.New(newlabels, data, rows, cols, "euclidean")
for {
//Creates a random array of N float64s between 0 and Y
randArray := util.RandomArray(2, 100)
//Initialises a vector with this array
random := mat.MakeDenseMatrix(randArray,1,2)
random := mat.MakeDenseMatrix(randArray, 1, 2)
//Calculates the Euclidean distance and returns the most popular label
outcome, _ := knn.Predict(random, 3)
fmt.Println(outcome)
}
}
}

1
golearn.go Normal file
View File

@ -0,0 +1 @@
package golearn

View File

@ -1,47 +1,28 @@
package knn
import (
"fmt"
base "github.com/sjwhitworth/golearn/base"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix"
base "golearn/base"
util "golearn/utilities"
"math"
)
//A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and the name of the distance function to use.
type KNNClassifier struct {
base.BaseClassifier
base.BaseEstimator
Labels []string
DistanceFunc string
}
//Mints a new classifier.
func (KNN *KNNClassifier) New(name string, labels []string, numbers []float64, x int, y int) {
func (KNN *KNNClassifier) New(labels []string, numbers []float64, x int, y int, distfunc string) {
KNN.Data = *mat.MakeDenseMatrix(numbers, x, y)
KNN.Name = name
KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
KNN.Labels = labels
KNN.DistanceFunc = distfunc
}
//Computes the Euclidean distance between two vectors.
func (KNN *KNNClassifier) ComputeDistance(vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 {
var sum float64
difference, err := testrow.MinusDense(vector)
flat := difference.Array()
if err != nil {
fmt.Println(err)
}
for _, i := range flat {
squared := math.Pow(i, 2)
sum += squared
}
eucdistance := math.Sqrt(sum)
return eucdistance
}
//Returns a classification for the vector, based on a vector input, using the KNN algorithm.
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
// @todo: Lots of room to improve this. V messy.
func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int) {
rows := KNN.Data.Rows()
@ -51,7 +32,9 @@ func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int
for i := 0; i < rows; i++ {
row := KNN.Data.GetRowVector(i)
eucdistance := KNN.ComputeDistance(row, vector)
//Will put code in to check errs later
eucdistance, _ := util.ComputeDistance(KNN.DistanceFunc, row, vector)
rownumbers[i] = eucdistance
}

View File

@ -2,21 +2,23 @@ package knn
import (
"fmt"
mat "github.com/skelterjohn/go.matrix"
base "golearn/base"
util "golearn/utilities"
"math"
util "github.com/sjwhitworth/golearn/utilities"
mat "github.com/skelterjohn/go.matrix"
)
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
type KNNRegressor struct {
base.BaseRegressor
Data *mat.DenseMatrix
Name string
Labels []float64
}
//Mints a new regressor.
func (KNN *KNNRegressor) New(name string, labels []float64, numbers []float64, x int, y int) {
KNN.Data = *mat.MakeDenseMatrix(numbers, x, y)
KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
KNN.Name = name
KNN.Labels = labels
}

View File

@ -1,31 +1,36 @@
package utilities
import (
util "../utilities"
"fmt"
mat "github.com/skelterjohn/go.matrix"
"math"
mat "github.com/skelterjohn/go.matrix"
)
//Computes the Euclidean distance between two vectors.
func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 {
// Computes the 'distance' between two vectors, where the distance is one of the following methods -
// euclidean (more to come)
func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) (float64, error) {
var sum float64
// Compute a variety of distance metrics
switch metric:
case "euclidean": {
difference, err := testrow.MinusDense(vector)
flat := difference.Array()
switch metric {
case "euclidean":
{
difference, err := testrow.MinusDense(vector)
flat := difference.Array()
if err != nil {
fmt.Println(err)
if err != nil {
fmt.Println(err)
}
for _, i := range flat {
squared := math.Pow(i, 2)
sum += squared
}
eucdistance := math.Sqrt(sum)
return eucdistance, nil
}
for _, i := range flat {
squared := math.Pow(i, 2)
sum += squared
}
eucdistance := math.Sqrt(sum)
return eucdistance
}
default:
return 0.0, fmt.Errorf("ValueError: %s is not an implemented distance method", metric)
}
}