diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..766a0a5 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) {{{year}}} {{{fullname}}} + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index d828999..1e687c7 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,28 @@ GoLearn ======= -A basic, but useful, machine learning library for Go. + -To do: +A small start on a machine learning library in Go. -* Add more classifiers/regressors - random forests, logistic and linear regression -* Add useful tools - K fold parameter search -* Improve error handling +Install +======= + +``` +go get github.com/sjwhitworth/golearn +cd src/github.com/sjwhitworth/golearn +go get ./... +``` + +Examples +======= + +``` +cd examples/ +go run knnclassifier_iris.go +``` + +Join the team +============= + +If you'd like to contribute, please send me a mail at stephen dot whitworth at hailocab dot com. diff --git a/base/domain.go b/base/domain.go index a87c1b1..5e9b75a 100644 --- a/base/domain.go +++ b/base/domain.go @@ -3,6 +3,10 @@ package base +import ( + mat "github.com/skelterjohn/go.matrix" +) + // An object that can ingest some data and train on it. type Estimator interface { Fit() @@ -19,10 +23,7 @@ type Model interface { Score() } -// @todo; -type BaseClassifier struct { -} - -// @todo; -type BaseRegressor struct { +// @todo: Implement BaseEstimator setters and getters. +type BaseEstimator struct { + Data *mat.DenseMatrix } diff --git a/examples/knnclassifier_iris.go b/examples/knnclassifier_iris.go index c859233..e30dab1 100644 --- a/examples/knnclassifier_iris.go +++ b/examples/knnclassifier_iris.go @@ -2,19 +2,20 @@ package main import ( "fmt" + + data "github.com/sjwhitworth/golearn/data" + knnclass "github.com/sjwhitworth/golearn/knn" + util "github.com/sjwhitworth/golearn/utilities" mat "github.com/skelterjohn/go.matrix" - "golearn/data" - util "golearn/utilities" ) func main() { //Parses the infamous Iris data. - cols, rows, _, labels, data := base.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2}) + cols, rows, _, labels, data := data.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2}) //Initialises a new KNN classifier knn := knnclass.KNNClassifier{} - knn.C - knn.New("Testing", labels, data, rows, cols) + knn.New(labels, data, rows, cols, "euclidean") for { //Creates a random array of N float64s between 0 and 7 diff --git a/examples/knnregressor_random.go b/examples/knnregressor_random.go index dccf2cc..80c24c0 100644 --- a/examples/knnregressor_random.go +++ b/examples/knnregressor_random.go @@ -1,31 +1,32 @@ package main import ( - mat "github.com/skelterjohn/go.matrix" - base "golearn/base" - util "golearn/utilities" - knnclass "golearn/knn" - "fmt" - ) + "fmt" -func main(){ + data "github.com/sjwhitworth/golearn/data" + knnclass "github.com/sjwhitworth/golearn/knn" + util "github.com/sjwhitworth/golearn/utilities" + mat "github.com/skelterjohn/go.matrix" +) + +func main() { //Parses the infamous Iris data. - cols, rows, _, labels, data := base.ParseCsv("datasets/randomdata.csv", 2, []int{0,1}) + cols, rows, _, labels, data := data.ParseCsv("datasets/randomdata.csv", 2, []int{0, 1}) newlabels := util.ConvertLabelsToFloat(labels) //Initialises a new KNN classifier knn := knnclass.KNNRegressor{} - knn.New("Testing", newlabels, data, rows, cols) - + knn.New(newlabels, data, rows, cols, "euclidean") + for { //Creates a random array of N float64s between 0 and Y randArray := util.RandomArray(2, 100) //Initialises a vector with this array - random := mat.MakeDenseMatrix(randArray,1,2) + random := mat.MakeDenseMatrix(randArray, 1, 2) //Calculates the Euclidean distance and returns the most popular label outcome, _ := knn.Predict(random, 3) fmt.Println(outcome) } -} \ No newline at end of file +} diff --git a/golearn.go b/golearn.go new file mode 100644 index 0000000..7072cd8 --- /dev/null +++ b/golearn.go @@ -0,0 +1 @@ +package golearn diff --git a/knn/knn.go b/knn/knn.go index 350c04f..6e4009f 100644 --- a/knn/knn.go +++ b/knn/knn.go @@ -1,47 +1,28 @@ package knn import ( - "fmt" + base "github.com/sjwhitworth/golearn/base" + util "github.com/sjwhitworth/golearn/utilities" mat "github.com/skelterjohn/go.matrix" - base "golearn/base" - util "golearn/utilities" - "math" ) //A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a name. type KNNClassifier struct { - base.BaseClassifier + base.BaseEstimator + Labels []string + DistanceFunc string } //Mints a new classifier. -func (KNN *KNNClassifier) New(name string, labels []string, numbers []float64, x int, y int) { +func (KNN *KNNClassifier) New(labels []string, numbers []float64, x int, y int, distfunc string) { - KNN.Data = *mat.MakeDenseMatrix(numbers, x, y) - KNN.Name = name + KNN.Data = mat.MakeDenseMatrix(numbers, x, y) KNN.Labels = labels + KNN.DistanceFunc = distfunc } -//Computes the Euclidean distance between two vectors. -func (KNN *KNNClassifier) ComputeDistance(vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 { - var sum float64 - - difference, err := testrow.MinusDense(vector) - flat := difference.Array() - - if err != nil { - fmt.Println(err) - } - - for _, i := range flat { - squared := math.Pow(i, 2) - sum += squared - } - - eucdistance := math.Sqrt(sum) - return eucdistance -} - -//Returns a classification for the vector, based on a vector input, using the KNN algorithm. +// Returns a classification for the vector, based on a vector input, using the KNN algorithm. +// @todo: Lots of room to improve this. V messy. func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int) { rows := KNN.Data.Rows() @@ -51,7 +32,9 @@ func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int for i := 0; i < rows; i++ { row := KNN.Data.GetRowVector(i) - eucdistance := KNN.ComputeDistance(row, vector) + + //Will put code in to check errs later + eucdistance, _ := util.ComputeDistance(KNN.DistanceFunc, row, vector) rownumbers[i] = eucdistance } diff --git a/knn/knnregressor.go b/knn/knnregressor.go index c4cff39..ab82599 100644 --- a/knn/knnregressor.go +++ b/knn/knnregressor.go @@ -2,21 +2,23 @@ package knn import ( "fmt" - mat "github.com/skelterjohn/go.matrix" - base "golearn/base" - util "golearn/utilities" "math" + + util "github.com/sjwhitworth/golearn/utilities" + mat "github.com/skelterjohn/go.matrix" ) //A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name. type KNNRegressor struct { - base.BaseRegressor + Data *mat.DenseMatrix + Name string + Labels []float64 } //Mints a new classifier. func (KNN *KNNRegressor) New(name string, labels []float64, numbers []float64, x int, y int) { - KNN.Data = *mat.MakeDenseMatrix(numbers, x, y) + KNN.Data = mat.MakeDenseMatrix(numbers, x, y) KNN.Name = name KNN.Labels = labels } diff --git a/utilities/distance.go b/utilities/distance.go index 1e58df9..d044fd2 100644 --- a/utilities/distance.go +++ b/utilities/distance.go @@ -1,31 +1,36 @@ package utilities import ( - util "../utilities" "fmt" - mat "github.com/skelterjohn/go.matrix" "math" + + mat "github.com/skelterjohn/go.matrix" ) -//Computes the Euclidean distance between two vectors. -func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 { +// Computes the 'distance' between two vectors, where the distance is one of the following methods - +// euclidean (more to come) +func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) (float64, error) { var sum float64 - // Compute a variety of distance metrics - switch metric: - case "euclidean": { - difference, err := testrow.MinusDense(vector) - flat := difference.Array() + switch metric { + case "euclidean": + { + difference, err := testrow.MinusDense(vector) + flat := difference.Array() - if err != nil { - fmt.Println(err) + if err != nil { + fmt.Println(err) + } + + for _, i := range flat { + squared := math.Pow(i, 2) + sum += squared + } + + eucdistance := math.Sqrt(sum) + return eucdistance, nil } - - for _, i := range flat { - squared := math.Pow(i, 2) - sum += squared - } - - eucdistance := math.Sqrt(sum) - return eucdistance - } \ No newline at end of file + default: + return 0.0, fmt.Errorf("ValueError: %s is not an implemented distance method", metric) + } +}