mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-28 13:48:56 +08:00
Refactoring some stuff
This commit is contained in:
commit
8f1de1ba2c
21
LICENSE.md
Normal file
21
LICENSE.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) {{{year}}} {{{fullname}}}
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
28
README.md
28
README.md
@ -1,10 +1,28 @@
|
|||||||
GoLearn
|
GoLearn
|
||||||
=======
|
=======
|
||||||
|
|
||||||
A basic, but useful, machine learning library for Go.
|
<img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125>
|
||||||
|
|
||||||
To do:
|
A small start on a machine learning library in Go.
|
||||||
|
|
||||||
* Add more classifiers/regressors - random forests, logistic and linear regression
|
Install
|
||||||
* Add useful tools - K fold parameter search
|
=======
|
||||||
* Improve error handling
|
|
||||||
|
```
|
||||||
|
go get github.com/sjwhitworth/golearn
|
||||||
|
cd src/github.com/sjwhitworth/golearn
|
||||||
|
go get ./...
|
||||||
|
```
|
||||||
|
|
||||||
|
Examples
|
||||||
|
=======
|
||||||
|
|
||||||
|
```
|
||||||
|
cd examples/
|
||||||
|
go run knnclassifier_iris.go
|
||||||
|
```
|
||||||
|
|
||||||
|
Join the team
|
||||||
|
=============
|
||||||
|
|
||||||
|
If you'd like to contribute, please send me a mail at stephen dot whitworth at hailocab dot com.
|
||||||
|
@ -3,6 +3,10 @@
|
|||||||
|
|
||||||
package base
|
package base
|
||||||
|
|
||||||
|
import (
|
||||||
|
mat "github.com/skelterjohn/go.matrix"
|
||||||
|
)
|
||||||
|
|
||||||
// An object that can ingest some data and train on it.
|
// An object that can ingest some data and train on it.
|
||||||
type Estimator interface {
|
type Estimator interface {
|
||||||
Fit()
|
Fit()
|
||||||
@ -19,10 +23,7 @@ type Model interface {
|
|||||||
Score()
|
Score()
|
||||||
}
|
}
|
||||||
|
|
||||||
// @todo;
|
// @todo: Implement BaseEstimator setters and getters.
|
||||||
type BaseClassifier struct {
|
type BaseEstimator struct {
|
||||||
}
|
Data *mat.DenseMatrix
|
||||||
|
|
||||||
// @todo;
|
|
||||||
type BaseRegressor struct {
|
|
||||||
}
|
}
|
||||||
|
@ -2,19 +2,20 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
data "github.com/sjwhitworth/golearn/data"
|
||||||
|
knnclass "github.com/sjwhitworth/golearn/knn"
|
||||||
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
mat "github.com/skelterjohn/go.matrix"
|
||||||
"golearn/data"
|
|
||||||
util "golearn/utilities"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
//Parses the infamous Iris data.
|
//Parses the infamous Iris data.
|
||||||
cols, rows, _, labels, data := base.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2})
|
cols, rows, _, labels, data := data.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2})
|
||||||
|
|
||||||
//Initialises a new KNN classifier
|
//Initialises a new KNN classifier
|
||||||
knn := knnclass.KNNClassifier{}
|
knn := knnclass.KNNClassifier{}
|
||||||
knn.C
|
knn.New(labels, data, rows, cols, "euclidean")
|
||||||
knn.New("Testing", labels, data, rows, cols)
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
//Creates a random array of N float64s between 0 and 7
|
//Creates a random array of N float64s between 0 and 7
|
||||||
|
@ -1,31 +1,32 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
"fmt"
|
||||||
base "golearn/base"
|
|
||||||
util "golearn/utilities"
|
|
||||||
knnclass "golearn/knn"
|
|
||||||
"fmt"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main(){
|
data "github.com/sjwhitworth/golearn/data"
|
||||||
|
knnclass "github.com/sjwhitworth/golearn/knn"
|
||||||
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
|
mat "github.com/skelterjohn/go.matrix"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
//Parses the infamous Iris data.
|
//Parses the infamous Iris data.
|
||||||
cols, rows, _, labels, data := base.ParseCsv("datasets/randomdata.csv", 2, []int{0,1})
|
cols, rows, _, labels, data := data.ParseCsv("datasets/randomdata.csv", 2, []int{0, 1})
|
||||||
newlabels := util.ConvertLabelsToFloat(labels)
|
newlabels := util.ConvertLabelsToFloat(labels)
|
||||||
|
|
||||||
//Initialises a new KNN classifier
|
//Initialises a new KNN classifier
|
||||||
knn := knnclass.KNNRegressor{}
|
knn := knnclass.KNNRegressor{}
|
||||||
knn.New("Testing", newlabels, data, rows, cols)
|
knn.New(newlabels, data, rows, cols, "euclidean")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
//Creates a random array of N float64s between 0 and Y
|
//Creates a random array of N float64s between 0 and Y
|
||||||
randArray := util.RandomArray(2, 100)
|
randArray := util.RandomArray(2, 100)
|
||||||
|
|
||||||
//Initialises a vector with this array
|
//Initialises a vector with this array
|
||||||
random := mat.MakeDenseMatrix(randArray,1,2)
|
random := mat.MakeDenseMatrix(randArray, 1, 2)
|
||||||
|
|
||||||
//Calculates the Euclidean distance and returns the most popular label
|
//Calculates the Euclidean distance and returns the most popular label
|
||||||
outcome, _ := knn.Predict(random, 3)
|
outcome, _ := knn.Predict(random, 3)
|
||||||
fmt.Println(outcome)
|
fmt.Println(outcome)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
1
golearn.go
Normal file
1
golearn.go
Normal file
@ -0,0 +1 @@
|
|||||||
|
package golearn
|
43
knn/knn.go
43
knn/knn.go
@ -1,47 +1,28 @@
|
|||||||
package knn
|
package knn
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
base "github.com/sjwhitworth/golearn/base"
|
||||||
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
mat "github.com/skelterjohn/go.matrix"
|
||||||
base "golearn/base"
|
|
||||||
util "golearn/utilities"
|
|
||||||
"math"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
//A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a name.
|
//A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a name.
|
||||||
type KNNClassifier struct {
|
type KNNClassifier struct {
|
||||||
base.BaseClassifier
|
base.BaseEstimator
|
||||||
|
Labels []string
|
||||||
|
DistanceFunc string
|
||||||
}
|
}
|
||||||
|
|
||||||
//Mints a new classifier.
|
//Mints a new classifier.
|
||||||
func (KNN *KNNClassifier) New(name string, labels []string, numbers []float64, x int, y int) {
|
func (KNN *KNNClassifier) New(labels []string, numbers []float64, x int, y int, distfunc string) {
|
||||||
|
|
||||||
KNN.Data = *mat.MakeDenseMatrix(numbers, x, y)
|
KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
|
||||||
KNN.Name = name
|
|
||||||
KNN.Labels = labels
|
KNN.Labels = labels
|
||||||
|
KNN.DistanceFunc = distfunc
|
||||||
}
|
}
|
||||||
|
|
||||||
//Computes the Euclidean distance between two vectors.
|
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
|
||||||
func (KNN *KNNClassifier) ComputeDistance(vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 {
|
// @todo: Lots of room to improve this. V messy.
|
||||||
var sum float64
|
|
||||||
|
|
||||||
difference, err := testrow.MinusDense(vector)
|
|
||||||
flat := difference.Array()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, i := range flat {
|
|
||||||
squared := math.Pow(i, 2)
|
|
||||||
sum += squared
|
|
||||||
}
|
|
||||||
|
|
||||||
eucdistance := math.Sqrt(sum)
|
|
||||||
return eucdistance
|
|
||||||
}
|
|
||||||
|
|
||||||
//Returns a classification for the vector, based on a vector input, using the KNN algorithm.
|
|
||||||
func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int) {
|
func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int) {
|
||||||
|
|
||||||
rows := KNN.Data.Rows()
|
rows := KNN.Data.Rows()
|
||||||
@ -51,7 +32,9 @@ func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) (string, []int
|
|||||||
|
|
||||||
for i := 0; i < rows; i++ {
|
for i := 0; i < rows; i++ {
|
||||||
row := KNN.Data.GetRowVector(i)
|
row := KNN.Data.GetRowVector(i)
|
||||||
eucdistance := KNN.ComputeDistance(row, vector)
|
|
||||||
|
//Will put code in to check errs later
|
||||||
|
eucdistance, _ := util.ComputeDistance(KNN.DistanceFunc, row, vector)
|
||||||
rownumbers[i] = eucdistance
|
rownumbers[i] = eucdistance
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,21 +2,23 @@ package knn
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
|
||||||
base "golearn/base"
|
|
||||||
util "golearn/utilities"
|
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
|
util "github.com/sjwhitworth/golearn/utilities"
|
||||||
|
mat "github.com/skelterjohn/go.matrix"
|
||||||
)
|
)
|
||||||
|
|
||||||
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
|
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
|
||||||
type KNNRegressor struct {
|
type KNNRegressor struct {
|
||||||
base.BaseRegressor
|
Data *mat.DenseMatrix
|
||||||
|
Name string
|
||||||
|
Labels []float64
|
||||||
}
|
}
|
||||||
|
|
||||||
//Mints a new classifier.
|
//Mints a new classifier.
|
||||||
func (KNN *KNNRegressor) New(name string, labels []float64, numbers []float64, x int, y int) {
|
func (KNN *KNNRegressor) New(name string, labels []float64, numbers []float64, x int, y int) {
|
||||||
|
|
||||||
KNN.Data = *mat.MakeDenseMatrix(numbers, x, y)
|
KNN.Data = mat.MakeDenseMatrix(numbers, x, y)
|
||||||
KNN.Name = name
|
KNN.Name = name
|
||||||
KNN.Labels = labels
|
KNN.Labels = labels
|
||||||
}
|
}
|
||||||
|
@ -1,31 +1,36 @@
|
|||||||
package utilities
|
package utilities
|
||||||
|
|
||||||
import (
|
import (
|
||||||
util "../utilities"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
mat "github.com/skelterjohn/go.matrix"
|
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
|
mat "github.com/skelterjohn/go.matrix"
|
||||||
)
|
)
|
||||||
|
|
||||||
//Computes the Euclidean distance between two vectors.
|
// Computes the 'distance' between two vectors, where the distance is one of the following methods -
|
||||||
func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 {
|
// euclidean (more to come)
|
||||||
|
func ComputeDistance(metric string, vector *mat.DenseMatrix, testrow *mat.DenseMatrix) (float64, error) {
|
||||||
var sum float64
|
var sum float64
|
||||||
|
|
||||||
// Compute a variety of distance metrics
|
switch metric {
|
||||||
switch metric:
|
case "euclidean":
|
||||||
case "euclidean": {
|
{
|
||||||
difference, err := testrow.MinusDense(vector)
|
difference, err := testrow.MinusDense(vector)
|
||||||
flat := difference.Array()
|
flat := difference.Array()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, i := range flat {
|
||||||
|
squared := math.Pow(i, 2)
|
||||||
|
sum += squared
|
||||||
|
}
|
||||||
|
|
||||||
|
eucdistance := math.Sqrt(sum)
|
||||||
|
return eucdistance, nil
|
||||||
}
|
}
|
||||||
|
default:
|
||||||
for _, i := range flat {
|
return 0.0, fmt.Errorf("ValueError: %s is not an implemented distance method", metric)
|
||||||
squared := math.Pow(i, 2)
|
}
|
||||||
sum += squared
|
}
|
||||||
}
|
|
||||||
|
|
||||||
eucdistance := math.Sqrt(sum)
|
|
||||||
return eucdistance
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user