mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-28 13:48:56 +08:00
Merge branch 'master' into feature/naive
This commit is contained in:
commit
888dfc7e6d
9
.travis.yml
Normal file
9
.travis.yml
Normal file
@ -0,0 +1,9 @@
|
||||
language: go
|
||||
go:
|
||||
- 1.1
|
||||
- 1.2
|
||||
- release
|
||||
- tip
|
||||
install:
|
||||
- go get github.com/smartystreets/goconvey/convey
|
||||
- go get -v ./...
|
52
README.md
52
README.md
@ -4,7 +4,8 @@ GoLearn
|
||||
<img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125><br>
|
||||
[](https://godoc.org/github.com/sjwhitworth/golearn)<br>
|
||||
|
||||
A small start on a machine learning library in Go.
|
||||
GoLearn is a 'batteries included' machine learning library for Go. **Simplicity**, paired with customisability, is the goal.
|
||||
We are in active development, and would love comments from users out in the wild. Drop us a line on Twitter.
|
||||
|
||||
twitter: [@golearn_ml](http://www.twitter.com/golearn_ml)
|
||||
|
||||
@ -17,15 +18,60 @@ cd src/github.com/sjwhitworth/golearn
|
||||
go get ./...
|
||||
```
|
||||
|
||||
Examples
|
||||
Getting Started
|
||||
=======
|
||||
|
||||
Data are loaded in as Instances. You can then perform matrix-like operations on them, and pass them to estimators.
|
||||
GoLearn implements the scikit-learn interface of Fit/Predict, so you can easily swap out estimators for trial and error.
|
||||
GoLearn also includes helper functions for data, like cross validation, and train and test splitting.
|
||||
|
||||
```
|
||||
// Load in a dataset, with headers. Header attributes will be stored.
|
||||
// Think of instances as a Data Frame structure in R or Pandas.
|
||||
// You can also create instances from scratch.
|
||||
data, err := base.ParseCSVToInstances("datasets/iris_headers.csv", true)
|
||||
|
||||
// Print a pleasant summary of your data.
|
||||
fmt.Println(data)
|
||||
|
||||
// Split your dataframe into a training set, and a test set, with an 80/20 proportion.
|
||||
trainTest := base.InstancesTrainTestSplit(data, 0.8)
|
||||
trainData := trainTest[0]
|
||||
testData := trainTest[1]
|
||||
|
||||
// Instantiate a new KNN classifier. Euclidean distance, with 2 neighbours.
|
||||
cls := knn.NewKnnClassifier("euclidean", 2)
|
||||
|
||||
// Fit it on your training data.
|
||||
cls.Fit(trainData)
|
||||
|
||||
// Get your predictions against test instances.
|
||||
predictions := cls.Predict(testData)
|
||||
|
||||
// Print a confusion matrix with precision and recall metrics.
|
||||
confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
|
||||
fmt.Println(evaluation.GetSummary(confusionMat))
|
||||
```
|
||||
|
||||
```
|
||||
Iris-virginica 28 2 56 0.9333 0.9333 0.9333
|
||||
Iris-setosa 29 0 59 1.0000 1.0000 1.0000
|
||||
Iris-versicolor 27 2 57 0.9310 0.9310 0.9310
|
||||
Overall accuracy: 0.9545
|
||||
```
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
GoLearn comes with practical examples. Dive in and see what is going on.
|
||||
|
||||
```
|
||||
cd examples/
|
||||
go run knnclassifier_iris.go
|
||||
go run instances.go
|
||||
```
|
||||
|
||||
Join the team
|
||||
=============
|
||||
|
||||
If you'd like to contribute, please send me a mail at stephen dot whitworth at hailocab dot com. I will also add you to the team [Slack](https://slack.com) account, which we also use to communicate.
|
||||
Please send me a mail at stephen dot whitworth at hailocab dot com.
|
||||
|
267
base/attributes.go
Normal file
267
base/attributes.go
Normal file
@ -0,0 +1,267 @@
|
||||
package base
|
||||
|
||||
import "fmt"
|
||||
import "strconv"
|
||||
|
||||
// Type tags returned by Attribute.GetType.
const (
	// CategoricalType tags Attributes whose values are distinct categories.
	CategoricalType = iota
	// Float64Type tags Attributes holding float64 values. It is marked as
	// deprecated and is meant to be replaced by a FractionalNumeric type.
	Float64Type
)
|
||||
|
||||
// Attribute disambiguates one column of the feature matrix: it names the
// column, declares its type, and converts values between their
// human-readable string form and the float64 system representation.
type Attribute interface {
	// GetType returns the type tag describing the general characteristics
	// of this Attribute, so callers can branch on it without the overhead
	// of casting.
	GetType() int
	// GetName returns the human-readable name of this Attribute.
	GetName() string
	// SetName sets the human-readable name of this Attribute.
	SetName(string)
	// String returns a human-readable overview of this Attribute for
	// debugging.
	String() string
	// GetSysValFromString converts a human-readable string into the
	// system representation. For example, a CategoricalAttribute with
	// values ["iris-setosa", "iris-virginica"] would return the float64
	// representation of 0 when given "iris-setosa".
	GetSysValFromString(string) float64
	// GetStringFromSysVal converts a system representation back into a
	// human-readable string. For example, a CategoricalAttribute with
	// values ["iris-setosa", "iris-virginica"] might return "iris-setosa"
	// when given 0.0 as the argument.
	GetStringFromSysVal(float64) string
	// Equals tests for equality with another Attribute. Two Attributes
	// are considered equal if:
	//   * they have the same type (a FloatAttribute never equals a
	//     CategoricalAttribute),
	//   * they have the same name, and
	//   * if applicable, they have the same categorical values.
	//
	// NOTE(review): CategoricalAttribute.Equals compares the values
	// position by position, so value order matters there.
	Equals(Attribute) bool
}
|
||||
|
||||
// FloatAttribute is an Attribute implementation for columns that store
// floating point representations of numbers.
type FloatAttribute struct {
	// Name is the human-readable name of the column.
	Name string
	// Precision is the number of decimal places GetStringFromSysVal
	// uses when rendering a value.
	Precision int
}
|
||||
|
||||
// NewFloatAttribute returns a new FloatAttribute with a default
|
||||
// precision of 2 decimal places
|
||||
func NewFloatAttribute() *FloatAttribute {
|
||||
return &FloatAttribute{"", 2}
|
||||
}
|
||||
|
||||
// Equals tests a FloatAttribute for equality with another Attribute.
|
||||
//
|
||||
// Returns false if the other Attribute has a different name
|
||||
// or if the other Attribute is not a FloatAttribute.
|
||||
func (Attr *FloatAttribute) Equals(other Attribute) bool {
|
||||
// Check whether this FloatAttribute is equal to another
|
||||
_, ok := other.(*FloatAttribute)
|
||||
if !ok {
|
||||
// Not the same type, so can't be equal
|
||||
return false
|
||||
}
|
||||
if Attr.GetName() != other.GetName() {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// GetName returns this FloatAttribute's human-readable name.
|
||||
func (Attr *FloatAttribute) GetName() string {
|
||||
return Attr.Name
|
||||
}
|
||||
|
||||
// SetName sets this FloatAttribute's human-readable name.
|
||||
func (Attr *FloatAttribute) SetName(name string) {
|
||||
Attr.Name = name
|
||||
}
|
||||
|
||||
// GetType returns Float64Type.
|
||||
func (Attr *FloatAttribute) GetType() int {
|
||||
return Float64Type
|
||||
}
|
||||
|
||||
// String returns a human-readable summary of this Attribute.
|
||||
// e.g. "FloatAttribute(Sepal Width)"
|
||||
func (Attr *FloatAttribute) String() string {
|
||||
return fmt.Sprintf("FloatAttribute(%s)", Attr.Name)
|
||||
}
|
||||
|
||||
// CheckSysValFromString confirms whether a given rawVal can
|
||||
// be converted into a valid system representation.
|
||||
func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) (float64, error) {
|
||||
f, err := strconv.ParseFloat(rawVal, 64)
|
||||
if err != nil {
|
||||
return 0.0, err
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// GetSysValFromString parses the given rawVal string to a float64 and returns it.
|
||||
//
|
||||
// float64 happens to be a 1-to-1 mapping to the system representation.
|
||||
// IMPORTANT: This function panic()s if rawVal is not a valid float.
|
||||
// Use CheckSysValFromString to confirm.
|
||||
func (Attr *FloatAttribute) GetSysValFromString(rawVal string) float64 {
|
||||
f, err := strconv.ParseFloat(rawVal, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
// GetStringFromSysVal converts a given system value to to a string with two decimal
|
||||
// places of precision [TODO: revise this and allow more precision].
|
||||
func (Attr *FloatAttribute) GetStringFromSysVal(rawVal float64) string {
|
||||
formatString := fmt.Sprintf("%%.%df", Attr.Precision)
|
||||
return fmt.Sprintf(formatString, rawVal)
|
||||
}
|
||||
|
||||
// GetSysVal returns the system representation of userVal.
|
||||
//
|
||||
// Because FloatAttribute represents float64 types, this
|
||||
// just returns its argument.
|
||||
func (Attr *FloatAttribute) GetSysVal(userVal float64) float64 {
|
||||
return userVal
|
||||
}
|
||||
|
||||
// GetUsrVal returns the user representation of sysVal.
|
||||
//
|
||||
// Because FloatAttribute represents float64 types, this
|
||||
// just returns its argument.
|
||||
func (Attr *FloatAttribute) GetUsrVal(sysVal float64) float64 {
|
||||
return sysVal
|
||||
}
|
||||
|
||||
// CategoricalAttribute is an Attribute implementation which stores
// discrete string values - useful for representing classes.
type CategoricalAttribute struct {
	// Name is the human-readable name of the column.
	Name string
	// values lists the known categories; a category's index in this
	// slice is its system representation.
	values []string
}
|
||||
|
||||
func NewCategoricalAttribute() *CategoricalAttribute {
|
||||
return &CategoricalAttribute{
|
||||
"",
|
||||
make([]string, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// GetName returns the human-readable name assigned to this attribute.
|
||||
func (Attr *CategoricalAttribute) GetName() string {
|
||||
return Attr.Name
|
||||
}
|
||||
|
||||
// SetName sets the human-readable name on this attribute.
|
||||
func (Attr *CategoricalAttribute) SetName(name string) {
|
||||
Attr.Name = name
|
||||
}
|
||||
|
||||
// GetType returns CategoricalType to avoid casting overhead.
|
||||
func (Attr *CategoricalAttribute) GetType() int {
|
||||
return CategoricalType
|
||||
}
|
||||
|
||||
// GetSysVal returns the system representation of userVal as an index into the Values slice
|
||||
// If the userVal can't be found, it returns -1.
|
||||
func (Attr *CategoricalAttribute) GetSysVal(userVal string) float64 {
|
||||
for idx, val := range Attr.values {
|
||||
if val == userVal {
|
||||
return float64(idx)
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// GetUsrVal returns a human-readable representation of the given sysVal.
|
||||
//
|
||||
// IMPORTANT: this function doesn't check the boundaries of the array.
|
||||
func (Attr *CategoricalAttribute) GetUsrVal(sysVal float64) string {
|
||||
idx := int(sysVal)
|
||||
return Attr.values[idx]
|
||||
}
|
||||
|
||||
// GetSysValFromString returns the system representation of rawVal
|
||||
// as an index into the Values slice. If rawVal is not inside
|
||||
// the Values slice, it is appended.
|
||||
//
|
||||
// IMPORTANT: If no system representation yet exists, this functions adds it.
|
||||
// If you need to determine whether rawVal exists: use GetSysVal and check
|
||||
// for a -1 return value.
|
||||
//
|
||||
// Example: if the CategoricalAttribute contains the values ["iris-setosa",
|
||||
// "iris-virginica"] and "iris-versicolor" is provided as the argument,
|
||||
// the Values slide becomes ["iris-setosa", "iris-virginica", "iris-versicolor"]
|
||||
// and 2.00 is returned as the system representation.
|
||||
func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) float64 {
|
||||
// Match in raw values
|
||||
catIndex := -1
|
||||
for i, s := range Attr.values {
|
||||
if s == rawVal {
|
||||
catIndex = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if catIndex == -1 {
|
||||
Attr.values = append(Attr.values, rawVal)
|
||||
catIndex = len(Attr.values) - 1
|
||||
}
|
||||
return float64(catIndex)
|
||||
}
|
||||
|
||||
// String returns a human-readable summary of this Attribute.
|
||||
//
|
||||
// Returns a string containing the list of human-readable values this
|
||||
// CategoricalAttribute can take.
|
||||
func (Attr *CategoricalAttribute) String() string {
|
||||
return fmt.Sprintf("CategoricalAttribute(%s)", Attr.values)
|
||||
}
|
||||
|
||||
// GetStringFromSysVal returns a human-readable value from the given system-representation
|
||||
// value val.
|
||||
//
|
||||
// IMPORTANT: This function calls panic() if the value is greater than
|
||||
// the length of the array.
|
||||
// TODO: Return a user-configurable default instead.
|
||||
func (Attr *CategoricalAttribute) GetStringFromSysVal(val float64) string {
|
||||
convVal := int(val)
|
||||
if convVal >= len(Attr.values) {
|
||||
panic(fmt.Sprintf("Out of range: %d in %d", convVal, len(Attr.values)))
|
||||
}
|
||||
return Attr.values[convVal]
|
||||
}
|
||||
|
||||
// Equals checks equality against another Attribute.
|
||||
//
|
||||
// Two CategoricalAttributes are considered equal if they contain
|
||||
// the same values and have the same name. Otherwise, this function
|
||||
// returns false.
|
||||
func (Attr *CategoricalAttribute) Equals(other Attribute) bool {
|
||||
attribute, ok := other.(*CategoricalAttribute)
|
||||
if !ok {
|
||||
// Not the same type, so can't be equal
|
||||
return false
|
||||
}
|
||||
if Attr.GetName() != attribute.GetName() {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check that this CategoricalAttribute has the same
|
||||
// values as the other, in the same order
|
||||
if len(attribute.values) != len(Attr.values) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, a := range Attr.values {
|
||||
if a != attribute.values[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
30
base/classifier.go
Normal file
30
base/classifier.go
Normal file
@ -0,0 +1,30 @@
|
||||
package base
|
||||
|
||||
import (
|
||||
"github.com/gonum/matrix/mat64"
|
||||
)
|
||||
|
||||
// Classifier implementations predict categorical class labels.
|
||||
type Classifier interface {
|
||||
// Takes a set of Instances, copies the class Attribute
|
||||
// and constructs a new set of Instances of equivalent
|
||||
// length with only the class Attribute and fills it in
|
||||
// with predictions.
|
||||
Predict(*Instances) *Instances
|
||||
// Takes a set of instances and updates the Classifier's
|
||||
// internal structures to enable prediction
|
||||
Fit(*Instances)
|
||||
// Why not make every classifier return a nice-looking string?
|
||||
String() string
|
||||
}
|
||||
|
||||
// BaseClassifier stores options common to every classifier.
|
||||
type BaseClassifier struct {
|
||||
TrainingData *Instances
|
||||
}
|
||||
|
||||
type BaseRegressor struct {
|
||||
Data mat64.Dense
|
||||
Name string
|
||||
Labels []float64
|
||||
}
|
213
base/csv.go
Normal file
213
base/csv.go
Normal file
@ -0,0 +1,213 @@
|
||||
package base
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseCSVGetRows returns the number of CSV records in the file at
// filepath (a header row, if present, is counted too).
//
// It panics if the file cannot be opened or a record cannot be parsed.
func ParseCSVGetRows(filepath string) int {
	file, err := os.Open(filepath)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	rows := 0
	for {
		_, err := reader.Read()
		switch {
		case err == io.EOF:
			// End of input: every record has been counted.
			return rows
		case err != nil:
			panic(err)
		}
		rows++
	}
}
|
||||
|
||||
// ParseCSVGetAttributes returns an ordered slice of appropriate-ly typed
|
||||
// and named Attributes.
|
||||
func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
|
||||
attrs := ParseCSVSniffAttributeTypes(filepath, hasHeaders)
|
||||
names := ParseCSVSniffAttributeNames(filepath, hasHeaders)
|
||||
for i, attr := range attrs {
|
||||
attr.SetName(names[i])
|
||||
}
|
||||
return attrs
|
||||
}
|
||||
|
||||
// ParseCSVSniffAttributeNames returns a slice containing the top row of
// the given CSV file with surrounding whitespace trimmed, or positional
// placeholders ("0", "1", ...) if hasHeaders is false.
//
// It panics if the file cannot be opened or its first record cannot be
// read.
func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
	file, err := os.Open(filepath)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// The first record is read either way: it supplies the names when
	// there is a header, and the column count when there is not.
	headers, err := csv.NewReader(file).Read()
	if err != nil {
		panic(err)
	}

	for i := range headers {
		if hasHeaders {
			headers[i] = strings.TrimSpace(headers[i])
		} else {
			headers[i] = fmt.Sprintf("%d", i)
		}
	}
	return headers
}
|
||||
|
||||
// ParseCSVSniffAttributeTypes returns a slice of appropriately-typed Attributes.
|
||||
//
|
||||
// The type of a given attribute is determined by looking at the first data row
|
||||
// of the CSV.
|
||||
func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute {
|
||||
file, err := os.Open(filepath)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer file.Close()
|
||||
reader := csv.NewReader(file)
|
||||
attrs := make([]Attribute, 0)
|
||||
if hasHeaders {
|
||||
_, err := reader.Read()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
columns, err := reader.Read()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, entry := range columns {
|
||||
matched, err := regexp.MatchString("^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$", entry)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if matched {
|
||||
attrs = append(attrs, NewFloatAttribute())
|
||||
} else {
|
||||
attrs = append(attrs, new(CategoricalAttribute))
|
||||
}
|
||||
}
|
||||
|
||||
return attrs
|
||||
}
|
||||
|
||||
// ParseCSVToInstances reads the CSV file given by filepath and returns
|
||||
// the read Instances.
|
||||
func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *Instances, err error) {
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
var ok bool
|
||||
if err, ok = r.(error); !ok {
|
||||
err = fmt.Errorf("golearn: ParseCSVToInstances: %v", r)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Read the number of rows in the file
|
||||
rowCount := ParseCSVGetRows(filepath)
|
||||
if hasHeaders {
|
||||
rowCount--
|
||||
}
|
||||
|
||||
// Read the row headers
|
||||
attrs := ParseCSVGetAttributes(filepath, hasHeaders)
|
||||
|
||||
// Allocate the Instances to return
|
||||
instances = NewInstances(attrs, rowCount)
|
||||
|
||||
// Read the input
|
||||
file, err := os.Open(filepath)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer file.Close()
|
||||
reader := csv.NewReader(file)
|
||||
|
||||
rowCounter := 0
|
||||
for {
|
||||
record, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if rowCounter == 0 {
|
||||
if hasHeaders {
|
||||
hasHeaders = false
|
||||
continue
|
||||
}
|
||||
}
|
||||
for i := range attrs {
|
||||
instances.SetAttrStr(rowCounter, i, record[i])
|
||||
}
|
||||
rowCounter++
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// ParseCSV parses a CSV file whose first row is a header and returns, in
// order: the number of selected columns, the number of data rows read,
// the headers of the selected columns, the label of every data row
// (taken from column index label), and the selected values of all rows
// flattened row by row into a single slice.
//
// Values that cannot be parsed as floats are stored as 0.
//
// Errors are reported on standard output. If the file cannot be opened
// or its header cannot be read, no rows are returned; if a data row
// cannot be read, parsing stops and the rows read so far are returned.
// Column indices that lie outside a record still cause a runtime panic.
func ParseCSV(filepath string, label int, columns []int) (int, int, []string, []string, []float64) {
	cols := len(columns)
	labels := make([]string, 0)
	data := make([]float64, 0)
	headers := make([]string, 0, cols)
	rows := 0

	file, err := os.Open(filepath)
	if err != nil {
		fmt.Println("Error:", err)
		return cols, rows, headers, labels, data
	}
	defer file.Close()

	reader := csv.NewReader(file)

	headerrow, err := reader.Read()
	if err != nil {
		fmt.Println("Error:", err)
		return cols, rows, headers, labels, data
	}
	for _, col := range columns {
		headers = append(headers, headerrow[col])
	}

	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// There is no usable record to index into; stop here
			// rather than retrying a reader that may keep failing.
			fmt.Println("Error:", err)
			break
		}

		labels = append(labels, record[label])

		// Append the selected values of this row to the flat data slice.
		for _, col := range columns {
			// A parse failure deliberately leaves the value at 0.
			number, _ := strconv.ParseFloat(record[col], 64)
			data = append(data, number)
		}
		rows++
	}
	return cols, rows, headers, labels, data
}
|
106
base/csv_test.go
Normal file
106
base/csv_test.go
Normal file
@ -0,0 +1,106 @@
|
||||
package base
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseCSVGetRows(testEnv *testing.T) {
|
||||
lineCount := ParseCSVGetRows("../examples/datasets/iris.csv")
|
||||
if lineCount != 150 {
|
||||
testEnv.Error("Should have %d lines, has %d", 150, lineCount)
|
||||
}
|
||||
|
||||
lineCount = ParseCSVGetRows("../examples/datasets/iris_headers.csv")
|
||||
if lineCount != 151 {
|
||||
testEnv.Error("Should have %d lines, has %d", 151, lineCount)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestParseCCSVGetAttributes(testEnv *testing.T) {
|
||||
attrs := ParseCSVGetAttributes("../examples/datasets/iris_headers.csv", true)
|
||||
if attrs[0].GetType() != Float64Type {
|
||||
testEnv.Error("First attribute should be a float, %s", attrs[0])
|
||||
}
|
||||
if attrs[0].GetName() != "Sepal length" {
|
||||
testEnv.Error(attrs[0].GetName())
|
||||
}
|
||||
|
||||
if attrs[4].GetType() != CategoricalType {
|
||||
testEnv.Error("Final attribute should be categorical, %s", attrs[4])
|
||||
}
|
||||
if attrs[4].GetName() != "Species" {
|
||||
testEnv.Error(attrs[4])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCsvSniffAttributeTypes(testEnv *testing.T) {
|
||||
attrs := ParseCSVSniffAttributeTypes("../examples/datasets/iris_headers.csv", true)
|
||||
if attrs[0].GetType() != Float64Type {
|
||||
testEnv.Error("First attribute should be a float, %s", attrs[0])
|
||||
}
|
||||
if attrs[1].GetType() != Float64Type {
|
||||
testEnv.Error("Second attribute should be a float, %s", attrs[1])
|
||||
}
|
||||
if attrs[2].GetType() != Float64Type {
|
||||
testEnv.Error("Third attribute should be a float, %s", attrs[2])
|
||||
}
|
||||
if attrs[3].GetType() != Float64Type {
|
||||
testEnv.Error("Fourth attribute should be a float, %s", attrs[3])
|
||||
}
|
||||
if attrs[4].GetType() != CategoricalType {
|
||||
testEnv.Error("Final attribute should be categorical, %s", attrs[4])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCSVSniffAttributeNamesWithHeaders(testEnv *testing.T) {
|
||||
attrs := ParseCSVSniffAttributeNames("../examples/datasets/iris_headers.csv", true)
|
||||
if attrs[0] != "Sepal length" {
|
||||
testEnv.Error(attrs[0])
|
||||
}
|
||||
if attrs[1] != "Sepal width" {
|
||||
testEnv.Error(attrs[1])
|
||||
}
|
||||
if attrs[2] != "Petal length" {
|
||||
testEnv.Error(attrs[2])
|
||||
}
|
||||
if attrs[3] != "Petal width" {
|
||||
testEnv.Error(attrs[3])
|
||||
}
|
||||
if attrs[4] != "Species" {
|
||||
testEnv.Error(attrs[4])
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadInstances(testEnv *testing.T) {
|
||||
inst, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
|
||||
if err != nil {
|
||||
testEnv.Error(err)
|
||||
return
|
||||
}
|
||||
row1 := inst.RowStr(0)
|
||||
row2 := inst.RowStr(50)
|
||||
row3 := inst.RowStr(100)
|
||||
|
||||
if row1 != "5.10 3.50 1.40 0.20 Iris-setosa" {
|
||||
testEnv.Error(row1)
|
||||
}
|
||||
if row2 != "7.00 3.20 4.70 1.40 Iris-versicolor" {
|
||||
testEnv.Error(row2)
|
||||
}
|
||||
if row3 != "6.30 3.30 6.00 2.50 Iris-virginica" {
|
||||
testEnv.Error(row3)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadAwkwardInsatnces(testEnv *testing.T) {
|
||||
inst, err := ParseCSVToInstances("../examples/datasets/chim.csv", true)
|
||||
if err != nil {
|
||||
testEnv.Error(err)
|
||||
return
|
||||
}
|
||||
if inst.GetAttr(0).GetType() != Float64Type {
|
||||
testEnv.Error("Should be float!")
|
||||
}
|
||||
if inst.GetAttr(1).GetType() != CategoricalType {
|
||||
testEnv.Error("Should be discrete!")
|
||||
}
|
||||
}
|
33
base/decompose_test.go
Normal file
33
base/decompose_test.go
Normal file
@ -0,0 +1,33 @@
|
||||
package base
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestDecomp(testEnv *testing.T) {
|
||||
inst, err := ParseCSVToInstances("../examples/datasets/iris_binned.csv", true)
|
||||
if err != nil {
|
||||
testEnv.Error(err)
|
||||
return
|
||||
}
|
||||
decomp := inst.DecomposeOnAttributeValues(inst.GetAttr(0))
|
||||
|
||||
row0 := decomp["0.00"].RowStr(0)
|
||||
row1 := decomp["1.00"].RowStr(0)
|
||||
/* row2 := decomp["2.00"].RowStr(0)
|
||||
row3 := decomp["3.00"].RowStr(0)
|
||||
row4 := decomp["4.00"].RowStr(0)
|
||||
row5 := decomp["5.00"].RowStr(0)
|
||||
row6 := decomp["6.00"].RowStr(0)
|
||||
row7 := decomp["7.00"].RowStr(0)*/
|
||||
row8 := decomp["8.00"].RowStr(0)
|
||||
// row9 := decomp["9.00"].RowStr(0)
|
||||
|
||||
if row0 != "3.10 1.50 0.20 Iris-setosa" {
|
||||
testEnv.Error(row0)
|
||||
}
|
||||
if row1 != "3.00 1.40 0.20 Iris-setosa" {
|
||||
testEnv.Error(row1)
|
||||
}
|
||||
if row8 != "2.90 6.30 1.80 Iris-virginica" {
|
||||
testEnv.Error(row8)
|
||||
}
|
||||
}
|
519
base/instances.go
Normal file
519
base/instances.go
Normal file
@ -0,0 +1,519 @@
|
||||
package base
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/gonum/matrix/mat64"
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
// SortDirection specifies the direction in which Instances are sorted.
type SortDirection int

// Supported sort directions.
const (
	// Descending says that Instances should be sorted high to low.
	Descending SortDirection = 1
	// Ascending states that Instances should be sorted low to high.
	Ascending SortDirection = 2
)
|
||||
|
||||
// highBit is an int64 with only its most significant bit set, i.e. the
// mask that selects the sign bit of a 64-bit value.
const highBit int64 = -1 << 63
|
||||
|
||||
// Instances represents a grid of numbers (typed by Attributes)
|
||||
// stored internally in mat.DenseMatrix as float64's.
|
||||
// See docs/instances.md for more information.
|
||||
type Instances struct {
|
||||
storage *mat64.Dense
|
||||
attributes []Attribute
|
||||
Rows int
|
||||
Cols int
|
||||
ClassIndex int
|
||||
}
|
||||
|
||||
// xorFloatOp returns item with the most significant bit of its 64-bit
// representation (the sign bit) flipped. Applying it twice yields the
// original value.
func xorFloatOp(item float64) float64 {
	var (
		scratch bytes.Buffer
		bits    int64
		flipped float64
	)
	// Round-trip through the buffer to reinterpret the float as an int64.
	binary.Write(&scratch, binary.LittleEndian, item)
	binary.Read(&scratch, binary.LittleEndian, &bits)
	bits ^= -1 << 63
	// And back again to reinterpret the modified bits as a float64.
	binary.Write(&scratch, binary.LittleEndian, bits)
	binary.Read(&scratch, binary.LittleEndian, &flipped)
	return flipped
}
|
||||
|
||||
func printFloatByteArr(arr [][]byte) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
var f float64
|
||||
for _, b := range arr {
|
||||
buf.Write(b)
|
||||
binary.Read(buf, binary.LittleEndian, &f)
|
||||
f = xorFloatOp(f)
|
||||
fmt.Println(f)
|
||||
}
|
||||
}
|
||||
|
||||
// Sort does an in-place radix sort of Instances, using SortDirection
|
||||
// direction (Ascending or Descending) with attrs as a slice of Attribute
|
||||
// indices that you want to sort by.
|
||||
//
|
||||
// IMPORTANT: Radix sort is not stable, so ordering outside
|
||||
// the attributes used for sorting is arbitrary.
|
||||
func (inst *Instances) Sort(direction SortDirection, attrs []int) {
|
||||
// Create a buffer
|
||||
buf := bytes.NewBuffer(nil)
|
||||
ds := make([][]byte, inst.Rows)
|
||||
rs := make([]int, inst.Rows)
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
byteBuf := make([]byte, 8*len(attrs))
|
||||
for _, a := range attrs {
|
||||
x := inst.storage.At(i, a)
|
||||
binary.Write(buf, binary.LittleEndian, xorFloatOp(x))
|
||||
}
|
||||
buf.Read(byteBuf)
|
||||
ds[i] = byteBuf
|
||||
rs[i] = i
|
||||
}
|
||||
// Sort viua
|
||||
valueBins := make([][][]byte, 256)
|
||||
rowBins := make([][]int, 256)
|
||||
for i := 0; i < 8*len(attrs); i++ {
|
||||
for j := 0; j < len(ds); j++ {
|
||||
// Address each row value by it's ith byte
|
||||
b := ds[j]
|
||||
valueBins[b[i]] = append(valueBins[b[i]], b)
|
||||
rowBins[b[i]] = append(rowBins[b[i]], rs[j])
|
||||
}
|
||||
j := 0
|
||||
for k := 0; k < 256; k++ {
|
||||
bs := valueBins[k]
|
||||
rc := rowBins[k]
|
||||
copy(ds[j:], bs)
|
||||
copy(rs[j:], rc)
|
||||
j += len(bs)
|
||||
valueBins[k] = bs[:0]
|
||||
rowBins[k] = rc[:0]
|
||||
}
|
||||
}
|
||||
|
||||
for _, b := range ds {
|
||||
var v float64
|
||||
buf.Write(b)
|
||||
binary.Read(buf, binary.LittleEndian, &v)
|
||||
}
|
||||
|
||||
done := make([]bool, inst.Rows)
|
||||
for index := range rs {
|
||||
if done[index] {
|
||||
continue
|
||||
}
|
||||
j := index
|
||||
for {
|
||||
done[j] = true
|
||||
if rs[j] != index {
|
||||
inst.swapRows(j, rs[j])
|
||||
j = rs[j]
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if direction == Descending {
|
||||
// Reverse the matrix
|
||||
for i, j := 0, inst.Rows-1; i < j; i, j = i+1, j-1 {
|
||||
inst.swapRows(i, j)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NewInstances returns a preallocated Instances structure
|
||||
// with some helful values pre-filled.
|
||||
func NewInstances(attrs []Attribute, rows int) *Instances {
|
||||
rawStorage := make([]float64, rows*len(attrs))
|
||||
return NewInstancesFromRaw(attrs, rows, rawStorage)
|
||||
}
|
||||
|
||||
// CheckNewInstancesFromRaw checks whether a call to NewInstancesFromRaw
|
||||
// is likely to produce an error-free result.
|
||||
func CheckNewInstancesFromRaw(attrs []Attribute, rows int, data []float64) error {
|
||||
size := rows * len(attrs)
|
||||
if size < len(data) {
|
||||
return errors.New("base: data length is larger than the rows * attribute space.")
|
||||
} else if size > len(data) {
|
||||
return errors.New("base: data is smaller than the rows * attribute space")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewInstancesFromRaw wraps a slice of float64 numbers in a
|
||||
// mat64.Dense structure, reshaping it with the given number of rows
|
||||
// and representing it with the given attrs (Attribute slice)
|
||||
//
|
||||
// IMPORTANT: if the |attrs| * |rows| value doesn't equal len(data)
|
||||
// then panic()s may occur. Use CheckNewInstancesFromRaw to confirm.
|
||||
func NewInstancesFromRaw(attrs []Attribute, rows int, data []float64) *Instances {
|
||||
rawStorage := mat64.NewDense(rows, len(attrs), data)
|
||||
return NewInstancesFromDense(attrs, rows, rawStorage)
|
||||
}
|
||||
|
||||
// NewInstancesFromDense creates a set of Instances from a mat64.Dense
|
||||
// matrix
|
||||
func NewInstancesFromDense(attrs []Attribute, rows int, mat *mat64.Dense) *Instances {
|
||||
return &Instances{mat, attrs, rows, len(attrs), len(attrs) - 1}
|
||||
}
|
||||
|
||||
// InstancesTrainTestSplit takes a given Instances (src) and a train-test fraction
|
||||
// (prop) and returns an array of two new Instances, one containing approximately
|
||||
// that fraction and the other containing what's left.
|
||||
//
|
||||
// IMPORTANT: this function is only meaningful when prop is between 0.0 and 1.0.
|
||||
// Using any other values may result in odd behaviour.
|
||||
func InstancesTrainTestSplit(src *Instances, prop float64) [2](*Instances) {
|
||||
trainingRows := make([]int, 0)
|
||||
testingRows := make([]int, 0)
|
||||
numAttrs := len(src.attributes)
|
||||
for i := 0; i < src.Rows; i++ {
|
||||
trainOrTest := rand.Intn(101)
|
||||
if trainOrTest > int(100*prop) {
|
||||
trainingRows = append(trainingRows, i)
|
||||
} else {
|
||||
testingRows = append(testingRows, i)
|
||||
}
|
||||
}
|
||||
|
||||
rawTrainMatrix := mat64.NewDense(len(trainingRows), numAttrs, make([]float64, len(trainingRows)*numAttrs))
|
||||
rawTestMatrix := mat64.NewDense(len(testingRows), numAttrs, make([]float64, len(testingRows)*numAttrs))
|
||||
|
||||
for i, row := range trainingRows {
|
||||
rowDat := src.storage.RowView(row)
|
||||
rawTrainMatrix.SetRow(i, rowDat)
|
||||
}
|
||||
for i, row := range testingRows {
|
||||
rowDat := src.storage.RowView(row)
|
||||
rawTestMatrix.SetRow(i, rowDat)
|
||||
}
|
||||
|
||||
var ret [2]*Instances
|
||||
ret[0] = NewInstancesFromDense(src.attributes, len(trainingRows), rawTrainMatrix)
|
||||
ret[1] = NewInstancesFromDense(src.attributes, len(testingRows), rawTestMatrix)
|
||||
return ret
|
||||
}
|
||||
|
||||
// CountAttrValues returns the distribution of values of a given
|
||||
// Attribute.
|
||||
// IMPORTANT: calls panic() if the attribute index of a cannot be
|
||||
// determined. Call GetAttrIndex(a) and check for a -1 return value.
|
||||
func (inst *Instances) CountAttrValues(a Attribute) map[string]int {
|
||||
ret := make(map[string]int)
|
||||
attrIndex := inst.GetAttrIndex(a)
|
||||
if attrIndex == -1 {
|
||||
panic("Invalid attribute")
|
||||
}
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
sysVal := inst.Get(i, attrIndex)
|
||||
stringVal := a.GetStringFromSysVal(sysVal)
|
||||
ret[stringVal] += 1
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// CountClassValues returns the class distribution of this
|
||||
// Instances set
|
||||
func (inst *Instances) CountClassValues() map[string]int {
|
||||
a := inst.GetAttr(inst.ClassIndex)
|
||||
return inst.CountAttrValues(a)
|
||||
}
|
||||
|
||||
// DecomposeOnAttributeValues divides the instance set depending on the
|
||||
// value of a given Attribute, constructs child instances, and returns
|
||||
// them in a map keyed on the string value of that Attribute.
|
||||
// IMPORTANT: calls panic() if the attribute index of at cannot be determined.
|
||||
// Use GetAttrIndex(at) and check for a non-zero return value.
|
||||
func (inst *Instances) DecomposeOnAttributeValues(at Attribute) map[string]*Instances {
|
||||
// Find the attribute we're decomposing on
|
||||
attrIndex := inst.GetAttrIndex(at)
|
||||
if attrIndex == -1 {
|
||||
panic("Invalid attribute index")
|
||||
}
|
||||
// Construct the new attribute set
|
||||
newAttrs := make([]Attribute, 0)
|
||||
for i := range inst.attributes {
|
||||
a := inst.attributes[i]
|
||||
if a.Equals(at) {
|
||||
continue
|
||||
}
|
||||
newAttrs = append(newAttrs, a)
|
||||
}
|
||||
// Create the return map, several counting maps
|
||||
ret := make(map[string]*Instances)
|
||||
counts := inst.CountAttrValues(at) // So we know what to allocate
|
||||
rows := make(map[string]int)
|
||||
for k := range counts {
|
||||
tmp := NewInstances(newAttrs, counts[k])
|
||||
ret[k] = tmp
|
||||
}
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
newAttrCounter := 0
|
||||
classVar := at.GetStringFromSysVal(inst.Get(i, attrIndex))
|
||||
dest := ret[classVar]
|
||||
destRow := rows[classVar]
|
||||
for j := 0; j < inst.Cols; j++ {
|
||||
a := inst.attributes[j]
|
||||
if a.Equals(at) {
|
||||
continue
|
||||
}
|
||||
dest.Set(destRow, newAttrCounter, inst.Get(i, j))
|
||||
newAttrCounter++
|
||||
}
|
||||
rows[classVar]++
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// Get returns the system representation (float64) of the value
|
||||
// stored at the given row and col coordinate.
|
||||
func (inst *Instances) Get(row int, col int) float64 {
|
||||
return inst.storage.At(row, col)
|
||||
}
|
||||
|
||||
// Set sets the system representation (float64) to val at the
|
||||
// given row and column coordinate.
|
||||
func (inst *Instances) Set(row int, col int, val float64) {
|
||||
inst.storage.Set(row, col, val)
|
||||
}
|
||||
|
||||
// GetRowVector returns a row of system representation
|
||||
// values at the given row index.
|
||||
func (inst *Instances) GetRowVector(row int) []float64 {
|
||||
return inst.storage.RowView(row)
|
||||
}
|
||||
|
||||
// GetRowVector returns a row of system representation
|
||||
// values at the given row index, excluding the class attribute
|
||||
func (inst *Instances) GetRowVectorWithoutClass(row int) []float64 {
|
||||
rawRow := make([]float64, inst.Cols)
|
||||
copy(rawRow, inst.GetRowVector(row))
|
||||
return append(rawRow[0:inst.ClassIndex], rawRow[inst.ClassIndex+1:inst.Cols]...)
|
||||
}
|
||||
|
||||
// GetClass returns the string representation of the given
|
||||
// row's class, as determined by the Attribute at the ClassIndex
|
||||
// position from GetAttr
|
||||
func (inst *Instances) GetClass(row int) string {
|
||||
attr := inst.GetAttr(inst.ClassIndex)
|
||||
val := inst.Get(row, inst.ClassIndex)
|
||||
return attr.GetStringFromSysVal(val)
|
||||
}
|
||||
|
||||
func (Inst *Instances) GetClassAttrPtr() *Attribute {
|
||||
attr := Inst.GetAttr(Inst.ClassIndex)
|
||||
return &attr
|
||||
}
|
||||
|
||||
func (Inst *Instances) GetClassAttr() Attribute {
|
||||
return Inst.GetAttr(Inst.ClassIndex)
|
||||
}
|
||||
|
||||
//
|
||||
// Attribute functions
|
||||
//
|
||||
|
||||
// GetAttributeCount returns the number of attributes represented.
|
||||
func (inst *Instances) GetAttributeCount() int {
|
||||
// Return the number of attributes attached to this Instance set
|
||||
return len(inst.attributes)
|
||||
}
|
||||
|
||||
// SetAttrStr sets the system-representation value of row in column attr
|
||||
// to value val, implicitly converting the string to system-representation
|
||||
// via the appropriate Attribute function.
|
||||
func (inst *Instances) SetAttrStr(row int, attr int, val string) {
|
||||
// Set an attribute on a particular row from a string value
|
||||
a := inst.attributes[attr]
|
||||
sysVal := a.GetSysValFromString(val)
|
||||
inst.storage.Set(row, attr, sysVal)
|
||||
}
|
||||
|
||||
// GetAttrStr returns a human-readable string value stored in column `attr'
|
||||
// and row `row', as determined by the appropriate Attribute function.
|
||||
func (inst *Instances) GetAttrStr(row int, attr int) string {
|
||||
// Get a human-readable value from a particular row
|
||||
a := inst.attributes[attr]
|
||||
usrVal := a.GetStringFromSysVal(inst.Get(row, attr))
|
||||
return usrVal
|
||||
}
|
||||
|
||||
// GetAttr returns information about an attribute at given index
|
||||
// in the attributes slice.
|
||||
func (inst *Instances) GetAttr(attrIndex int) Attribute {
|
||||
// Return a copy of an attribute attached to this Instance set
|
||||
return inst.attributes[attrIndex]
|
||||
}
|
||||
|
||||
// GetAttrIndex returns the offset of a given Attribute `a' to an
|
||||
// index in the attributes slice
|
||||
func (inst *Instances) GetAttrIndex(of Attribute) int {
|
||||
// Finds the offset of an Attribute in this instance set
|
||||
// Returns -1 if no Attribute matches
|
||||
for i, a := range inst.attributes {
|
||||
if a.Equals(of) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// ReplaceAttr overwrites the attribute at `index' with `a'
|
||||
func (inst *Instances) ReplaceAttr(index int, a Attribute) {
|
||||
// Replace an Attribute at index with another
|
||||
// DOESN'T CONVERT ANY EXISTING VALUES
|
||||
inst.attributes[index] = a
|
||||
}
|
||||
|
||||
//
|
||||
// Printing functions
|
||||
//
|
||||
|
||||
// RowStr returns a human-readable representation of a given row.
|
||||
func (inst *Instances) RowStr(row int) string {
|
||||
// Prints a given row
|
||||
var buffer bytes.Buffer
|
||||
for j := 0; j < inst.Cols; j++ {
|
||||
val := inst.storage.At(row, j)
|
||||
a := inst.attributes[j]
|
||||
postfix := " "
|
||||
if j == inst.Cols-1 {
|
||||
postfix = ""
|
||||
}
|
||||
buffer.WriteString(fmt.Sprintf("%s%s", a.GetStringFromSysVal(val), postfix))
|
||||
}
|
||||
return buffer.String()
|
||||
}
|
||||
|
||||
func (inst *Instances) String() string {
|
||||
var buffer bytes.Buffer
|
||||
|
||||
buffer.WriteString("Instances with ")
|
||||
buffer.WriteString(fmt.Sprintf("%d row(s) ", inst.Rows))
|
||||
buffer.WriteString(fmt.Sprintf("%d attribute(s)\n", inst.Cols))
|
||||
|
||||
buffer.WriteString(fmt.Sprintf("Attributes: \n"))
|
||||
for i, a := range inst.attributes {
|
||||
prefix := "\t"
|
||||
if i == inst.ClassIndex {
|
||||
prefix = "*\t"
|
||||
}
|
||||
buffer.WriteString(fmt.Sprintf("%s%s\n", prefix, a))
|
||||
}
|
||||
|
||||
buffer.WriteString("\nData:\n")
|
||||
maxRows := 30
|
||||
if inst.Rows < maxRows {
|
||||
maxRows = inst.Rows
|
||||
}
|
||||
|
||||
for i := 0; i < maxRows; i++ {
|
||||
buffer.WriteString("\t")
|
||||
for j := 0; j < inst.Cols; j++ {
|
||||
val := inst.storage.At(i, j)
|
||||
a := inst.attributes[j]
|
||||
buffer.WriteString(fmt.Sprintf("%s ", a.GetStringFromSysVal(val)))
|
||||
}
|
||||
buffer.WriteString("\n")
|
||||
}
|
||||
|
||||
missingRows := inst.Rows - maxRows
|
||||
if missingRows != 0 {
|
||||
buffer.WriteString(fmt.Sprintf("\t...\n%d row(s) undisplayed", missingRows))
|
||||
} else {
|
||||
buffer.WriteString("All rows displayed")
|
||||
}
|
||||
|
||||
return buffer.String()
|
||||
}
|
||||
|
||||
// SelectAttributes returns a new instance set containing
|
||||
// the values from this one with only the Attributes specified
|
||||
func (inst *Instances) SelectAttributes(attrs []Attribute) *Instances {
|
||||
ret := NewInstances(attrs, inst.Rows)
|
||||
attrIndices := make([]int, 0)
|
||||
for _, a := range attrs {
|
||||
attrIndex := inst.GetAttrIndex(a)
|
||||
attrIndices = append(attrIndices, attrIndex)
|
||||
}
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
for j, a := range attrIndices {
|
||||
ret.Set(i, j, inst.Get(i, a))
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// GeneratePredictionVector generates a new set of Instances
|
||||
// with the same number of rows, but only this Instance set's
|
||||
// class Attribute.
|
||||
func (inst *Instances) GeneratePredictionVector() *Instances {
|
||||
attrs := make([]Attribute, 1)
|
||||
attrs[0] = inst.GetClassAttr()
|
||||
ret := NewInstances(attrs, inst.Rows)
|
||||
return ret
|
||||
}
|
||||
|
||||
// Shuffle randomizes the row order in place
|
||||
func (inst *Instances) Shuffle() {
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
j := rand.Intn(inst.Rows)
|
||||
inst.swapRows(i, j)
|
||||
}
|
||||
}
|
||||
|
||||
// SampleWithReplacement returns a new set of Instances of size `size'
|
||||
// containing random rows from this set of Instances.
|
||||
//
|
||||
// IMPORTANT: There's a high chance of seeing duplicate rows
|
||||
// whenever size is close to the row count.
|
||||
func (inst *Instances) SampleWithReplacement(size int) *Instances {
|
||||
ret := NewInstances(inst.attributes, size)
|
||||
for i := 0; i < size; i++ {
|
||||
srcRow := rand.Intn(inst.Rows)
|
||||
for j := 0; j < inst.Cols; j++ {
|
||||
ret.Set(i, j, inst.Get(srcRow, j))
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// Equal checks whether a given Instance set is exactly the same
|
||||
// as another: same size and same values (as determined by the Attributes)
|
||||
//
|
||||
// IMPORTANT: does not explicitly check if the Attributes are considered equal.
|
||||
func (inst *Instances) Equal(other *Instances) bool {
|
||||
if inst.Rows != other.Rows {
|
||||
return false
|
||||
}
|
||||
if inst.Cols != other.Cols {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
for j := 0; j < inst.Cols; j++ {
|
||||
if inst.GetAttrStr(i, j) != other.GetAttrStr(i, j) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (inst *Instances) swapRows(r1 int, r2 int) {
|
||||
row1buf := make([]float64, inst.Cols)
|
||||
row2buf := make([]float64, inst.Cols)
|
||||
row1 := inst.storage.RowView(r1)
|
||||
row2 := inst.storage.RowView(r2)
|
||||
copy(row1buf, row1)
|
||||
copy(row2buf, row2)
|
||||
inst.storage.SetRow(r1, row2buf)
|
||||
inst.storage.SetRow(r2, row1buf)
|
||||
}
|
107
base/sort_test.go
Normal file
107
base/sort_test.go
Normal file
@ -0,0 +1,107 @@
|
||||
package base
|
||||
|
||||
import "testing"
|
||||
|
||||
func isSortedAsc(inst *Instances, attrIndex int) bool {
|
||||
valPrev := 0.0
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
cur := inst.Get(i, attrIndex)
|
||||
if i > 0 {
|
||||
if valPrev > cur {
|
||||
return false
|
||||
}
|
||||
}
|
||||
valPrev = cur
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isSortedDesc(inst *Instances, attrIndex int) bool {
|
||||
valPrev := 0.0
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
cur := inst.Get(i, attrIndex)
|
||||
if i > 0 {
|
||||
if valPrev < cur {
|
||||
return false
|
||||
}
|
||||
}
|
||||
valPrev = cur
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func TestSortDesc(testEnv *testing.T) {
|
||||
inst1, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
|
||||
if err != nil {
|
||||
testEnv.Error(err)
|
||||
return
|
||||
}
|
||||
inst2, err := ParseCSVToInstances("../examples/datasets/iris_sorted_desc.csv", true)
|
||||
if err != nil {
|
||||
testEnv.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
if isSortedDesc(inst1, 0) {
|
||||
testEnv.Error("Can't test descending sort order")
|
||||
}
|
||||
if !isSortedDesc(inst2, 0) {
|
||||
testEnv.Error("Reference data not sorted in descending order!")
|
||||
}
|
||||
attrs := make([]int, 4)
|
||||
attrs[0] = 3
|
||||
attrs[1] = 2
|
||||
attrs[2] = 1
|
||||
attrs[3] = 0
|
||||
inst1.Sort(Descending, attrs)
|
||||
if !isSortedDesc(inst1, 0) {
|
||||
testEnv.Error("Instances are not sorted in descending order")
|
||||
testEnv.Error(inst1)
|
||||
}
|
||||
if !inst2.Equal(inst1) {
|
||||
inst1.storage.Sub(inst1.storage, inst2.storage)
|
||||
testEnv.Error(inst1.storage)
|
||||
testEnv.Error("Instances don't match")
|
||||
testEnv.Error(inst1)
|
||||
testEnv.Error(inst2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortAsc(testEnv *testing.T) {
|
||||
inst, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
|
||||
if isSortedAsc(inst, 0) {
|
||||
testEnv.Error("Can't test ascending sort on something ascending already")
|
||||
}
|
||||
if err != nil {
|
||||
testEnv.Error(err)
|
||||
return
|
||||
}
|
||||
attrs := make([]int, 4)
|
||||
attrs[0] = 3
|
||||
attrs[1] = 2
|
||||
attrs[2] = 1
|
||||
attrs[3] = 0
|
||||
inst.Sort(Ascending, attrs)
|
||||
if !isSortedAsc(inst, 0) {
|
||||
testEnv.Error("Instances are not sorted in ascending order")
|
||||
testEnv.Error(inst)
|
||||
}
|
||||
|
||||
inst2, err := ParseCSVToInstances("../examples/datasets/iris_sorted_asc.csv", true)
|
||||
if err != nil {
|
||||
testEnv.Error(err)
|
||||
return
|
||||
}
|
||||
if !isSortedAsc(inst2, 0) {
|
||||
testEnv.Error("This file should be sorted in ascending order")
|
||||
}
|
||||
|
||||
if !inst2.Equal(inst) {
|
||||
inst.storage.Sub(inst.storage, inst2.storage)
|
||||
testEnv.Error(inst.storage)
|
||||
testEnv.Error("Instances don't match")
|
||||
testEnv.Error(inst)
|
||||
testEnv.Error(inst2)
|
||||
}
|
||||
|
||||
}
|
57
data/csv.go
57
data/csv.go
@ -1,57 +0,0 @@
|
||||
/* Data - consists of helper functions for parsing different data formats */
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// ParseCsv reads the CSV file at filepath, treating the first record as the
// header row.
//
// Parameters:
//   - filepath: path of the CSV file to read.
//   - label: index of the field holding each record's class label.
//   - columns: indices of the fields to extract as numeric training data.
//
// It returns, in order: the number of selected columns, the number of data
// rows read, the headers of the selected columns, the label of every row, and
// the selected values of every row flattened row by row.
//
// Problems are reported on standard output and never cause a panic: if the
// file cannot be opened or the header cannot be read, zero rows are returned;
// reading stops at the first record that cannot be read at all; records that
// lack a requested field are skipped.
func ParseCsv(filepath string, label int, columns []int) (int, int, []string, []string, []float64) {
	labels := make([]string, 0)
	data := make([]float64, 0)
	headers := make([]string, 0)
	rows := 0
	cols := len(columns)

	file, err := os.Open(filepath)
	if err != nil {
		fmt.Println("Error:", err)
		return cols, rows, headers, labels, data
	}
	defer file.Close()

	// hasFields reports whether every index is a valid position in record.
	hasFields := func(record []string, indices ...int) bool {
		for _, idx := range indices {
			if idx < 0 || idx >= len(record) {
				return false
			}
		}
		return true
	}

	reader := csv.NewReader(file)

	headerrow, err := reader.Read()
	if err != nil {
		fmt.Println("Error:", err)
		return cols, rows, headers, labels, data
	}
	if !hasFields(headerrow, columns...) {
		fmt.Println("Error: header row does not contain every requested column")
		return cols, rows, headers, labels, data
	}
	for _, col := range columns {
		headers = append(headers, headerrow[col])
	}

	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println("Error:", err)
			// Without a record there is nothing to extract, and the same
			// error may repeat forever, so stop reading.
			if record == nil {
				break
			}
		}
		if !hasFields(record, label) || !hasFields(record, columns...) {
			fmt.Println("Error: skipping record without every requested field")
			continue
		}

		labels = append(labels, record[label])

		// Append the selected values of this row to the flat data slice.
		for _, col := range columns {
			// A field that does not parse as a float is stored as 0, as
			// callers of this function have always received.
			number, _ := strconv.ParseFloat(record[col], 64)
			data = append(data, number)
		}
		rows++
	}
	return cols, rows, headers, labels, data
}
|
191
evaluation/confusion.go
Normal file
191
evaluation/confusion.go
Normal file
@ -0,0 +1,191 @@
|
||||
package evaluation
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
)
|
||||
|
||||
// ConfusionMatrix is a nested map of class counts: the outer key is the
// actual (reference) class and the inner key is the predicted class.
|
||||
type ConfusionMatrix map[string]map[string]int
|
||||
|
||||
// GetConfusionMatrix builds a ConfusionMatrix from a set of reference (`ref')
|
||||
// and generate (`gen') Instances.
|
||||
func GetConfusionMatrix(ref *base.Instances, gen *base.Instances) map[string]map[string]int {
|
||||
|
||||
if ref.Rows != gen.Rows {
|
||||
panic("Row counts should match")
|
||||
}
|
||||
|
||||
ret := make(map[string]map[string]int)
|
||||
|
||||
for i := 0; i < ref.Rows; i++ {
|
||||
referenceClass := ref.GetClass(i)
|
||||
predictedClass := gen.GetClass(i)
|
||||
if _, ok := ret[referenceClass]; ok {
|
||||
ret[referenceClass][predictedClass] += 1
|
||||
} else {
|
||||
ret[referenceClass] = make(map[string]int)
|
||||
ret[referenceClass][predictedClass] = 1
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// GetTruePositives returns the number of times an entry is
|
||||
// predicted successfully in a given ConfusionMatrix.
|
||||
func GetTruePositives(class string, c ConfusionMatrix) float64 {
|
||||
return float64(c[class][class])
|
||||
}
|
||||
|
||||
// GetFalsePositives returns the number of times an entry is
|
||||
// incorrectly predicted as having a given class.
|
||||
func GetFalsePositives(class string, c ConfusionMatrix) float64 {
|
||||
ret := 0.0
|
||||
for k := range c {
|
||||
if k == class {
|
||||
continue
|
||||
}
|
||||
ret += float64(c[k][class])
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// GetFalseNegatives returns the number of times an entry is
|
||||
// incorrectly predicted as something other than the given class.
|
||||
func GetFalseNegatives(class string, c ConfusionMatrix) float64 {
|
||||
ret := 0.0
|
||||
for k := range c[class] {
|
||||
if k == class {
|
||||
continue
|
||||
}
|
||||
ret += float64(c[class][k])
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// GetTrueNegatives returns the number of times an entry is
|
||||
// correctly predicted as something other than the given class.
|
||||
func GetTrueNegatives(class string, c ConfusionMatrix) float64 {
|
||||
ret := 0.0
|
||||
for k := range c {
|
||||
if k == class {
|
||||
continue
|
||||
}
|
||||
for l := range c[k] {
|
||||
if l == class {
|
||||
continue
|
||||
}
|
||||
ret += float64(c[k][l])
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// GetPrecision returns the fraction of of the total predictions
|
||||
// for a given class which were correct.
|
||||
func GetPrecision(class string, c ConfusionMatrix) float64 {
|
||||
// Fraction of retrieved instances that are relevant
|
||||
truePositives := GetTruePositives(class, c)
|
||||
falsePositives := GetFalsePositives(class, c)
|
||||
return truePositives / (truePositives + falsePositives)
|
||||
}
|
||||
|
||||
// GetRecall returns the fraction of the total occurrences of a
|
||||
// given class which were predicted.
|
||||
func GetRecall(class string, c ConfusionMatrix) float64 {
|
||||
// Fraction of relevant instances that are retrieved
|
||||
truePositives := GetTruePositives(class, c)
|
||||
falseNegatives := GetFalseNegatives(class, c)
|
||||
return truePositives / (truePositives + falseNegatives)
|
||||
}
|
||||
|
||||
// GetF1Score computes the harmonic mean of precision and recall
|
||||
// (equivalently called F-measure)
|
||||
func GetF1Score(class string, c ConfusionMatrix) float64 {
|
||||
precision := GetPrecision(class, c)
|
||||
recall := GetRecall(class, c)
|
||||
return 2 * (precision * recall) / (precision + recall)
|
||||
}
|
||||
|
||||
// GetAccuracy computes the overall classification accuracy
|
||||
// That is (number of correctly classified instances) / total instances
|
||||
func GetAccuracy(c ConfusionMatrix) float64 {
|
||||
correct := 0
|
||||
total := 0
|
||||
for i := range c {
|
||||
for j := range c[i] {
|
||||
if i == j {
|
||||
correct += c[i][j]
|
||||
}
|
||||
total += c[i][j]
|
||||
}
|
||||
}
|
||||
return float64(correct) / float64(total)
|
||||
}
|
||||
|
||||
// GetMicroPrecision assesses Classifier performance across
|
||||
// all classes using the total true positives and false positives.
|
||||
func GetMicroPrecision(c ConfusionMatrix) float64 {
|
||||
truePositives := 0.0
|
||||
falsePositives := 0.0
|
||||
for k := range c {
|
||||
truePositives += GetTruePositives(k, c)
|
||||
falsePositives += GetFalsePositives(k, c)
|
||||
}
|
||||
return truePositives / (truePositives + falsePositives)
|
||||
}
|
||||
|
||||
// GetMacroPrecision assesses Classifier performance across all
|
||||
// classes by averaging the precision measures achieved for each class.
|
||||
func GetMacroPrecision(c ConfusionMatrix) float64 {
|
||||
precisionVals := 0.0
|
||||
for k := range c {
|
||||
precisionVals += GetPrecision(k, c)
|
||||
}
|
||||
return precisionVals / float64(len(c))
|
||||
}
|
||||
|
||||
// GetMicroRecall assesses Classifier performance across all
|
||||
// classes using the total true positives and false negatives.
|
||||
func GetMicroRecall(c ConfusionMatrix) float64 {
|
||||
truePositives := 0.0
|
||||
falseNegatives := 0.0
|
||||
for k := range c {
|
||||
truePositives += GetTruePositives(k, c)
|
||||
falseNegatives += GetFalseNegatives(k, c)
|
||||
}
|
||||
return truePositives / (truePositives + falseNegatives)
|
||||
}
|
||||
|
||||
// GetMacroRecall assesses Classifier performance across all classes
|
||||
// by averaging the recall measures achieved for each class
|
||||
func GetMacroRecall(c ConfusionMatrix) float64 {
|
||||
recallVals := 0.0
|
||||
for k := range c {
|
||||
recallVals += GetRecall(k, c)
|
||||
}
|
||||
return recallVals / float64(len(c))
|
||||
}
|
||||
|
||||
// GetSummary returns a table of precision, recall, true positive,
|
||||
// false positive, and true negatives for each class for a given
|
||||
// ConfusionMatrix
|
||||
func GetSummary(c ConfusionMatrix) string {
|
||||
var buffer bytes.Buffer
|
||||
for k := range c {
|
||||
buffer.WriteString(k)
|
||||
buffer.WriteString("\t")
|
||||
tp := GetTruePositives(k, c)
|
||||
fp := GetFalsePositives(k, c)
|
||||
tn := GetTrueNegatives(k, c)
|
||||
prec := GetPrecision(k, c)
|
||||
rec := GetRecall(k, c)
|
||||
f1 := GetF1Score(k, c)
|
||||
buffer.WriteString(fmt.Sprintf("%.0f\t%.0f\t%.0f\t%.4f\t%.4f\t%.4f\n", tp, fp, tn, prec, rec, f1))
|
||||
}
|
||||
|
||||
buffer.WriteString(fmt.Sprintf("Overall accuracy: %.4f\n", GetAccuracy(c)))
|
||||
|
||||
return buffer.String()
|
||||
}
|
104
evaluation/confusion_test.go
Normal file
104
evaluation/confusion_test.go
Normal file
@ -0,0 +1,104 @@
|
||||
package evaluation
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMetrics(testEnv *testing.T) {
|
||||
confusionMat := make(ConfusionMatrix)
|
||||
confusionMat["a"] = make(map[string]int)
|
||||
confusionMat["b"] = make(map[string]int)
|
||||
confusionMat["a"]["a"] = 75
|
||||
confusionMat["a"]["b"] = 5
|
||||
confusionMat["b"]["a"] = 10
|
||||
confusionMat["b"]["b"] = 10
|
||||
|
||||
tp := GetTruePositives("a", confusionMat)
|
||||
if math.Abs(tp-75) >= 1 {
|
||||
testEnv.Error(tp)
|
||||
}
|
||||
tp = GetTruePositives("b", confusionMat)
|
||||
if math.Abs(tp-10) >= 1 {
|
||||
testEnv.Error(tp)
|
||||
}
|
||||
|
||||
fn := GetFalseNegatives("a", confusionMat)
|
||||
if math.Abs(fn-5) >= 1 {
|
||||
testEnv.Error(fn)
|
||||
}
|
||||
fn = GetFalseNegatives("b", confusionMat)
|
||||
if math.Abs(fn-10) >= 1 {
|
||||
testEnv.Error(fn)
|
||||
}
|
||||
|
||||
tn := GetTrueNegatives("a", confusionMat)
|
||||
if math.Abs(tn-10) >= 1 {
|
||||
testEnv.Error(tn)
|
||||
}
|
||||
tn = GetTrueNegatives("b", confusionMat)
|
||||
if math.Abs(tn-75) >= 1 {
|
||||
testEnv.Error(tn)
|
||||
}
|
||||
|
||||
fp := GetFalsePositives("a", confusionMat)
|
||||
if math.Abs(fp-10) >= 1 {
|
||||
testEnv.Error(fp)
|
||||
}
|
||||
|
||||
fp = GetFalsePositives("b", confusionMat)
|
||||
if math.Abs(fp-5) >= 1 {
|
||||
testEnv.Error(fp)
|
||||
}
|
||||
|
||||
precision := GetPrecision("a", confusionMat)
|
||||
recall := GetRecall("a", confusionMat)
|
||||
|
||||
if math.Abs(precision-0.88) >= 0.01 {
|
||||
testEnv.Error(precision)
|
||||
}
|
||||
|
||||
if math.Abs(recall-0.94) >= 0.01 {
|
||||
testEnv.Error(recall)
|
||||
}
|
||||
|
||||
precision = GetPrecision("b", confusionMat)
|
||||
recall = GetRecall("b", confusionMat)
|
||||
if math.Abs(precision-0.666) >= 0.01 {
|
||||
testEnv.Error(precision)
|
||||
}
|
||||
|
||||
if math.Abs(recall-0.50) >= 0.01 {
|
||||
testEnv.Error(recall)
|
||||
}
|
||||
|
||||
precision = GetMicroPrecision(confusionMat)
|
||||
if math.Abs(precision-0.85) >= 0.01 {
|
||||
testEnv.Error(precision)
|
||||
}
|
||||
|
||||
recall = GetMicroRecall(confusionMat)
|
||||
if math.Abs(recall-0.85) >= 0.01 {
|
||||
testEnv.Error(recall)
|
||||
}
|
||||
|
||||
precision = GetMacroPrecision(confusionMat)
|
||||
if math.Abs(precision-0.775) >= 0.01 {
|
||||
testEnv.Error(precision)
|
||||
}
|
||||
|
||||
recall = GetMacroRecall(confusionMat)
|
||||
if math.Abs(recall-0.719) > 0.01 {
|
||||
testEnv.Error(recall)
|
||||
}
|
||||
|
||||
fmeasure := GetF1Score("a", confusionMat)
|
||||
if math.Abs(fmeasure-0.91) >= 0.1 {
|
||||
testEnv.Error(fmeasure)
|
||||
}
|
||||
|
||||
accuracy := GetAccuracy(confusionMat)
|
||||
if math.Abs(accuracy-0.85) >= 0.1 {
|
||||
testEnv.Error(accuracy)
|
||||
}
|
||||
}
|
61
examples/datasets/chim.csv
Normal file
61
examples/datasets/chim.csv
Normal file
@ -0,0 +1,61 @@
|
||||
A,class
|
||||
1.3,c1
|
||||
1.3,c3
|
||||
1.3,c3
|
||||
1.3,c3
|
||||
1.3,c3
|
||||
1.4,c2
|
||||
1.8,c1
|
||||
1.8,c2
|
||||
1.8,c3
|
||||
2.4,c1
|
||||
2.4,c1
|
||||
2.4,c1
|
||||
2.4,c1
|
||||
2.4,c1
|
||||
2.4,c1
|
||||
2.4,c3
|
||||
2.4,c3
|
||||
6.5,c1
|
||||
6.5,c1
|
||||
6.5,c1
|
||||
6.5,c2
|
||||
6.5,c2
|
||||
6.5,c3
|
||||
6.5,c3
|
||||
6.5,c3
|
||||
6.5,c3
|
||||
8.7,c1
|
||||
8.7,c1
|
||||
8.7,c1
|
||||
8.7,c1
|
||||
8.7,c1
|
||||
8.7,c1
|
||||
8.7,c3
|
||||
12.1,c1
|
||||
12.1,c1
|
||||
12.1,c1
|
||||
12.1,c1
|
||||
12.1,c1
|
||||
12.1,c1
|
||||
12.1,c1
|
||||
12.1,c2
|
||||
12.1,c2
|
||||
12.1,c3
|
||||
12.1,c3
|
||||
12.1,c3
|
||||
29.4,c3
|
||||
56.2,c1
|
||||
56.2,c1
|
||||
56.2,c2
|
||||
56.2,c2
|
||||
56.2,c2
|
||||
56.2,c2
|
||||
87.1,c2
|
||||
87.1,c3
|
||||
87.1,c3
|
||||
87.1,c3
|
||||
89.0,c1
|
||||
89.0,c2
|
||||
89.0,c3
|
||||
89.0,c3
|
|
@ -147,4 +147,4 @@
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
|
|
152
examples/datasets/iris_binned.csv
Normal file
152
examples/datasets/iris_binned.csv
Normal file
@ -0,0 +1,152 @@
|
||||
Sepal length,Sepal width,Petal length,Petal width,Species
|
||||
2,3.5,1.4,0.2,Iris-setosa
|
||||
1,3,1.4,0.2,Iris-setosa
|
||||
1,3.2,1.3,0.2,Iris-setosa
|
||||
0,3.1,1.5,0.2,Iris-setosa
|
||||
1,3.6,1.4,0.2,Iris-setosa
|
||||
3,3.9,1.7,0.4,Iris-setosa
|
||||
0,3.4,1.4,0.3,Iris-setosa
|
||||
1,3.4,1.5,0.2,Iris-setosa
|
||||
0,2.9,1.4,0.2,Iris-setosa
|
||||
1,3.1,1.5,0.1,Iris-setosa
|
||||
3,3.7,1.5,0.2,Iris-setosa
|
||||
1,3.4,1.6,0.2,Iris-setosa
|
||||
1,3,1.4,0.1,Iris-setosa
|
||||
0,3,1.1,0.1,Iris-setosa
|
||||
4,4,1.2,0.2,Iris-setosa
|
||||
3,4.4,1.5,0.4,Iris-setosa
|
||||
3,3.9,1.3,0.4,Iris-setosa
|
||||
2,3.5,1.4,0.3,Iris-setosa
|
||||
3,3.8,1.7,0.3,Iris-setosa
|
||||
2,3.8,1.5,0.3,Iris-setosa
|
||||
3,3.4,1.7,0.2,Iris-setosa
|
||||
2,3.7,1.5,0.4,Iris-setosa
|
||||
0,3.6,1,0.2,Iris-setosa
|
||||
2,3.3,1.7,0.5,Iris-setosa
|
||||
1,3.4,1.9,0.2,Iris-setosa
|
||||
1,3,1.6,0.2,Iris-setosa
|
||||
1,3.4,1.6,0.4,Iris-setosa
|
||||
2,3.5,1.5,0.2,Iris-setosa
|
||||
2,3.4,1.4,0.2,Iris-setosa
|
||||
1,3.2,1.6,0.2,Iris-setosa
|
||||
1,3.1,1.6,0.2,Iris-setosa
|
||||
3,3.4,1.5,0.4,Iris-setosa
|
||||
2,4.1,1.5,0.1,Iris-setosa
|
||||
3,4.2,1.4,0.2,Iris-setosa
|
||||
1,3.1,1.5,0.1,Iris-setosa
|
||||
1,3.2,1.2,0.2,Iris-setosa
|
||||
3,3.5,1.3,0.2,Iris-setosa
|
||||
1,3.1,1.5,0.1,Iris-setosa
|
||||
0,3,1.3,0.2,Iris-setosa
|
||||
2,3.4,1.5,0.2,Iris-setosa
|
||||
1,3.5,1.3,0.3,Iris-setosa
|
||||
0,2.3,1.3,0.3,Iris-setosa
|
||||
0,3.2,1.3,0.2,Iris-setosa
|
||||
1,3.5,1.6,0.6,Iris-setosa
|
||||
2,3.8,1.9,0.4,Iris-setosa
|
||||
1,3,1.4,0.3,Iris-setosa
|
||||
2,3.8,1.6,0.2,Iris-setosa
|
||||
0,3.2,1.4,0.2,Iris-setosa
|
||||
2,3.7,1.5,0.2,Iris-setosa
|
||||
1,3.3,1.4,0.2,Iris-setosa
|
||||
7,3.2,4.7,1.4,Iris-versicolor
|
||||
5,3.2,4.5,1.5,Iris-versicolor
|
||||
7,3.1,4.9,1.5,Iris-versicolor
|
||||
3,2.3,4,1.3,Iris-versicolor
|
||||
6,2.8,4.6,1.5,Iris-versicolor
|
||||
3,2.8,4.5,1.3,Iris-versicolor
|
||||
5,3.3,4.7,1.6,Iris-versicolor
|
||||
1,2.4,3.3,1,Iris-versicolor
|
||||
6,2.9,4.6,1.3,Iris-versicolor
|
||||
2,2.7,3.9,1.4,Iris-versicolor
|
||||
1,2,3.5,1,Iris-versicolor
|
||||
4,3,4.2,1.5,Iris-versicolor
|
||||
4,2.2,4,1,Iris-versicolor
|
||||
5,2.9,4.7,1.4,Iris-versicolor
|
||||
3,2.9,3.6,1.3,Iris-versicolor
|
||||
6,3.1,4.4,1.4,Iris-versicolor
|
||||
3,3,4.5,1.5,Iris-versicolor
|
||||
4,2.7,4.1,1,Iris-versicolor
|
||||
5,2.2,4.5,1.5,Iris-versicolor
|
||||
3,2.5,3.9,1.1,Iris-versicolor
|
||||
4,3.2,4.8,1.8,Iris-versicolor
|
||||
5,2.8,4,1.3,Iris-versicolor
|
||||
5,2.5,4.9,1.5,Iris-versicolor
|
||||
5,2.8,4.7,1.2,Iris-versicolor
|
||||
5,2.9,4.3,1.3,Iris-versicolor
|
||||
6,3,4.4,1.4,Iris-versicolor
|
||||
6,2.8,4.8,1.4,Iris-versicolor
|
||||
6,3,5,1.7,Iris-versicolor
|
||||
4,2.9,4.5,1.5,Iris-versicolor
|
||||
3,2.6,3.5,1,Iris-versicolor
|
||||
3,2.4,3.8,1.1,Iris-versicolor
|
||||
3,2.4,3.7,1,Iris-versicolor
|
||||
4,2.7,3.9,1.2,Iris-versicolor
|
||||
4,2.7,5.1,1.6,Iris-versicolor
|
||||
3,3,4.5,1.5,Iris-versicolor
|
||||
4,3.4,4.5,1.6,Iris-versicolor
|
||||
6,3.1,4.7,1.5,Iris-versicolor
|
||||
5,2.3,4.4,1.3,Iris-versicolor
|
||||
3,3,4.1,1.3,Iris-versicolor
|
||||
3,2.5,4,1.3,Iris-versicolor
|
||||
3,2.6,4.4,1.2,Iris-versicolor
|
||||
5,3,4.6,1.4,Iris-versicolor
|
||||
4,2.6,4,1.2,Iris-versicolor
|
||||
1,2.3,3.3,1,Iris-versicolor
|
||||
3,2.7,4.2,1.3,Iris-versicolor
|
||||
3,3,4.2,1.2,Iris-versicolor
|
||||
3,2.9,4.2,1.3,Iris-versicolor
|
||||
5,2.9,4.3,1.3,Iris-versicolor
|
||||
2,2.5,3,1.1,Iris-versicolor
|
||||
3,2.8,4.1,1.3,Iris-versicolor
|
||||
5,3.3,6,2.5,Iris-virginica
|
||||
4,2.7,5.1,1.9,Iris-virginica
|
||||
7,3,5.9,2.1,Iris-virginica
|
||||
5,2.9,5.6,1.8,Iris-virginica
|
||||
6,3,5.8,2.2,Iris-virginica
|
||||
9,3,6.6,2.1,Iris-virginica
|
||||
1,2.5,4.5,1.7,Iris-virginica
|
||||
8,2.9,6.3,1.8,Iris-virginica
|
||||
6,2.5,5.8,1.8,Iris-virginica
|
||||
8,3.6,6.1,2.5,Iris-virginica
|
||||
6,3.2,5.1,2,Iris-virginica
|
||||
5,2.7,5.3,1.9,Iris-virginica
|
||||
6,3,5.5,2.1,Iris-virginica
|
||||
3,2.5,5,2,Iris-virginica
|
||||
4,2.8,5.1,2.4,Iris-virginica
|
||||
5,3.2,5.3,2.3,Iris-virginica
|
||||
6,3,5.5,1.8,Iris-virginica
|
||||
9,3.8,6.7,2.2,Iris-virginica
|
||||
9,2.6,6.9,2.3,Iris-virginica
|
||||
4,2.2,5,1.5,Iris-virginica
|
||||
7,3.2,5.7,2.3,Iris-virginica
|
||||
3,2.8,4.9,2,Iris-virginica
|
||||
9,2.8,6.7,2,Iris-virginica
|
||||
5,2.7,4.9,1.8,Iris-virginica
|
||||
6,3.3,5.7,2.1,Iris-virginica
|
||||
8,3.2,6,1.8,Iris-virginica
|
||||
5,2.8,4.8,1.8,Iris-virginica
|
||||
5,3,4.9,1.8,Iris-virginica
|
||||
5,2.8,5.6,2.1,Iris-virginica
|
||||
8,3,5.8,1.6,Iris-virginica
|
||||
8,2.8,6.1,1.9,Iris-virginica
|
||||
9,3.8,6.4,2,Iris-virginica
|
||||
5,2.8,5.6,2.2,Iris-virginica
|
||||
5,2.8,5.1,1.5,Iris-virginica
|
||||
5,2.6,5.6,1.4,Iris-virginica
|
||||
9,3,6.1,2.3,Iris-virginica
|
||||
5,3.4,5.6,2.4,Iris-virginica
|
||||
5,3.1,5.5,1.8,Iris-virginica
|
||||
4,3,4.8,1.8,Iris-virginica
|
||||
7,3.1,5.4,2.1,Iris-virginica
|
||||
6,3.1,5.6,2.4,Iris-virginica
|
||||
7,3.1,5.1,2.3,Iris-virginica
|
||||
4,2.7,5.1,1.9,Iris-virginica
|
||||
6,3.2,5.9,2.3,Iris-virginica
|
||||
6,3.3,5.7,2.5,Iris-virginica
|
||||
6,3,5.2,2.3,Iris-virginica
|
||||
5,2.5,5,1.9,Iris-virginica
|
||||
6,3,5.2,2,Iris-virginica
|
||||
5,3.4,5.4,2.3,Iris-virginica
|
||||
4,3,5.1,1.8,Iris-virginica
|
||||
|
|
151
examples/datasets/iris_headers.csv
Normal file
151
examples/datasets/iris_headers.csv
Normal file
@ -0,0 +1,151 @@
|
||||
Sepal length, Sepal width,Petal length, Petal width, Species
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
|
151
examples/datasets/iris_sorted_asc.csv
Normal file
151
examples/datasets/iris_sorted_asc.csv
Normal file
@ -0,0 +1,151 @@
|
||||
Sepal length, Sepal width,Petal length, Petal width, Species
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
|
151
examples/datasets/iris_sorted_desc.csv
Normal file
151
examples/datasets/iris_sorted_desc.csv
Normal file
@ -0,0 +1,151 @@
|
||||
Sepal length, Sepal width,Petal length, Petal width, Species
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
|
69
examples/instances/instances.go
Normal file
69
examples/instances/instances.go
Normal file
@ -0,0 +1,69 @@
|
||||
package main
|
||||
|
||||
// This example program demonstrates Instances
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
base "github.com/sjwhitworth/golearn/base"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
// Instances can be read using ParseCsvToInstances
|
||||
rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Instances can be printed, and you'll see a human-readable summary
|
||||
// if you do so. The first section is a line like
|
||||
// Instances with 150 row(s) and 5 attribute(s)
|
||||
//
|
||||
// It next prints all the attributes
|
||||
// FloatAttribute(Sepal length)
|
||||
// FloatAttribute(Sepal width)
|
||||
// FloatAttribute(Petal length)
|
||||
// FloatAttribute(Petal width)
|
||||
// CategoricalAttribute([Iris-setosa Iris-versicolor Iris-viriginica])
|
||||
// The final attribute has an asterisk (*) printed before it,
|
||||
// meaning that it is the class variable. It then prints out up to
|
||||
// 30 rows which correspond to those attributes.
|
||||
// 5.10 3.50 1.40 0.20 Iris-setosa
|
||||
// 4.90 3.00 1.40 0.20 Iris-setosa
|
||||
fmt.Println(rawData)
|
||||
|
||||
// If two decimal places isn't enough, you can update the
|
||||
// Precision field on any FloatAttribute
|
||||
if attr, ok := rawData.GetAttr(0).(*base.FloatAttribute); !ok {
|
||||
panic("Invalid cast")
|
||||
} else {
|
||||
attr.Precision = 4
|
||||
}
|
||||
// Now the first column has more precision
|
||||
fmt.Println(rawData)
|
||||
|
||||
// We can update the set of Instances, although the API
|
||||
// for doing so is not very sophisticated.
|
||||
rawData.SetAttrStr(0, 0, "1.00")
|
||||
rawData.SetAttrStr(0, rawData.ClassIndex, "Iris-unusual")
|
||||
fmt.Println(rawData)
|
||||
|
||||
// There is a way of creating new Instances from scratch.
|
||||
// Inside an Instance, everything's stored as float64
|
||||
newData := make([]float64, 2)
|
||||
newData[0] = 1.0
|
||||
newData[1] = 0.0
|
||||
|
||||
// Let's create some attributes
|
||||
attrs := make([]base.Attribute, 2)
|
||||
attrs[0] = base.NewFloatAttribute()
|
||||
attrs[0].SetName("Arbitrary Float Quantity")
|
||||
attrs[1] = new(base.CategoricalAttribute)
|
||||
attrs[1].SetName("Class")
|
||||
// Insert a standard class
|
||||
attrs[1].GetSysValFromString("A")
|
||||
|
||||
// Now let's create the final instances set
|
||||
newInst := base.NewInstancesFromRaw(attrs, 1, newData)
|
||||
fmt.Println(newInst)
|
||||
}
|
32
examples/knnclassifier/knnclassifier_iris.go
Normal file
32
examples/knnclassifier/knnclassifier_iris.go
Normal file
@ -0,0 +1,32 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
base "github.com/sjwhitworth/golearn/base"
|
||||
evaluation "github.com/sjwhitworth/golearn/evaluation"
|
||||
knn "github.com/sjwhitworth/golearn/knn"
|
||||
)
|
||||
|
||||
func main() {
|
||||
rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
rawData.Shuffle()
|
||||
//Initialises a new KNN classifier
|
||||
cls := knn.NewKnnClassifier("euclidean", 2)
|
||||
|
||||
//Do a training-test split
|
||||
trainTest := base.InstancesTrainTestSplit(rawData, 0.50)
|
||||
trainData := trainTest[0]
|
||||
testData := trainTest[1]
|
||||
cls.Fit(trainData)
|
||||
|
||||
//Calculates the Euclidean distance and returns the most popular label
|
||||
predictions := cls.Predict(testData)
|
||||
fmt.Println(predictions)
|
||||
|
||||
// Prints precision/recall metrics
|
||||
confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
|
||||
fmt.Println(evaluation.GetSummary(confusionMat))
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
data "github.com/sjwhitworth/golearn/data"
|
||||
knn "github.com/sjwhitworth/golearn/knn"
|
||||
util "github.com/sjwhitworth/golearn/utilities"
|
||||
)
|
||||
|
||||
func main() {
|
||||
//Parses the infamous Iris data.
|
||||
cols, rows, _, labels, data := data.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2})
|
||||
|
||||
//Initialises a new KNN classifier
|
||||
cls := knn.NewKnnClassifier("euclidean")
|
||||
cls.Fit(labels, data, rows, cols)
|
||||
|
||||
for {
|
||||
//Creates a random array of N float64s between 0 and 7
|
||||
randArray := util.RandomArray(3, 7)
|
||||
|
||||
//Calculates the Euclidean distance and returns the most popular label
|
||||
labels := cls.Predict(randArray, 3)
|
||||
fmt.Println(labels)
|
||||
}
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/gonum/matrix/mat64"
|
||||
data "github.com/sjwhitworth/golearn/data"
|
||||
knn "github.com/sjwhitworth/golearn/knn"
|
||||
util "github.com/sjwhitworth/golearn/utilities"
|
||||
)
|
||||
|
||||
func main() {
|
||||
//Parses the infamous Iris data.
|
||||
cols, rows, _, labels, data := data.ParseCsv("datasets/randomdata.csv", 2, []int{0, 1})
|
||||
newlabels := util.ConvertLabelsToFloat(labels)
|
||||
|
||||
//Initialises a new KNN classifier
|
||||
cls := knn.NewKnnRegressor("euclidean")
|
||||
cls.Fit(newlabels, data, rows, cols)
|
||||
|
||||
for {
|
||||
//Creates a random array of N float64s between 0 and Y
|
||||
randArray := util.RandomArray(2, 100)
|
||||
|
||||
//Initialises a vector with this array
|
||||
random := mat64.NewDense(1, 2, randArray)
|
||||
|
||||
//Calculates the Euclidean distance and returns the most popular label
|
||||
outcome := cls.Predict(random, 3)
|
||||
fmt.Println(outcome)
|
||||
}
|
||||
}
|
BIN
ext/lib/liblinear.so
Executable file
BIN
ext/lib/liblinear.so
Executable file
Binary file not shown.
BIN
ext/lib/linear.dll
Executable file
BIN
ext/lib/linear.dll
Executable file
Binary file not shown.
BIN
ext/liblinear_src/blas/blas.a
Normal file
BIN
ext/liblinear_src/blas/blas.a
Normal file
Binary file not shown.
BIN
ext/liblinear_src/blas/daxpy.o
Normal file
BIN
ext/liblinear_src/blas/daxpy.o
Normal file
Binary file not shown.
BIN
ext/liblinear_src/blas/ddot.o
Normal file
BIN
ext/liblinear_src/blas/ddot.o
Normal file
Binary file not shown.
BIN
ext/liblinear_src/blas/dnrm2.o
Normal file
BIN
ext/liblinear_src/blas/dnrm2.o
Normal file
Binary file not shown.
BIN
ext/liblinear_src/blas/dscal.o
Normal file
BIN
ext/liblinear_src/blas/dscal.o
Normal file
Binary file not shown.
BIN
ext/liblinear_src/linear.dll
Executable file
BIN
ext/liblinear_src/linear.dll
Executable file
Binary file not shown.
BIN
ext/liblinear_src/linear.o
Normal file
BIN
ext/liblinear_src/linear.o
Normal file
Binary file not shown.
BIN
ext/liblinear_src/tron.o
Normal file
BIN
ext/liblinear_src/tron.o
Normal file
Binary file not shown.
121
filters/binning.go
Normal file
121
filters/binning.go
Normal file
@ -0,0 +1,121 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
base "github.com/sjwhitworth/golearn/base"
|
||||
"math"
|
||||
)
|
||||
|
||||
// BinningFilter does equal-width binning for numeric
|
||||
// Attributes (aka "histogram binning")
|
||||
type BinningFilter struct {
|
||||
Attributes []int
|
||||
Instances *base.Instances
|
||||
BinCount int
|
||||
MinVals map[int]float64
|
||||
MaxVals map[int]float64
|
||||
trained bool
|
||||
}
|
||||
|
||||
// NewBinningFilter creates a BinningFilter structure
|
||||
// with some helpful default initialisations.
|
||||
func NewBinningFilter(inst *base.Instances, bins int) BinningFilter {
|
||||
return BinningFilter{
|
||||
make([]int, 0),
|
||||
inst,
|
||||
bins,
|
||||
make(map[int]float64),
|
||||
make(map[int]float64),
|
||||
false,
|
||||
}
|
||||
}
|
||||
|
||||
// AddAttribute adds the index of the given attribute `a'
|
||||
// to the BinningFilter for discretisation.
|
||||
func (b *BinningFilter) AddAttribute(a base.Attribute) {
|
||||
attrIndex := b.Instances.GetAttrIndex(a)
|
||||
if attrIndex == -1 {
|
||||
panic("invalid attribute")
|
||||
}
|
||||
b.Attributes = append(b.Attributes, attrIndex)
|
||||
}
|
||||
|
||||
// AddAllNumericAttributes adds every suitable attribute
|
||||
// to the BinningFilter for discretiation
|
||||
func (b *BinningFilter) AddAllNumericAttributes() {
|
||||
for i := 0; i < b.Instances.Cols; i++ {
|
||||
if i == b.Instances.ClassIndex {
|
||||
continue
|
||||
}
|
||||
attr := b.Instances.GetAttr(i)
|
||||
if attr.GetType() != base.Float64Type {
|
||||
continue
|
||||
}
|
||||
b.Attributes = append(b.Attributes, i)
|
||||
}
|
||||
}
|
||||
|
||||
// Build computes and stores the bin values
|
||||
// for the training instances.
|
||||
func (b *BinningFilter) Build() {
|
||||
for _, attr := range b.Attributes {
|
||||
maxVal := math.Inf(-1)
|
||||
minVal := math.Inf(1)
|
||||
for i := 0; i < b.Instances.Rows; i++ {
|
||||
val := b.Instances.Get(i, attr)
|
||||
if val > maxVal {
|
||||
maxVal = val
|
||||
}
|
||||
if val < minVal {
|
||||
minVal = val
|
||||
}
|
||||
}
|
||||
b.MaxVals[attr] = maxVal
|
||||
b.MinVals[attr] = minVal
|
||||
b.trained = true
|
||||
}
|
||||
}
|
||||
|
||||
// Run applies a trained BinningFilter to a set of Instances,
|
||||
// discretising any numeric attributes added.
|
||||
//
|
||||
// IMPORTANT: Run discretises in-place, so make sure to take
|
||||
// a copy if the original instances are still needed
|
||||
//
|
||||
// IMPORTANT: This function panic()s if the filter has not been
|
||||
// trained. Call Build() before running this function
|
||||
//
|
||||
// IMPORTANT: Call Build() after adding any additional attributes.
|
||||
// Otherwise, the training structure will be out of date from
|
||||
// the values expected and could cause a panic.
|
||||
func (b *BinningFilter) Run(on *base.Instances) {
|
||||
if !b.trained {
|
||||
panic("Call Build() beforehand")
|
||||
}
|
||||
for attr := range b.Attributes {
|
||||
minVal := b.MinVals[attr]
|
||||
maxVal := b.MaxVals[attr]
|
||||
disc := 0
|
||||
// Casts to float32 to replicate a floating point precision error
|
||||
delta := float32(maxVal - minVal)
|
||||
delta /= float32(b.BinCount)
|
||||
for i := 0; i < on.Rows; i++ {
|
||||
val := on.Get(i, attr)
|
||||
if val <= minVal {
|
||||
disc = 0
|
||||
} else {
|
||||
disc = int(math.Floor(float64(float32(val-minVal) / delta)))
|
||||
if disc >= b.BinCount {
|
||||
disc = b.BinCount - 1
|
||||
}
|
||||
}
|
||||
on.Set(i, attr, float64(disc))
|
||||
}
|
||||
newAttribute := new(base.CategoricalAttribute)
|
||||
newAttribute.SetName(on.GetAttr(attr).GetName())
|
||||
for i := 0; i < b.BinCount; i++ {
|
||||
newAttribute.GetSysValFromString(fmt.Sprintf("%d", i))
|
||||
}
|
||||
on.ReplaceAttr(attr, newAttribute)
|
||||
}
|
||||
}
|
28
filters/binning_test.go
Normal file
28
filters/binning_test.go
Normal file
@ -0,0 +1,28 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
base "github.com/sjwhitworth/golearn/base"
|
||||
"math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBinning(testEnv *testing.T) {
|
||||
inst1, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
|
||||
inst2, err := base.ParseCSVToInstances("../examples/datasets/iris_binned.csv", true)
|
||||
inst3, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
filt := NewBinningFilter(inst1, 10)
|
||||
filt.AddAttribute(inst1.GetAttr(0))
|
||||
filt.Build()
|
||||
filt.Run(inst1)
|
||||
for i := 0; i < inst1.Rows; i++ {
|
||||
val1 := inst1.Get(i, 0)
|
||||
val2 := inst2.Get(i, 0)
|
||||
val3 := inst3.Get(i, 0)
|
||||
if math.Abs(val1-val2) >= 1 {
|
||||
testEnv.Error(val1, val2, val3, i)
|
||||
}
|
||||
}
|
||||
}
|
365
filters/chimerge.go
Normal file
365
filters/chimerge.go
Normal file
@ -0,0 +1,365 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
base "github.com/sjwhitworth/golearn/base"
|
||||
"math"
|
||||
)
|
||||
|
||||
// ChiMergeFilter implements supervised discretisation
|
||||
// by merging successive numeric intervals if the difference
|
||||
// in their class distribution is not statistically signficant.
|
||||
// See Bramer, "Principles of Data Mining", 2nd Edition
|
||||
// pp 105--115
|
||||
type ChiMergeFilter struct {
|
||||
Attributes []int
|
||||
Instances *base.Instances
|
||||
Tables map[int][]*FrequencyTableEntry
|
||||
Significance float64
|
||||
MinRows int
|
||||
MaxRows int
|
||||
_Trained bool
|
||||
}
|
||||
|
||||
// Create a ChiMergeFilter with some helpful intialisations.
|
||||
func NewChiMergeFilter(inst *base.Instances, significance float64) ChiMergeFilter {
|
||||
return ChiMergeFilter{
|
||||
make([]int, 0),
|
||||
inst,
|
||||
make(map[int][]*FrequencyTableEntry),
|
||||
significance,
|
||||
0,
|
||||
0,
|
||||
false,
|
||||
}
|
||||
}
|
||||
|
||||
// Build trains a ChiMergeFilter on the ChiMergeFilter.Instances given
|
||||
func (c *ChiMergeFilter) Build() {
|
||||
for _, attr := range c.Attributes {
|
||||
tab := chiMerge(c.Instances, attr, c.Significance, c.MinRows, c.MaxRows)
|
||||
c.Tables[attr] = tab
|
||||
c._Trained = true
|
||||
}
|
||||
}
|
||||
|
||||
// Run discretises the set of Instances `on'
|
||||
//
|
||||
// IMPORTANT: ChiMergeFilter discretises in place.
|
||||
func (c *ChiMergeFilter) Run(on *base.Instances) {
|
||||
if !c._Trained {
|
||||
panic("Call Build() beforehand")
|
||||
}
|
||||
for attr := range c.Tables {
|
||||
table := c.Tables[attr]
|
||||
for i := 0; i < on.Rows; i++ {
|
||||
val := on.Get(i, attr)
|
||||
dis := 0
|
||||
for j, k := range table {
|
||||
if k.Value < val {
|
||||
dis = j
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
on.Set(i, attr, float64(dis))
|
||||
}
|
||||
newAttribute := new(base.CategoricalAttribute)
|
||||
newAttribute.SetName(on.GetAttr(attr).GetName())
|
||||
for _, k := range table {
|
||||
newAttribute.GetSysValFromString(fmt.Sprintf("%f", k.Value))
|
||||
}
|
||||
on.ReplaceAttr(attr, newAttribute)
|
||||
}
|
||||
}
|
||||
|
||||
// AddAttribute add a given numeric Attribute `attr' to the
|
||||
// filter.
|
||||
//
|
||||
// IMPORTANT: This function panic()s if it can't locate the
|
||||
// attribute in the Instances set.
|
||||
func (c *ChiMergeFilter) AddAttribute(attr base.Attribute) {
|
||||
if attr.GetType() != base.Float64Type {
|
||||
panic("ChiMerge only works on Float64Attributes")
|
||||
}
|
||||
attrIndex := c.Instances.GetAttrIndex(attr)
|
||||
if attrIndex == -1 {
|
||||
panic("Invalid attribute!")
|
||||
}
|
||||
c.Attributes = append(c.Attributes, attrIndex)
|
||||
}
|
||||
|
||||
// FrequencyTableEntry is one row of a ChiMerge frequency table: an
// attribute value together with how often each class occurs for it.
type FrequencyTableEntry struct {
	// Value is the attribute value this row describes.
	Value float64
	// Frequency maps a class name to its number of occurrences.
	Frequency map[string]int
}

// String renders the entry as its Value with two decimal places
// followed by the class frequency map.
func (t *FrequencyTableEntry) String() string {
	// %v is used for the map: %s would render each int count as
	// "%!s(int=N)".
	return fmt.Sprintf("%.2f %v", t.Value, t.Frequency)
}
|
||||
|
||||
func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEntry {
|
||||
ret := make([]*FrequencyTableEntry, 0)
|
||||
var attribute *base.FloatAttribute
|
||||
attribute, ok := inst.GetAttr(attr).(*base.FloatAttribute)
|
||||
if !ok {
|
||||
panic("only use Chi-M on numeric stuff")
|
||||
}
|
||||
for i := 0; i < inst.Rows; i++ {
|
||||
value := inst.Get(i, attr)
|
||||
valueConv := attribute.GetUsrVal(value)
|
||||
class := inst.GetClass(i)
|
||||
// Search the frequency table for the value
|
||||
found := false
|
||||
for _, entry := range ret {
|
||||
if entry.Value == valueConv {
|
||||
found = true
|
||||
entry.Frequency[class] += 1
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
newEntry := &FrequencyTableEntry{
|
||||
valueConv,
|
||||
make(map[string]int),
|
||||
}
|
||||
newEntry.Frequency[class] = 1
|
||||
ret = append(ret, newEntry)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// chiSquaredPdf evaluates the density of the chi-squared distribution with k
// degrees of freedom at x:
//
//	x^(k/2-1) * e^(-x/2) / (2^(k/2) * Gamma(k/2))
//
// It returns 0 for negative x.
func chiSquaredPdf(k float64, x float64) float64 {
	if x < 0 {
		return 0
	}
	top := math.Pow(x, (k/2)-1) * math.Exp(-x/2)
	bottom := math.Pow(2, k/2) * math.Gamma(k/2)
	return top / bottom
}

// chiSquaredPercentile returns the cumulative probability P(X <= x) of the
// chi-squared distribution with k degrees of freedom.
//
// Implements Yahya et al.'s "A Numerical Procedure
// for Computing Chi-Square Percentage Points"
// InterStat Journal 01/2007; April 25:page:1-8.
//
// The density is integrated over [0, x] with the composite Boole's rule on
// 128 equal sub-intervals. Two cases are answered without integrating:
//
//   - x <= 0 yields 0, since the distribution has no mass there.
//   - k == 1 uses the closed form erf(sqrt(x/2)). With one degree of freedom
//     the density is unbounded at 0, so sampling the left edge would make the
//     quadrature return +Inf.
func chiSquaredPercentile(k int, x float64) float64 {
	if x <= 0 {
		return 0
	}
	if k == 1 {
		return math.Erf(math.Sqrt(x / 2))
	}

	steps := 32
	intervals := 4 * steps
	w := x / (4.0 * float64(steps))

	// Sample the density at the intervals+1 equally spaced nodes.
	values := make([]float64, intervals+1)
	for i := range values {
		values[i] = chiSquaredPdf(float64(k), w*float64(i))
	}

	// Boole's rule weights: 7 for the two end points, 32 for odd nodes,
	// 12 for nodes with index 2 mod 4 and 14 for interior multiples of 4.
	ret1 := values[0] + values[len(values)-1]
	ret2 := 0.0
	ret3 := 0.0
	ret4 := 0.0

	for i := 2; i < intervals-1; i += 4 {
		ret2 += values[i]
	}

	for i := 4; i < intervals-3; i += 4 {
		ret3 += values[i]
	}

	for i := 1; i < intervals; i += 2 {
		ret4 += values[i]
	}

	return (2.0 * w / 45) * (7*ret1 + 12*ret2 + 14*ret3 + 32*ret4)
}
|
||||
|
||||
func chiCountClasses(entries []*FrequencyTableEntry) map[string]int {
|
||||
classCounter := make(map[string]int)
|
||||
for _, e := range entries {
|
||||
for k := range e.Frequency {
|
||||
classCounter[k] += e.Frequency[k]
|
||||
}
|
||||
}
|
||||
return classCounter
|
||||
}
|
||||
|
||||
func chiComputeStatistic(entry1 *FrequencyTableEntry, entry2 *FrequencyTableEntry) float64 {
|
||||
|
||||
// Sum the number of things observed per class
|
||||
classCounter := make(map[string]int)
|
||||
for k := range entry1.Frequency {
|
||||
classCounter[k] += entry1.Frequency[k]
|
||||
}
|
||||
for k := range entry2.Frequency {
|
||||
classCounter[k] += entry2.Frequency[k]
|
||||
}
|
||||
|
||||
// Sum the number of things observed per value
|
||||
entryObservations1 := 0
|
||||
entryObservations2 := 0
|
||||
for k := range entry1.Frequency {
|
||||
entryObservations1 += entry1.Frequency[k]
|
||||
}
|
||||
for k := range entry2.Frequency {
|
||||
entryObservations2 += entry2.Frequency[k]
|
||||
}
|
||||
|
||||
totalObservations := entryObservations1 + entryObservations2
|
||||
// Compute the expected values per class
|
||||
expectedClassValues1 := make(map[string]float64)
|
||||
expectedClassValues2 := make(map[string]float64)
|
||||
for k := range classCounter {
|
||||
expectedClassValues1[k] = float64(classCounter[k])
|
||||
expectedClassValues1[k] *= float64(entryObservations1)
|
||||
expectedClassValues1[k] /= float64(totalObservations)
|
||||
}
|
||||
for k := range classCounter {
|
||||
expectedClassValues2[k] = float64(classCounter[k])
|
||||
expectedClassValues2[k] *= float64(entryObservations2)
|
||||
expectedClassValues2[k] /= float64(totalObservations)
|
||||
}
|
||||
|
||||
// Compute chi-squared value
|
||||
chiSum := 0.0
|
||||
for k := range expectedClassValues1 {
|
||||
numerator := float64(entry1.Frequency[k])
|
||||
numerator -= expectedClassValues1[k]
|
||||
numerator = math.Pow(numerator, 2)
|
||||
denominator := float64(expectedClassValues1[k])
|
||||
if denominator < 0.5 {
|
||||
denominator = 0.5
|
||||
}
|
||||
chiSum += numerator / denominator
|
||||
}
|
||||
for k := range expectedClassValues2 {
|
||||
numerator := float64(entry2.Frequency[k])
|
||||
numerator -= expectedClassValues2[k]
|
||||
numerator = math.Pow(numerator, 2)
|
||||
denominator := float64(expectedClassValues2[k])
|
||||
if denominator < 0.5 {
|
||||
denominator = 0.5
|
||||
}
|
||||
chiSum += numerator / denominator
|
||||
}
|
||||
|
||||
return chiSum
|
||||
}
|
||||
|
||||
func chiMergeMergeZipAdjacent(freq []*FrequencyTableEntry, minIndex int) []*FrequencyTableEntry {
|
||||
mergeEntry1 := freq[minIndex]
|
||||
mergeEntry2 := freq[minIndex+1]
|
||||
classCounter := make(map[string]int)
|
||||
for k := range mergeEntry1.Frequency {
|
||||
classCounter[k] += mergeEntry1.Frequency[k]
|
||||
}
|
||||
for k := range mergeEntry2.Frequency {
|
||||
classCounter[k] += mergeEntry2.Frequency[k]
|
||||
}
|
||||
newVal := freq[minIndex].Value
|
||||
newEntry := &FrequencyTableEntry{
|
||||
newVal,
|
||||
classCounter,
|
||||
}
|
||||
lowerSlice := freq
|
||||
upperSlice := freq
|
||||
if minIndex > 0 {
|
||||
lowerSlice = freq[0:minIndex]
|
||||
upperSlice = freq[minIndex+1:]
|
||||
} else {
|
||||
lowerSlice = make([]*FrequencyTableEntry, 0)
|
||||
upperSlice = freq[1:]
|
||||
}
|
||||
upperSlice[0] = newEntry
|
||||
freq = append(lowerSlice, upperSlice...)
|
||||
return freq
|
||||
}
|
||||
|
||||
func chiMergePrintTable(freq []*FrequencyTableEntry) {
|
||||
classes := chiCountClasses(freq)
|
||||
fmt.Printf("Attribute value\t")
|
||||
for k := range classes {
|
||||
fmt.Printf("\t%s", k)
|
||||
}
|
||||
fmt.Printf("\tTotal\n")
|
||||
for _, f := range freq {
|
||||
fmt.Printf("%.2f\t", f.Value)
|
||||
total := 0
|
||||
for k := range classes {
|
||||
fmt.Printf("\t%d", f.Frequency[k])
|
||||
total += f.Frequency[k]
|
||||
}
|
||||
fmt.Printf("\t%d\n", total)
|
||||
}
|
||||
}
|
||||
|
||||
// Produces a value mapping table
|
||||
// inst: The base.Instances which need discretising
|
||||
// sig: The significance level (e.g. 0.95)
|
||||
// minrows: The minimum number of rows required in the frequency table
|
||||
// maxrows: The maximum number of rows allowed in the frequency table
|
||||
// If the number of rows is above this, statistically signficant
|
||||
// adjacent rows will be merged
|
||||
// precision: internal number of decimal places to round E value to
|
||||
// (useful for verification)
|
||||
func chiMerge(inst *base.Instances, attr int, sig float64, minrows int, maxrows int) []*FrequencyTableEntry {
|
||||
|
||||
// Parameter sanity checking
|
||||
if !(2 <= minrows) {
|
||||
minrows = 2
|
||||
}
|
||||
if !(minrows < maxrows) {
|
||||
maxrows = minrows + 1
|
||||
}
|
||||
if sig == 0 {
|
||||
sig = 10
|
||||
}
|
||||
|
||||
// Build a frequency table
|
||||
freq := ChiMBuildFrequencyTable(attr, inst)
|
||||
// Count the number of classes
|
||||
classes := chiCountClasses(freq)
|
||||
for {
|
||||
// chiMergePrintTable(freq) DEBUG
|
||||
if len(freq) <= minrows {
|
||||
break
|
||||
}
|
||||
minChiVal := math.Inf(1)
|
||||
// There may be more than one index to merge
|
||||
minChiIndexes := make([]int, 0)
|
||||
for i := 0; i < len(freq)-1; i++ {
|
||||
chiVal := chiComputeStatistic(freq[i], freq[i+1])
|
||||
if chiVal < minChiVal {
|
||||
minChiVal = chiVal
|
||||
minChiIndexes = make([]int, 0)
|
||||
}
|
||||
if chiVal == minChiVal {
|
||||
minChiIndexes = append(minChiIndexes, i)
|
||||
}
|
||||
}
|
||||
// Only merge if:
|
||||
// We're above the maximum number of rows
|
||||
// OR the chiVal is significant
|
||||
// AS LONG AS we're above the minimum row count
|
||||
merge := false
|
||||
if len(freq) > maxrows {
|
||||
merge = true
|
||||
}
|
||||
// Compute the degress of freedom |classes - 1| * |rows - 1|
|
||||
degsOfFree := len(classes) - 1
|
||||
sigVal := chiSquaredPercentile(degsOfFree, minChiVal)
|
||||
if sigVal < sig {
|
||||
merge = true
|
||||
}
|
||||
// If we don't need to merge, then break
|
||||
if !merge {
|
||||
break
|
||||
}
|
||||
// Otherwise merge the rows i, i+1 by taking
|
||||
// The higher of the two things as the value
|
||||
// Combining the class frequencies
|
||||
for i, v := range minChiIndexes {
|
||||
freq = chiMergeMergeZipAdjacent(freq, v-i)
|
||||
}
|
||||
}
|
||||
return freq
|
||||
}
|
149
filters/chimerge_test.go
Normal file
149
filters/chimerge_test.go
Normal file
@ -0,0 +1,149 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
base "github.com/sjwhitworth/golearn/base"
|
||||
"math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestChiMFreqTable(testEnv *testing.T) {
|
||||
|
||||
inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
freq := ChiMBuildFrequencyTable(0, inst)
|
||||
|
||||
if freq[0].Frequency["c1"] != 1 {
|
||||
testEnv.Error("Wrong frequency")
|
||||
}
|
||||
if freq[0].Frequency["c3"] != 4 {
|
||||
testEnv.Error("Wrong frequency %s", freq[1])
|
||||
}
|
||||
if freq[10].Frequency["c2"] != 1 {
|
||||
testEnv.Error("Wrong frequency")
|
||||
}
|
||||
}
|
||||
|
||||
func TestChiClassCounter(testEnv *testing.T) {
|
||||
inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
freq := ChiMBuildFrequencyTable(0, inst)
|
||||
classes := chiCountClasses(freq)
|
||||
if classes["c1"] != 27 {
|
||||
testEnv.Error(classes)
|
||||
}
|
||||
if classes["c2"] != 12 {
|
||||
testEnv.Error(classes)
|
||||
}
|
||||
if classes["c3"] != 21 {
|
||||
testEnv.Error(classes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatisticValues(testEnv *testing.T) {
|
||||
inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
freq := ChiMBuildFrequencyTable(0, inst)
|
||||
chiVal := chiComputeStatistic(freq[5], freq[6])
|
||||
if math.Abs(chiVal-1.89) > 0.01 {
|
||||
testEnv.Error(chiVal)
|
||||
}
|
||||
|
||||
chiVal = chiComputeStatistic(freq[1], freq[2])
|
||||
if math.Abs(chiVal-1.08) > 0.01 {
|
||||
testEnv.Error(chiVal)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChiSquareDistValues(testEnv *testing.T) {
|
||||
chiVal1 := chiSquaredPercentile(2, 4.61)
|
||||
chiVal2 := chiSquaredPercentile(3, 7.82)
|
||||
chiVal3 := chiSquaredPercentile(4, 13.28)
|
||||
if math.Abs(chiVal1-0.90) > 0.001 {
|
||||
testEnv.Error(chiVal1)
|
||||
}
|
||||
if math.Abs(chiVal2-0.95) > 0.001 {
|
||||
testEnv.Error(chiVal2)
|
||||
}
|
||||
if math.Abs(chiVal3-0.99) > 0.001 {
|
||||
testEnv.Error(chiVal3)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChiMerge1(testEnv *testing.T) {
|
||||
// See Bramer, Principles of Machine Learning
|
||||
inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)
|
||||
if len(freq) != 3 {
|
||||
testEnv.Error("Wrong length")
|
||||
}
|
||||
if freq[0].Value != 1.3 {
|
||||
testEnv.Error(freq[0])
|
||||
}
|
||||
if freq[1].Value != 56.2 {
|
||||
testEnv.Error(freq[1])
|
||||
}
|
||||
if freq[2].Value != 87.1 {
|
||||
testEnv.Error(freq[2])
|
||||
}
|
||||
}
|
||||
|
||||
func TestChiMerge2(testEnv *testing.T) {
|
||||
//
|
||||
// See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf
|
||||
// Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992
|
||||
inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
attrs := make([]int, 1)
|
||||
attrs[0] = 0
|
||||
inst.Sort(base.Ascending, attrs)
|
||||
freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)
|
||||
if len(freq) != 5 {
|
||||
testEnv.Error("Wrong length (%d)", len(freq))
|
||||
testEnv.Error(freq)
|
||||
}
|
||||
if freq[0].Value != 4.3 {
|
||||
testEnv.Error(freq[0])
|
||||
}
|
||||
if freq[1].Value != 5.5 {
|
||||
testEnv.Error(freq[1])
|
||||
}
|
||||
if freq[2].Value != 5.8 {
|
||||
testEnv.Error(freq[2])
|
||||
}
|
||||
if freq[3].Value != 6.3 {
|
||||
testEnv.Error(freq[3])
|
||||
}
|
||||
if freq[4].Value != 7.1 {
|
||||
testEnv.Error(freq[4])
|
||||
}
|
||||
}
|
||||
|
||||
func TestChiMerge3(testEnv *testing.T) {
|
||||
// See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf
|
||||
// Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992
|
||||
inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
attrs := make([]int, 1)
|
||||
attrs[0] = 0
|
||||
inst.Sort(base.Ascending, attrs)
|
||||
filt := NewChiMergeFilter(inst, 0.90)
|
||||
filt.AddAttribute(inst.GetAttr(0))
|
||||
filt.Build()
|
||||
filt.Run(inst)
|
||||
fmt.Println(inst)
|
||||
}
|
51
knn/knn.go
51
knn/knn.go
@ -14,44 +14,42 @@ import (
|
||||
// The accepted distance functions at this time are 'euclidean' and 'manhattan'.
|
||||
type KNNClassifier struct {
|
||||
base.BaseEstimator
|
||||
Labels []string
|
||||
DistanceFunc string
|
||||
TrainingData *base.Instances
|
||||
DistanceFunc string
|
||||
NearestNeighbours int
|
||||
}
|
||||
|
||||
// Returns a new classifier
|
||||
func NewKnnClassifier(distfunc string) *KNNClassifier {
|
||||
func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
|
||||
KNN := KNNClassifier{}
|
||||
KNN.DistanceFunc = distfunc
|
||||
KNN.NearestNeighbours = neighbours
|
||||
return &KNN
|
||||
}
|
||||
|
||||
func (KNN *KNNClassifier) Fit(labels []string, numbers []float64, rows int, cols int) {
|
||||
if rows != len(labels) {
|
||||
panic(mat64.ErrShape)
|
||||
}
|
||||
|
||||
KNN.Data = mat64.NewDense(rows, cols, numbers)
|
||||
KNN.Labels = labels
|
||||
// Fit stores the training data for later
|
||||
func (KNN *KNNClassifier) Fit(trainingData *base.Instances) {
|
||||
KNN.TrainingData = trainingData
|
||||
}
|
||||
|
||||
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
|
||||
// See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
|
||||
func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
|
||||
func (KNN *KNNClassifier) PredictOne(vector []float64) string {
|
||||
|
||||
convertedVector := util.FloatsToMatrix(vector)
|
||||
// Get the number of rows
|
||||
rows, _ := KNN.Data.Dims()
|
||||
rows := KNN.TrainingData.Rows
|
||||
rownumbers := make(map[int]float64)
|
||||
labels := make([]string, 0)
|
||||
maxmap := make(map[string]int)
|
||||
|
||||
convertedVector := util.FloatsToMatrix(vector)
|
||||
|
||||
// Check what distance function we are using
|
||||
switch KNN.DistanceFunc {
|
||||
case "euclidean":
|
||||
{
|
||||
euclidean := pairwiseMetrics.NewEuclidean()
|
||||
for i := 0; i < rows; i++ {
|
||||
row := KNN.Data.RowView(i)
|
||||
row := KNN.TrainingData.GetRowVectorWithoutClass(i)
|
||||
rowMat := util.FloatsToMatrix(row)
|
||||
distance := euclidean.Distance(rowMat, convertedVector)
|
||||
rownumbers[i] = distance
|
||||
@ -61,7 +59,7 @@ func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
|
||||
{
|
||||
manhattan := pairwiseMetrics.NewEuclidean()
|
||||
for i := 0; i < rows; i++ {
|
||||
row := KNN.Data.RowView(i)
|
||||
row := KNN.TrainingData.GetRowVectorWithoutClass(i)
|
||||
rowMat := util.FloatsToMatrix(row)
|
||||
distance := manhattan.Distance(rowMat, convertedVector)
|
||||
rownumbers[i] = distance
|
||||
@ -70,16 +68,16 @@ func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
|
||||
}
|
||||
|
||||
sorted := util.SortIntMap(rownumbers)
|
||||
values := sorted[:K]
|
||||
values := sorted[:KNN.NearestNeighbours]
|
||||
|
||||
for _, elem := range values {
|
||||
// It's when we access this map
|
||||
labels = append(labels, KNN.Labels[elem])
|
||||
label := KNN.TrainingData.GetClass(elem)
|
||||
labels = append(labels, label)
|
||||
|
||||
if _, ok := maxmap[KNN.Labels[elem]]; ok {
|
||||
maxmap[KNN.Labels[elem]] += 1
|
||||
if _, ok := maxmap[label]; ok {
|
||||
maxmap[label] += 1
|
||||
} else {
|
||||
maxmap[KNN.Labels[elem]] = 1
|
||||
maxmap[label] = 1
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,6 +87,14 @@ func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
|
||||
return label
|
||||
}
|
||||
|
||||
func (KNN *KNNClassifier) Predict(what *base.Instances) *base.Instances {
|
||||
ret := what.GeneratePredictionVector()
|
||||
for i := 0; i < what.Rows; i++ {
|
||||
ret.SetAttrStr(i, 0, KNN.PredictOne(what.GetRowVectorWithoutClass(i)))
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
|
||||
type KNNRegressor struct {
|
||||
base.BaseEstimator
|
||||
@ -112,7 +118,6 @@ func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows int, cols
|
||||
KNN.Values = values
|
||||
}
|
||||
|
||||
//Returns an average of the K nearest labels/variables, based on a vector input.
|
||||
func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
|
||||
|
||||
// Get the number of rows
|
||||
|
2
knn/knn_test.csv
Normal file
2
knn/knn_test.csv
Normal file
@ -0,0 +1,2 @@
|
||||
1.2,1.2,1.5,blue
|
||||
5,5,5,red
|
|
@ -1,30 +1,39 @@
|
||||
package knn
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestKnnClassifier(t *testing.T) {
|
||||
Convey("Given labels, a classifier and data", t, func() {
|
||||
labels := []string{"blue", "blue", "red", "red"}
|
||||
data := []float64{1, 1, 1, 1, 1, 1, 3, 3, 3, 6, 6, 6}
|
||||
cls := NewKnnClassifier(labels, data, 4, 3, "euclidean")
|
||||
|
||||
trainingData, err1 := base.ParseCSVToInstances("knn_train.csv", false)
|
||||
testingData, err2 := base.ParseCSVToInstances("knn_test.csv", false)
|
||||
|
||||
if err1 != nil {
|
||||
t.Error(err1)
|
||||
return
|
||||
}
|
||||
if err2 != nil {
|
||||
t.Error(err2)
|
||||
return
|
||||
}
|
||||
|
||||
cls := NewKnnClassifier("euclidean", 2)
|
||||
cls.Fit(trainingData)
|
||||
predictions := cls.Predict(testingData)
|
||||
|
||||
Convey("When predicting the label for our first vector", func() {
|
||||
// The vector we're going to predict
|
||||
vector := []float64{1.2, 1.2, 1.5}
|
||||
result := cls.Predict(vector, 2)
|
||||
result := predictions.GetClass(0)
|
||||
Convey("The result should be 'blue", func() {
|
||||
So(result, ShouldEqual, "blue")
|
||||
})
|
||||
})
|
||||
|
||||
Convey("When predicting the label for our first vector", func() {
|
||||
// The vector we're going to predict
|
||||
vector2 := []float64{5, 5, 5}
|
||||
result2 := cls.Predict(vector2, 2)
|
||||
result2 := predictions.GetClass(1)
|
||||
Convey("The result should be 'red", func() {
|
||||
So(result2, ShouldEqual, "red")
|
||||
})
|
||||
|
4
knn/knn_train.csv
Normal file
4
knn/knn_train.csv
Normal file
@ -0,0 +1,4 @@
|
||||
1,1,1,blue
|
||||
1,1,1,blue
|
||||
3,3,3,red
|
||||
6,6,6,red
|
|
Loading…
x
Reference in New Issue
Block a user