Merge branch 'master' into feature/naive

2025-04-28 13:48:56 +08:00 · 2014-05-18 21:54:32 -03:00 · 2014-05-18 21:54:32 -03:00 · 888dfc7e6d
commit 888dfc7e6d
parent 0035dd184e 5d1039f3d0
40 changed files with 3113 additions and 154 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -0,0 +1,9 @@
+language: go
+go: 
+ - 1.1
+ - 1.2
+ - release
+ - tip
+install:
+ - go get github.com/smartystreets/goconvey/convey
+ - go get -v ./...
--- a/README.md
+++ b/README.md
@ -4,7 +4,8 @@ GoLearn
 <img src="http://talks.golang.org/2013/advconc/gopherhat.jpg" width=125><br>
 [![GoDoc](https://godoc.org/github.com/sjwhitworth/golearn?status.png)](https://godoc.org/github.com/sjwhitworth/golearn)<br>

-A small start on a machine learning library in Go.
+GoLearn is a 'batteries included' machine learning library for Go. **Simplicity**, paired with customisability, is the goal.
+We are in active development, and would love comments from users out in the wild. Drop us a line on Twitter.

 twitter: [@golearn_ml](http://www.twitter.com/golearn_ml)

@ -17,15 +18,60 @@ cd src/github.com/sjwhitworth/golearn
 go get ./...
 ```

-Examples
+Getting Started
 =======

+Data are loaded in as Instances. You can then perform matrix-like operations on them, and pass them to estimators.
+GoLearn implements the scikit-learn interface of Fit/Predict, so you can easily swap out estimators for trial and error.
+GoLearn also includes helper functions for data, like cross validation, and train and test splitting.
+
+```
+// Load in a dataset, with headers. Header attributes will be stored.
+// Think of instances as a Data Frame structure in R or Pandas.
+// You can also create instances from scratch.
+data, err := base.ParseCSVToInstances("datasets/iris_headers.csv", true)
+
+// Print a pleasant summary of your data.
+fmt.Println(data)
+
+// Split your dataframe into a training set, and a test set, with an 80/20 proportion.
+trainTest := base.InstancesTrainTestSplit(data, 0.8)
+trainData := trainTest[0]
+testData := trainTest[1]
+
+// Instantiate a new KNN classifier. Euclidean distance, with 2 neighbours.
+cls := knn.NewKnnClassifier("euclidean", 2)
+
+// Fit it on your training data.
+cls.Fit(trainData)
+
+// Get your predictions against test instances.
+predictions := cls.Predict(testData)
+
+// Print a confusion matrix with precision and recall metrics.
+confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
+fmt.Println(evaluation.GetSummary(confusionMat))
+```
+
+```
+Iris-virginica	28	2	  56	0.9333	0.9333  0.9333
+Iris-setosa	    29	0	  59	1.0000  1.0000	1.0000
+Iris-versicolor	27	2	  57  0.9310	0.9310  0.9310
+Overall accuracy: 0.9545
+```
+
+Examples
+========
+
+GoLearn comes with practical examples. Dive in and see what is going on.
+
 ```
 cd examples/
 go run knnclassifier_iris.go
+go run instances.go
 ```

 Join the team
 =============

-If you'd like to contribute, please send me a mail at stephen dot whitworth at hailocab dot com. I will also add you to the team [Slack](https://slack.com) account, which we also use to communicate.
+Please send me a mail at stephen dot whitworth at hailocab dot com.
--- a/base/attributes.go
+++ b/base/attributes.go
@ -0,0 +1,267 @@
+package base
+
+import "fmt"
+import "strconv"
+
+const (
+	// CategoricalType is for Attributes which represent values distinctly.
+	CategoricalType = iota
+	// Float64Type should be replaced with a FractionalNumeric type [DEPRECATED].
+	Float64Type
+)
+
+// Attribute disambiguates columns of the feature matrix and declares their types.
+type Attribute interface {
+	// GetType returns the general characteristics of this Attribute
+	// (CategoricalType or Float64Type), to avoid the overhead of casting.
+	GetType() int
+	// GetName returns the human-readable name of this Attribute.
+	GetName() string
+	// SetName sets the human-readable name of this Attribute.
+	SetName(string)
+	// String gets a human-readable overview of this Attribute for debugging.
+	String() string
+	// GetSysValFromString converts a value given from a human-readable string
+	// into a system representation. For example, a CategoricalAttribute with
+	// values ["iris-setosa", "iris-virginica"] would return the float64
+	// representation of 0 when given "iris-setosa".
+	GetSysValFromString(string) float64
+	// GetStringFromSysVal converts a given value from a system representation
+	// into a human representation. For example, a CategoricalAttribute with
+	// values ["iris-setosa", "iris-virginica"] might return "iris-setosa"
+	// when given 0.0 as the argument.
+	GetStringFromSysVal(float64) string
+	// Equals tests for equality with another Attribute. Other Attributes are
+	// considered equal if:
+	// * They have the same type (i.e. FloatAttribute <> CategoricalAttribute)
+	// * They have the same name
+	// * If applicable, they have the same categorical values (though not
+	//   necessarily in the same order).
+	// NOTE(review): CategoricalAttribute.Equals compares values position by
+	// position, so it does require the same order - confirm which is intended.
+	Equals(Attribute) bool
+}
+
+// FloatAttribute is an implementation which stores floating point
+// representations of numbers.
+type FloatAttribute struct {
+	Name      string
+	Precision int
+}
+
+// NewFloatAttribute returns a new FloatAttribute with a default
+// precision of 2 decimal places
+func NewFloatAttribute() *FloatAttribute {
+	return &FloatAttribute{"", 2}
+}
+
+// Equals tests a FloatAttribute for equality with another Attribute.
+//
+// Returns false if the other Attribute has a different name
+// or if the other Attribute is not a FloatAttribute.
+func (Attr *FloatAttribute) Equals(other Attribute) bool {
+	// Check whether this FloatAttribute is equal to another
+	_, ok := other.(*FloatAttribute)
+	if !ok {
+		// Not the same type, so can't be equal
+		return false
+	}
+	if Attr.GetName() != other.GetName() {
+		return false
+	}
+	return true
+}
+
+// GetName returns this FloatAttribute's human-readable name.
+func (Attr *FloatAttribute) GetName() string {
+	return Attr.Name
+}
+
+// SetName sets this FloatAttribute's human-readable name.
+func (Attr *FloatAttribute) SetName(name string) {
+	Attr.Name = name
+}
+
+// GetType returns Float64Type.
+func (Attr *FloatAttribute) GetType() int {
+	return Float64Type
+}
+
+// String returns a human-readable summary of this Attribute.
+// e.g. "FloatAttribute(Sepal Width)"
+func (Attr *FloatAttribute) String() string {
+	return fmt.Sprintf("FloatAttribute(%s)", Attr.Name)
+}
+
+// CheckSysValFromString confirms whether a given rawVal can
+// be converted into a valid system representation.
+func (Attr *FloatAttribute) CheckSysValFromString(rawVal string) (float64, error) {
+	f, err := strconv.ParseFloat(rawVal, 64)
+	if err != nil {
+		return 0.0, err
+	}
+	return f, nil
+}
+
+// GetSysValFromString parses the given rawVal string to a float64 and returns it.
+//
+// float64 happens to be a 1-to-1 mapping to the system representation.
+// IMPORTANT: This function panic()s if rawVal is not a valid float.
+// Use CheckSysValFromString to confirm.
+func (Attr *FloatAttribute) GetSysValFromString(rawVal string) float64 {
+	f, err := strconv.ParseFloat(rawVal, 64)
+	if err != nil {
+		panic(err)
+	}
+	return f
+}
+
+// GetStringFromSysVal converts a given system value to a string with
+// Attr.Precision decimal places (NewFloatAttribute sets a precision of 2).
+func (Attr *FloatAttribute) GetStringFromSysVal(rawVal float64) string {
+	// Build the format verb from the precision, e.g. "%.2f" when Precision is 2.
+	formatString := fmt.Sprintf("%%.%df", Attr.Precision)
+	return fmt.Sprintf(formatString, rawVal)
+}
+
+// GetSysVal returns the system representation of userVal.
+//
+// Because FloatAttribute represents float64 types, this
+// just returns its argument.
+func (Attr *FloatAttribute) GetSysVal(userVal float64) float64 {
+	return userVal
+}
+
+// GetUsrVal returns the user representation of sysVal.
+//
+// Because FloatAttribute represents float64 types, this
+// just returns its argument.
+func (Attr *FloatAttribute) GetUsrVal(sysVal float64) float64 {
+	return sysVal
+}
+
+// CategoricalAttribute is an Attribute implementation
+// which stores discrete string values
+// - useful for representing classes.
+type CategoricalAttribute struct {
+	Name   string
+	values []string
+}
+
+// NewCategoricalAttribute returns a new CategoricalAttribute with an
+// empty name and an empty (non-nil) list of values.
+func NewCategoricalAttribute() *CategoricalAttribute {
+	return &CategoricalAttribute{
+		"",
+		make([]string, 0),
+	}
+}
+
+// GetName returns the human-readable name assigned to this attribute.
+func (Attr *CategoricalAttribute) GetName() string {
+	return Attr.Name
+}
+
+// SetName sets the human-readable name on this attribute.
+func (Attr *CategoricalAttribute) SetName(name string) {
+	Attr.Name = name
+}
+
+// GetType returns CategoricalType to avoid casting overhead.
+func (Attr *CategoricalAttribute) GetType() int {
+	return CategoricalType
+}
+
+// GetSysVal returns the system representation of userVal as an index into the Values slice
+// If the userVal can't be found, it returns -1.
+func (Attr *CategoricalAttribute) GetSysVal(userVal string) float64 {
+	for idx, val := range Attr.values {
+		if val == userVal {
+			return float64(idx)
+		}
+	}
+	return -1
+}
+
+// GetUsrVal returns a human-readable representation of the given sysVal.
+//
+// IMPORTANT: this function doesn't check the boundaries of the array.
+func (Attr *CategoricalAttribute) GetUsrVal(sysVal float64) string {
+	idx := int(sysVal)
+	return Attr.values[idx]
+}
+
+// GetSysValFromString returns the system representation of rawVal
+// as an index into the Values slice. If rawVal is not inside
+// the Values slice, it is appended.
+//
+// IMPORTANT: If no system representation yet exists, this function adds it.
+// If you need to determine whether rawVal exists: use GetSysVal and check
+// for a -1 return value.
+//
+// Example: if the CategoricalAttribute contains the values ["iris-setosa",
+// "iris-virginica"] and "iris-versicolor" is provided as the argument,
+// the Values slice becomes ["iris-setosa", "iris-virginica", "iris-versicolor"]
+// and 2.00 is returned as the system representation.
+func (Attr *CategoricalAttribute) GetSysValFromString(rawVal string) float64 {
+	// Match in raw values
+	catIndex := -1
+	for i, s := range Attr.values {
+		if s == rawVal {
+			catIndex = i
+			break
+		}
+	}
+	if catIndex == -1 {
+		Attr.values = append(Attr.values, rawVal)
+		catIndex = len(Attr.values) - 1
+	}
+	return float64(catIndex)
+}
+
+// String returns a human-readable summary of this Attribute.
+//
+// Returns a string containing the list of human-readable values this
+// CategoricalAttribute can take.
+func (Attr *CategoricalAttribute) String() string {
+	return fmt.Sprintf("CategoricalAttribute(%s)", Attr.values)
+}
+
+// GetStringFromSysVal returns a human-readable value from the given system-representation
+// value val.
+//
+// val is truncated to an int and used as an index into the values slice.
+//
+// IMPORTANT: This function calls panic() if that index is negative, or
+// greater than or equal to the length of the values slice.
+// TODO: Return a user-configurable default instead.
+func (Attr *CategoricalAttribute) GetStringFromSysVal(val float64) string {
+	convVal := int(val)
+	// Reject both ends of the range here so that every invalid index
+	// produces the same descriptive panic message.
+	if convVal < 0 || convVal >= len(Attr.values) {
+		panic(fmt.Sprintf("Out of range: %d in %d", convVal, len(Attr.values)))
+	}
+	return Attr.values[convVal]
+}
+
+// Equals checks equality against another Attribute.
+//
+// Two CategoricalAttributes are considered equal if they contain
+// the same values and have the same name. Otherwise, this function
+// returns false.
+func (Attr *CategoricalAttribute) Equals(other Attribute) bool {
+	attribute, ok := other.(*CategoricalAttribute)
+	if !ok {
+		// Not the same type, so can't be equal
+		return false
+	}
+	if Attr.GetName() != attribute.GetName() {
+		return false
+	}
+
+	// Check that this CategoricalAttribute has the same
+	// values as the other, in the same order
+	if len(attribute.values) != len(Attr.values) {
+		return false
+	}
+
+	for i, a := range Attr.values {
+		if a != attribute.values[i] {
+			return false
+		}
+	}
+
+	return true
+}
--- a/base/classifier.go
+++ b/base/classifier.go
@ -0,0 +1,30 @@
+package base
+
+import (
+	"github.com/gonum/matrix/mat64"
+)
+
+// Classifier implementations predict categorical class labels.
+type Classifier interface {
+	// Takes a set of Instances, copies the class Attribute
+	// and constructs a new set of Instances of equivalent
+	// length with only the class Attribute and fills it in
+	// with predictions.
+	Predict(*Instances) *Instances
+	// Takes a set of instances and updates the Classifier's
+	// internal structures to enable prediction
+	Fit(*Instances)
+	// Why not make every classifier return a nice-looking string?
+	String() string
+}
+
+// BaseClassifier stores options common to every classifier.
+type BaseClassifier struct {
+	TrainingData *Instances
+}
+
+type BaseRegressor struct {
+	Data   mat64.Dense
+	Name   string
+	Labels []float64
+}
--- a/base/csv.go
+++ b/base/csv.go
@ -0,0 +1,213 @@
+package base
+
+import (
+	"encoding/csv"
+	"fmt"
+	"io"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// ParseCSVGetRows returns the number of rows in a given file.
+func ParseCSVGetRows(filepath string) int {
+	file, err := os.Open(filepath)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+
+	reader := csv.NewReader(file)
+	counter := 0
+	for {
+		_, err := reader.Read()
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			panic(err)
+		}
+		counter++
+	}
+	return counter
+}
+
+// ParseCSVGetAttributes returns an ordered slice of appropriately typed
+// and named Attributes.
+func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
+	attrs := ParseCSVSniffAttributeTypes(filepath, hasHeaders)
+	names := ParseCSVSniffAttributeNames(filepath, hasHeaders)
+	for i, attr := range attrs {
+		attr.SetName(names[i])
+	}
+	return attrs
+}
+
+// ParseCSVSniffAttributeNames returns a slice containing the top row
+// of a given CSV file, or placeholders if hasHeaders is false.
+func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
+	file, err := os.Open(filepath)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+
+	reader := csv.NewReader(file)
+	headers, err := reader.Read()
+	if err != nil {
+		panic(err)
+	}
+
+	if hasHeaders {
+		for i, h := range headers {
+			headers[i] = strings.TrimSpace(h)
+		}
+		return headers
+	}
+
+	for i := range headers {
+		headers[i] = fmt.Sprintf("%d", i)
+	}
+	return headers
+
+}
+
+// ParseCSVSniffAttributeTypes returns a slice of appropriately-typed Attributes.
+//
+// The type of a given attribute is determined by looking at the first data row
+// of the CSV (the row after the header when hasHeaders is true): entries which
+// look like decimal numbers become FloatAttributes, everything else becomes a
+// CategoricalAttribute.
+//
+// IMPORTANT: this function panic()s if the file cannot be opened or read.
+func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute {
+	file, err := os.Open(filepath)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+	reader := csv.NewReader(file)
+	if hasHeaders {
+		// Skip the header row
+		if _, err := reader.Read(); err != nil {
+			panic(err)
+		}
+	}
+	columns, err := reader.Read()
+	if err != nil {
+		panic(err)
+	}
+
+	// Compile the pattern once instead of once per column
+	numeric := regexp.MustCompile("^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$")
+	attrs := make([]Attribute, 0, len(columns))
+	for _, entry := range columns {
+		if numeric.MatchString(entry) {
+			attrs = append(attrs, NewFloatAttribute())
+		} else {
+			attrs = append(attrs, NewCategoricalAttribute())
+		}
+	}
+
+	return attrs
+}
+
+// ParseCSVToInstances reads the CSV file given by filepath and returns
+// the read Instances.
+func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *Instances, err error) {
+
+	defer func() {
+		if r := recover(); r != nil {
+			var ok bool
+			if err, ok = r.(error); !ok {
+				err = fmt.Errorf("golearn: ParseCSVToInstances: %v", r)
+			}
+		}
+	}()
+
+	// Read the number of rows in the file
+	rowCount := ParseCSVGetRows(filepath)
+	if hasHeaders {
+		rowCount--
+	}
+
+	// Read the row headers
+	attrs := ParseCSVGetAttributes(filepath, hasHeaders)
+
+	// Allocate the Instances to return
+	instances = NewInstances(attrs, rowCount)
+
+	// Read the input
+	file, err := os.Open(filepath)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+	reader := csv.NewReader(file)
+
+	rowCounter := 0
+	for {
+		record, err := reader.Read()
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			panic(err)
+		}
+		if rowCounter == 0 {
+			if hasHeaders {
+				hasHeaders = false
+				continue
+			}
+		}
+		for i := range attrs {
+			instances.SetAttrStr(rowCounter, i, record[i])
+		}
+		rowCounter++
+	}
+
+	return
+}
+
+// ParseCSV parses a CSV file and returns the number of columns and rows, the headers, the labels associated with
+// classification, and the data that will be used for training.
+//
+// label is the index of the column holding the class label, and columns
+// lists the indices of the columns whose values are parsed as float64 and
+// appended to the returned data slice, row by row. The first row of the
+// file is always treated as a header row.
+//
+// IMPORTANT: this function panic()s if the file cannot be opened or read,
+// or if label or an entry of columns is out of range for a row. Entries
+// which cannot be parsed as a float are stored as 0.
+func ParseCSV(filepath string, label int, columns []int) (int, int, []string, []string, []float64) {
+	labels := make([]string, 0)
+	data := make([]float64, 0)
+	headers := make([]string, 0)
+	rows := 0
+	cols := len(columns)
+
+	file, err := os.Open(filepath)
+	if err != nil {
+		// Nothing can be read without the file: fail with the real cause
+		// instead of an index-out-of-range panic further down.
+		panic(err)
+	}
+	defer file.Close()
+
+	reader := csv.NewReader(file)
+
+	headerrow, err := reader.Read()
+	if err == io.EOF {
+		// Empty file: there are no headers, labels or data to return
+		return cols, rows, headers, labels, data
+	} else if err != nil {
+		panic(err)
+	}
+
+	for _, col := range columns {
+		headers = append(headers, headerrow[col])
+	}
+
+	for {
+		record, err := reader.Read()
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			fmt.Println("Error:", err)
+			if record == nil {
+				// No usable row came back alongside the error
+				panic(err)
+			}
+		}
+
+		labels = append(labels, record[label])
+
+		// Iterate over the requested columns and append the values to a slice
+		for _, col := range columns {
+			// Parse errors are ignored, which leaves the value at 0
+			number, _ := strconv.ParseFloat(record[col], 64)
+			data = append(data, number)
+		}
+		rows++
+	}
+	return cols, rows, headers, labels, data
+}
--- a/base/csv_test.go
+++ b/base/csv_test.go
@ -0,0 +1,106 @@
+package base
+
+import "testing"
+
+// TestParseCSVGetRows checks the row counts reported for the iris datasets,
+// with and without a header row.
+func TestParseCSVGetRows(testEnv *testing.T) {
+	lineCount := ParseCSVGetRows("../examples/datasets/iris.csv")
+	if lineCount != 150 {
+		// Errorf, not Error: the message contains formatting verbs
+		testEnv.Errorf("Should have %d lines, has %d", 150, lineCount)
+	}
+
+	lineCount = ParseCSVGetRows("../examples/datasets/iris_headers.csv")
+	if lineCount != 151 {
+		testEnv.Errorf("Should have %d lines, has %d", 151, lineCount)
+	}
+
+}
+
+// TestParseCCSVGetAttributes checks the types and names inferred for the
+// first and last columns of the iris dataset with headers.
+func TestParseCCSVGetAttributes(testEnv *testing.T) {
+	attrs := ParseCSVGetAttributes("../examples/datasets/iris_headers.csv", true)
+	if attrs[0].GetType() != Float64Type {
+		// Errorf, not Error: the message contains a formatting verb
+		testEnv.Errorf("First attribute should be a float, %s", attrs[0])
+	}
+	if attrs[0].GetName() != "Sepal length" {
+		testEnv.Error(attrs[0].GetName())
+	}
+
+	if attrs[4].GetType() != CategoricalType {
+		testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
+	}
+	if attrs[4].GetName() != "Species" {
+		testEnv.Error(attrs[4])
+	}
+}
+
+// TestParseCsvSniffAttributeTypes checks that the four measurement columns
+// of the iris dataset are sniffed as floats and the species as categorical.
+func TestParseCsvSniffAttributeTypes(testEnv *testing.T) {
+	attrs := ParseCSVSniffAttributeTypes("../examples/datasets/iris_headers.csv", true)
+	// Errorf, not Error: the messages contain a formatting verb
+	if attrs[0].GetType() != Float64Type {
+		testEnv.Errorf("First attribute should be a float, %s", attrs[0])
+	}
+	if attrs[1].GetType() != Float64Type {
+		testEnv.Errorf("Second attribute should be a float, %s", attrs[1])
+	}
+	if attrs[2].GetType() != Float64Type {
+		testEnv.Errorf("Third attribute should be a float, %s", attrs[2])
+	}
+	if attrs[3].GetType() != Float64Type {
+		testEnv.Errorf("Fourth attribute should be a float, %s", attrs[3])
+	}
+	if attrs[4].GetType() != CategoricalType {
+		testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
+	}
+}
+
+func TestParseCSVSniffAttributeNamesWithHeaders(testEnv *testing.T) {
+	attrs := ParseCSVSniffAttributeNames("../examples/datasets/iris_headers.csv", true)
+	if attrs[0] != "Sepal length" {
+		testEnv.Error(attrs[0])
+	}
+	if attrs[1] != "Sepal width" {
+		testEnv.Error(attrs[1])
+	}
+	if attrs[2] != "Petal length" {
+		testEnv.Error(attrs[2])
+	}
+	if attrs[3] != "Petal width" {
+		testEnv.Error(attrs[3])
+	}
+	if attrs[4] != "Species" {
+		testEnv.Error(attrs[4])
+	}
+}
+
+func TestReadInstances(testEnv *testing.T) {
+	inst, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
+	if err != nil {
+		testEnv.Error(err)
+		return
+	}
+	row1 := inst.RowStr(0)
+	row2 := inst.RowStr(50)
+	row3 := inst.RowStr(100)
+
+	if row1 != "5.10 3.50 1.40 0.20 Iris-setosa" {
+		testEnv.Error(row1)
+	}
+	if row2 != "7.00 3.20 4.70 1.40 Iris-versicolor" {
+		testEnv.Error(row2)
+	}
+	if row3 != "6.30 3.30 6.00 2.50 Iris-virginica" {
+		testEnv.Error(row3)
+	}
+}
+
+func TestReadAwkwardInsatnces(testEnv *testing.T) {
+	inst, err := ParseCSVToInstances("../examples/datasets/chim.csv", true)
+	if err != nil {
+		testEnv.Error(err)
+		return
+	}
+	if inst.GetAttr(0).GetType() != Float64Type {
+		testEnv.Error("Should be float!")
+	}
+	if inst.GetAttr(1).GetType() != CategoricalType {
+		testEnv.Error("Should be discrete!")
+	}
+}
--- a/base/decompose_test.go
+++ b/base/decompose_test.go
@ -0,0 +1,33 @@
+package base
+
+import "testing"
+
+func TestDecomp(testEnv *testing.T) {
+	inst, err := ParseCSVToInstances("../examples/datasets/iris_binned.csv", true)
+	if err != nil {
+		testEnv.Error(err)
+		return
+	}
+	decomp := inst.DecomposeOnAttributeValues(inst.GetAttr(0))
+
+	row0 := decomp["0.00"].RowStr(0)
+	row1 := decomp["1.00"].RowStr(0)
+	/*	row2 := decomp["2.00"].RowStr(0)
+		row3 := decomp["3.00"].RowStr(0)
+		row4 := decomp["4.00"].RowStr(0)
+		row5 := decomp["5.00"].RowStr(0)
+		row6 := decomp["6.00"].RowStr(0)
+		row7 := decomp["7.00"].RowStr(0)*/
+	row8 := decomp["8.00"].RowStr(0)
+	//	row9 := decomp["9.00"].RowStr(0)
+
+	if row0 != "3.10 1.50 0.20 Iris-setosa" {
+		testEnv.Error(row0)
+	}
+	if row1 != "3.00 1.40 0.20 Iris-setosa" {
+		testEnv.Error(row1)
+	}
+	if row8 != "2.90 6.30 1.80 Iris-virginica" {
+		testEnv.Error(row8)
+	}
+}
--- a/base/instances.go
+++ b/base/instances.go
@ -0,0 +1,519 @@
+package base
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"github.com/gonum/matrix/mat64"
+	"math/rand"
+)
+
+// SortDirection specifies sorting direction...
+type SortDirection int
+
+const (
+	// Descending says that Instances should be sorted high to low...
+	Descending SortDirection = 1
+	// Ascending states that Instances should be sorted low to high...
+	Ascending SortDirection = 2
+)
+
+const highBit int64 = -1 << 63
+
+// Instances represents a grid of numbers (typed by Attributes)
+// stored internally in a mat64.Dense as float64's.
+// See docs/instances.md for more information.
+type Instances struct {
+	storage    *mat64.Dense
+	attributes []Attribute
+	Rows       int
+	Cols       int
+	ClassIndex int
+}
+
+// xorFloatOp reinterprets the bytes of item as an int64, flips the top
+// (sign) bit and reinterprets the result as a float64 again. Applying it
+// twice yields the original bit pattern.
+//
+// NOTE(review): Sort uses this to build byte-wise comparable keys. Flipping
+// only the sign bit orders non-negative values correctly, but negative values
+// would presumably also need their remaining bits inverted - confirm.
+func xorFloatOp(item float64) float64 {
+	var ret float64
+	var tmp int64
+	buf := bytes.NewBuffer(nil)
+	// Round-trip through the buffer to read item's bit pattern as an int64
+	binary.Write(buf, binary.LittleEndian, item)
+	binary.Read(buf, binary.LittleEndian, &tmp)
+	// Toggle the most significant bit
+	tmp ^= -1 << 63
+	// Round-trip again to turn the modified bits back into a float64
+	binary.Write(buf, binary.LittleEndian, tmp)
+	binary.Read(buf, binary.LittleEndian, &ret)
+	return ret
+}
+
+func printFloatByteArr(arr [][]byte) {
+	buf := bytes.NewBuffer(nil)
+	var f float64
+	for _, b := range arr {
+		buf.Write(b)
+		binary.Read(buf, binary.LittleEndian, &f)
+		f = xorFloatOp(f)
+		fmt.Println(f)
+	}
+}
+
+// Sort does an in-place radix sort of Instances, using SortDirection
+// direction (Ascending or Descending) with attrs as a slice of Attribute
+// indices that you want to sort by.
+//
+// IMPORTANT: Radix sort is not stable, so ordering outside
+// the attributes used for sorting is arbitrary.
+func (inst *Instances) Sort(direction SortDirection, attrs []int) {
+	// Create a buffer
+	buf := bytes.NewBuffer(nil)
+	ds := make([][]byte, inst.Rows)
+	rs := make([]int, inst.Rows)
+	for i := 0; i < inst.Rows; i++ {
+		byteBuf := make([]byte, 8*len(attrs))
+		for _, a := range attrs {
+			x := inst.storage.At(i, a)
+			binary.Write(buf, binary.LittleEndian, xorFloatOp(x))
+		}
+		buf.Read(byteBuf)
+		ds[i] = byteBuf
+		rs[i] = i
+	}
+	// Sort viua
+	valueBins := make([][][]byte, 256)
+	rowBins := make([][]int, 256)
+	for i := 0; i < 8*len(attrs); i++ {
+		for j := 0; j < len(ds); j++ {
+			// Address each row value by it's ith byte
+			b := ds[j]
+			valueBins[b[i]] = append(valueBins[b[i]], b)
+			rowBins[b[i]] = append(rowBins[b[i]], rs[j])
+		}
+		j := 0
+		for k := 0; k < 256; k++ {
+			bs := valueBins[k]
+			rc := rowBins[k]
+			copy(ds[j:], bs)
+			copy(rs[j:], rc)
+			j += len(bs)
+			valueBins[k] = bs[:0]
+			rowBins[k] = rc[:0]
+		}
+	}
+
+	for _, b := range ds {
+		var v float64
+		buf.Write(b)
+		binary.Read(buf, binary.LittleEndian, &v)
+	}
+
+	done := make([]bool, inst.Rows)
+	for index := range rs {
+		if done[index] {
+			continue
+		}
+		j := index
+		for {
+			done[j] = true
+			if rs[j] != index {
+				inst.swapRows(j, rs[j])
+				j = rs[j]
+			} else {
+				break
+			}
+		}
+	}
+
+	if direction == Descending {
+		// Reverse the matrix
+		for i, j := 0, inst.Rows-1; i < j; i, j = i+1, j-1 {
+			inst.swapRows(i, j)
+		}
+	}
+}
+
+// NewInstances returns a preallocated Instances structure
+// with some helpful values pre-filled.
+func NewInstances(attrs []Attribute, rows int) *Instances {
+	rawStorage := make([]float64, rows*len(attrs))
+	return NewInstancesFromRaw(attrs, rows, rawStorage)
+}
+
+// CheckNewInstancesFromRaw checks whether a call to NewInstancesFromRaw
+// is likely to produce an error-free result.
+func CheckNewInstancesFromRaw(attrs []Attribute, rows int, data []float64) error {
+	size := rows * len(attrs)
+	if size < len(data) {
+		return errors.New("base: data length is larger than the rows * attribute space.")
+	} else if size > len(data) {
+		return errors.New("base: data is smaller than the rows * attribute space")
+	}
+	return nil
+}
+
+// NewInstancesFromRaw wraps a slice of float64 numbers in a
+// mat64.Dense structure, reshaping it with the given number of rows
+// and representing it with the given attrs (Attribute slice)
+//
+// IMPORTANT: if the |attrs| * |rows| value doesn't equal len(data)
+// then panic()s may occur. Use CheckNewInstancesFromRaw to confirm.
+func NewInstancesFromRaw(attrs []Attribute, rows int, data []float64) *Instances {
+	rawStorage := mat64.NewDense(rows, len(attrs), data)
+	return NewInstancesFromDense(attrs, rows, rawStorage)
+}
+
+// NewInstancesFromDense creates a set of Instances from a mat64.Dense
+// matrix
+func NewInstancesFromDense(attrs []Attribute, rows int, mat *mat64.Dense) *Instances {
+	return &Instances{mat, attrs, rows, len(attrs), len(attrs) - 1}
+}
+
+// InstancesTrainTestSplit takes a given Instances (src) and a train-test fraction
+// (prop) and returns an array of two new Instances, one containing approximately
+// that fraction and the other containing what's left.
+//
+// Each row is assigned independently at random, so the sizes of the two
+// returned sets vary from call to call. Both sets share src's attribute slice.
+//
+// NOTE(review): a row goes into the first returned set only when a random
+// number in [0, 100] exceeds 100*prop, so the first ("training") set receives
+// roughly 1-prop of the rows and the second roughly prop - confirm that this
+// matches what callers expect.
+//
+// IMPORTANT: this function is only meaningful when prop is between 0.0 and 1.0.
+// Using any other values may result in odd behaviour.
+func InstancesTrainTestSplit(src *Instances, prop float64) [2](*Instances) {
+	trainingRows := make([]int, 0)
+	testingRows := make([]int, 0)
+	numAttrs := len(src.attributes)
+	// Decide, row by row, which of the two sets each source row belongs to
+	for i := 0; i < src.Rows; i++ {
+		trainOrTest := rand.Intn(101)
+		if trainOrTest > int(100*prop) {
+			trainingRows = append(trainingRows, i)
+		} else {
+			testingRows = append(testingRows, i)
+		}
+	}
+
+	rawTrainMatrix := mat64.NewDense(len(trainingRows), numAttrs, make([]float64, len(trainingRows)*numAttrs))
+	rawTestMatrix := mat64.NewDense(len(testingRows), numAttrs, make([]float64, len(testingRows)*numAttrs))
+
+	// Copy the selected rows into the freshly allocated matrices
+	for i, row := range trainingRows {
+		rowDat := src.storage.RowView(row)
+		rawTrainMatrix.SetRow(i, rowDat)
+	}
+	for i, row := range testingRows {
+		rowDat := src.storage.RowView(row)
+		rawTestMatrix.SetRow(i, rowDat)
+	}
+
+	var ret [2]*Instances
+	ret[0] = NewInstancesFromDense(src.attributes, len(trainingRows), rawTrainMatrix)
+	ret[1] = NewInstancesFromDense(src.attributes, len(testingRows), rawTestMatrix)
+	return ret
+}
+
+// CountAttrValues returns the distribution of values of a given
+// Attribute.
+// IMPORTANT: calls panic() if the attribute index of a cannot be
+// determined. Call GetAttrIndex(a) and check for a -1 return value.
+func (inst *Instances) CountAttrValues(a Attribute) map[string]int {
+	ret := make(map[string]int)
+	attrIndex := inst.GetAttrIndex(a)
+	if attrIndex == -1 {
+		panic("Invalid attribute")
+	}
+	for i := 0; i < inst.Rows; i++ {
+		sysVal := inst.Get(i, attrIndex)
+		stringVal := a.GetStringFromSysVal(sysVal)
+		ret[stringVal] += 1
+	}
+	return ret
+}
+
+// CountClassValues returns the class distribution of this
+// Instances set
+func (inst *Instances) CountClassValues() map[string]int {
+	a := inst.GetAttr(inst.ClassIndex)
+	return inst.CountAttrValues(a)
+}
+
+// DecomposeOnAttributeValues divides the instance set depending on the
+// value of a given Attribute, constructs child instances, and returns
+// them in a map keyed on the string value of that Attribute.
+// IMPORTANT: calls panic() if the attribute index of at cannot be determined.
+// Use GetAttrIndex(at) and check for a -1 return value.
+func (inst *Instances) DecomposeOnAttributeValues(at Attribute) map[string]*Instances {
+	// Find the attribute we're decomposing on
+	attrIndex := inst.GetAttrIndex(at)
+	if attrIndex == -1 {
+		panic("Invalid attribute index")
+	}
+	// Construct the new attribute set
+	newAttrs := make([]Attribute, 0)
+	for i := range inst.attributes {
+		a := inst.attributes[i]
+		if a.Equals(at) {
+			continue
+		}
+		newAttrs = append(newAttrs, a)
+	}
+	// Create the return map, several counting maps
+	ret := make(map[string]*Instances)
+	counts := inst.CountAttrValues(at) // So we know what to allocate
+	rows := make(map[string]int)
+	for k := range counts {
+		tmp := NewInstances(newAttrs, counts[k])
+		ret[k] = tmp
+	}
+	for i := 0; i < inst.Rows; i++ {
+		newAttrCounter := 0
+		classVar := at.GetStringFromSysVal(inst.Get(i, attrIndex))
+		dest := ret[classVar]
+		destRow := rows[classVar]
+		for j := 0; j < inst.Cols; j++ {
+			a := inst.attributes[j]
+			if a.Equals(at) {
+				continue
+			}
+			dest.Set(destRow, newAttrCounter, inst.Get(i, j))
+			newAttrCounter++
+		}
+		rows[classVar]++
+	}
+	return ret
+}
+
+// Get returns the system representation (float64) of the value
+// stored at the given row and col coordinate.
+func (inst *Instances) Get(row int, col int) float64 {
+	return inst.storage.At(row, col)
+}
+
+// Set sets the system representation (float64) to val at the
+// given row and column coordinate.
+func (inst *Instances) Set(row int, col int, val float64) {
+	inst.storage.Set(row, col, val)
+}
+
+// GetRowVector returns a row of system representation
+// values at the given row index.
+func (inst *Instances) GetRowVector(row int) []float64 {
+	return inst.storage.RowView(row)
+}
+
+// GetRowVectorWithoutClass returns a row of system representation
+// values at the given row index, excluding the class attribute
+func (inst *Instances) GetRowVectorWithoutClass(row int) []float64 {
+	rawRow := make([]float64, inst.Cols)
+	copy(rawRow, inst.GetRowVector(row))
+	return append(rawRow[0:inst.ClassIndex], rawRow[inst.ClassIndex+1:inst.Cols]...)
+}
+
+// GetClass returns the string representation of the given
+// row's class, as determined by the Attribute at the ClassIndex
+// position from GetAttr
+func (inst *Instances) GetClass(row int) string {
+	attr := inst.GetAttr(inst.ClassIndex)
+	val := inst.Get(row, inst.ClassIndex)
+	return attr.GetStringFromSysVal(val)
+}
+
+// GetClassAttrPtr returns a pointer to the Attribute found at the
+// ClassIndex position. The pointer refers to a local copy of the
+// interface value, not to the entry in the attributes slice.
+func (Inst *Instances) GetClassAttrPtr() *Attribute {
+	attr := Inst.GetAttr(Inst.ClassIndex)
+	return &attr
+}
+
+// GetClassAttr returns the Attribute found at the ClassIndex position.
+func (Inst *Instances) GetClassAttr() Attribute {
+	return Inst.GetAttr(Inst.ClassIndex)
+}
+
+//
+// Attribute functions
+//
+
+// GetAttributeCount returns the number of attributes represented.
+func (inst *Instances) GetAttributeCount() int {
+	// Return the number of attributes attached to this Instance set
+	return len(inst.attributes)
+}
+
+// SetAttrStr sets the system-representation value of row in column attr
+// to value val, implicitly converting the string to system-representation
+// via the appropriate Attribute function.
+func (inst *Instances) SetAttrStr(row int, attr int, val string) {
+	// Set an attribute on a particular row from a string value
+	a := inst.attributes[attr]
+	sysVal := a.GetSysValFromString(val)
+	inst.storage.Set(row, attr, sysVal)
+}
+
+// GetAttrStr returns the human-readable string for the value stored at
+// row `row' and column `attr'. The conversion from system representation
+// is done by the Attribute registered for that column.
+func (inst *Instances) GetAttrStr(row int, attr int) string {
+	column := inst.attributes[attr]
+	return column.GetStringFromSysVal(inst.Get(row, attr))
+}
+
+// GetAttr returns the Attribute stored at attrIndex in the attributes
+// slice. No bounds check is performed here, so an out-of-range index
+// triggers the usual slice-index panic.
+func (inst *Instances) GetAttr(attrIndex int) Attribute {
+	// Return a copy of an attribute attached to this Instance set
+	return inst.attributes[attrIndex]
+}
+
+// GetAttrIndex returns the position in the attributes slice of the
+// first Attribute that reports itself Equals to `of', or -1 when no
+// Attribute matches.
+func (inst *Instances) GetAttrIndex(of Attribute) int {
+	for idx := 0; idx < len(inst.attributes); idx++ {
+		if inst.attributes[idx].Equals(of) {
+			return idx
+		}
+	}
+	// Nothing matched.
+	return -1
+}
+
+// ReplaceAttr overwrites the attribute at `index' with `a'.
+// Only the attributes slice is touched: values already held in storage
+// for that column are left exactly as they were.
+func (inst *Instances) ReplaceAttr(index int, a Attribute) {
+	// Replace an Attribute at index with another
+	// DOESN'T CONVERT ANY EXISTING VALUES
+	inst.attributes[index] = a
+}
+
+//
+// Printing functions
+//
+
+// RowStr returns the values of the given row as human-readable strings
+// (one per column, converted by that column's Attribute), joined by
+// single spaces with no trailing space.
+func (inst *Instances) RowStr(row int) string {
+	var out bytes.Buffer
+	for col := 0; col < inst.Cols; col++ {
+		// Emit the separator before every value except the first.
+		if col > 0 {
+			out.WriteString(" ")
+		}
+		sysVal := inst.storage.At(row, col)
+		out.WriteString(inst.attributes[col].GetStringFromSysVal(sysVal))
+	}
+	return out.String()
+}
+
+// String returns a multi-line summary of this Instances: the row and
+// attribute counts, one line per Attribute (the one at ClassIndex is
+// prefixed with `*'), and the human-readable values of at most the
+// first 30 rows, followed by a note on how many rows were left out.
+func (inst *Instances) String() string {
+	var out bytes.Buffer
+
+	fmt.Fprintf(&out, "Instances with %d row(s) %d attribute(s)\n", inst.Rows, inst.Cols)
+
+	out.WriteString("Attributes: \n")
+	for idx, attr := range inst.attributes {
+		marker := "\t"
+		if idx == inst.ClassIndex {
+			marker = "*\t"
+		}
+		fmt.Fprintf(&out, "%s%s\n", marker, attr)
+	}
+
+	out.WriteString("\nData:\n")
+	// Cap the number of rows printed at 30.
+	shown := inst.Rows
+	if shown > 30 {
+		shown = 30
+	}
+
+	for r := 0; r < shown; r++ {
+		out.WriteString("\t")
+		for c := 0; c < inst.Cols; c++ {
+			sysVal := inst.storage.At(r, c)
+			out.WriteString(inst.attributes[c].GetStringFromSysVal(sysVal))
+			out.WriteString(" ")
+		}
+		out.WriteString("\n")
+	}
+
+	if hidden := inst.Rows - shown; hidden != 0 {
+		fmt.Fprintf(&out, "\t...\n%d row(s) undisplayed", hidden)
+	} else {
+		out.WriteString("All rows displayed")
+	}
+
+	return out.String()
+}
+
+// SelectAttributes returns a new Instances with the same number of rows
+// as this one, holding only the columns for the Attributes in attrs,
+// in the order given.
+//
+// NOTE(review): an Attribute that GetAttrIndex cannot find yields -1,
+// which is then passed to Get unchecked -- confirm callers only pass
+// Attributes that belong to this set.
+func (inst *Instances) SelectAttributes(attrs []Attribute) *Instances {
+	ret := NewInstances(attrs, inst.Rows)
+	// Resolve each requested Attribute to its source column once, up front.
+	srcCols := make([]int, len(attrs))
+	for k := range attrs {
+		srcCols[k] = inst.GetAttrIndex(attrs[k])
+	}
+	for row := 0; row < inst.Rows; row++ {
+		for dst, src := range srcCols {
+			ret.Set(row, dst, inst.Get(row, src))
+		}
+	}
+	return ret
+}
+
+// GeneratePredictionVector builds a new Instances via NewInstances
+// with as many rows as this one and a single Attribute: this set's
+// class Attribute. No values are copied across.
+func (inst *Instances) GeneratePredictionVector() *Instances {
+	classOnly := []Attribute{inst.GetClassAttr()}
+	return NewInstances(classOnly, inst.Rows)
+}
+
+// Shuffle randomizes the row order in place using the Fisher-Yates
+// algorithm, drawing indices from the package-level math/rand source.
+//
+// Each row i (walking from the last row down) is swapped with a row
+// chosen uniformly from 0..i. Choosing the partner from the whole range
+// on every step, as a naive shuffle does, makes some orderings more
+// likely than others; restricting the range removes that bias.
+func (inst *Instances) Shuffle() {
+	for i := inst.Rows - 1; i > 0; i-- {
+		j := rand.Intn(i + 1)
+		inst.swapRows(i, j)
+	}
+}
+
+// SampleWithReplacement returns a new Instances of `size' rows sharing
+// this set's attributes. Each row is copied from a row of this set
+// picked independently at random, so the same source row can be drawn
+// more than once.
+//
+// IMPORTANT: duplicate rows are very likely whenever size is close to
+// the row count. The row picker is rand.Intn(inst.Rows), which panics
+// when inst.Rows is not positive.
+func (inst *Instances) SampleWithReplacement(size int) *Instances {
+	sample := NewInstances(inst.attributes, size)
+	for dst := 0; dst < size; dst++ {
+		src := rand.Intn(inst.Rows)
+		// Copy the chosen row value by value.
+		for col := 0; col < inst.Cols; col++ {
+			sample.Set(dst, col, inst.Get(src, col))
+		}
+	}
+	return sample
+}
+
+// Equal reports whether other has the same dimensions as this Instances
+// and whether every cell has the same human-readable string value (as
+// produced by each set's own Attributes via GetAttrStr).
+//
+// IMPORTANT: the Attributes themselves are never compared.
+func (inst *Instances) Equal(other *Instances) bool {
+	if inst.Rows != other.Rows || inst.Cols != other.Cols {
+		return false
+	}
+	for row := 0; row < inst.Rows; row++ {
+		for col := 0; col < inst.Cols; col++ {
+			mine := inst.GetAttrStr(row, col)
+			theirs := other.GetAttrStr(row, col)
+			if mine != theirs {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// swapRows exchanges the contents of rows r1 and r2 in storage.
+func (inst *Instances) swapRows(r1 int, r2 int) {
+	// Both rows are copied into scratch buffers before either is
+	// written back, so neither SetRow call can read data the other
+	// has already overwritten.
+	// NOTE(review): this presumably guards against RowView returning
+	// a view into storage rather than a copy -- confirm.
+	row1buf := make([]float64, inst.Cols)
+	row2buf := make([]float64, inst.Cols)
+	row1 := inst.storage.RowView(r1)
+	row2 := inst.storage.RowView(r2)
+	copy(row1buf, row1)
+	copy(row2buf, row2)
+	inst.storage.SetRow(r1, row2buf)
+	inst.storage.SetRow(r2, row1buf)
+}
--- a/base/sort_test.go
+++ b/base/sort_test.go
@ -0,0 +1,107 @@
+package base
+
+import "testing"
+
+// isSortedAsc reports whether the values in column attrIndex never
+// decrease from one row to the next. Sets with fewer than two rows
+// count as sorted.
+func isSortedAsc(inst *Instances, attrIndex int) bool {
+	var prev float64
+	for row := 0; row < inst.Rows; row++ {
+		cur := inst.Get(row, attrIndex)
+		// The first row has no predecessor to compare against.
+		if row > 0 && prev > cur {
+			return false
+		}
+		prev = cur
+	}
+	return true
+}
+
+// isSortedDesc reports whether the values in column attrIndex never
+// increase from one row to the next. Sets with fewer than two rows
+// count as sorted.
+func isSortedDesc(inst *Instances, attrIndex int) bool {
+	var prev float64
+	for row := 0; row < inst.Rows; row++ {
+		cur := inst.Get(row, attrIndex)
+		// The first row has no predecessor to compare against.
+		if row > 0 && prev < cur {
+			return false
+		}
+		prev = cur
+	}
+	return true
+}
+
+// TestSortDesc loads the iris data set, sorts it with Descending over
+// attribute indices 3, 2, 1, 0 and checks the result both for
+// descending order in column 0 and for equality with the pre-sorted
+// reference file.
+func TestSortDesc(testEnv *testing.T) {
+	unsorted, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
+	if err != nil {
+		testEnv.Error(err)
+		return
+	}
+	reference, err := ParseCSVToInstances("../examples/datasets/iris_sorted_desc.csv", true)
+	if err != nil {
+		testEnv.Error(err)
+		return
+	}
+
+	// Sanity-check the fixtures before exercising Sort.
+	if isSortedDesc(unsorted, 0) {
+		testEnv.Error("Can't test descending sort order")
+	}
+	if !isSortedDesc(reference, 0) {
+		testEnv.Error("Reference data not sorted in descending order!")
+	}
+
+	attrs := []int{3, 2, 1, 0}
+	unsorted.Sort(Descending, attrs)
+	if !isSortedDesc(unsorted, 0) {
+		testEnv.Error("Instances are not sorted in descending order")
+		testEnv.Error(unsorted)
+	}
+	if !reference.Equal(unsorted) {
+		// Report the element-wise difference to help locate the mismatch.
+		unsorted.storage.Sub(unsorted.storage, reference.storage)
+		testEnv.Error(unsorted.storage)
+		testEnv.Error("Instances don't match")
+		testEnv.Error(unsorted)
+		testEnv.Error(reference)
+	}
+}
+
+// TestSortAsc loads the iris data set, sorts it with Ascending over
+// attribute indices 3, 2, 1, 0 and checks the result both for
+// ascending order in column 0 and for equality with the pre-sorted
+// reference file.
+func TestSortAsc(testEnv *testing.T) {
+	inst, err := ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
+	// Check the load error before inspecting inst: the value is not
+	// meaningful when err is non-nil, and using it would make the test
+	// fail for the wrong reason.
+	if err != nil {
+		testEnv.Error(err)
+		return
+	}
+	if isSortedAsc(inst, 0) {
+		testEnv.Error("Can't test ascending sort on something ascending already")
+	}
+
+	attrs := []int{3, 2, 1, 0}
+	inst.Sort(Ascending, attrs)
+	if !isSortedAsc(inst, 0) {
+		testEnv.Error("Instances are not sorted in ascending order")
+		testEnv.Error(inst)
+	}
+
+	inst2, err := ParseCSVToInstances("../examples/datasets/iris_sorted_asc.csv", true)
+	if err != nil {
+		testEnv.Error(err)
+		return
+	}
+	if !isSortedAsc(inst2, 0) {
+		testEnv.Error("This file should be sorted in ascending order")
+	}
+
+	if !inst2.Equal(inst) {
+		// Report the element-wise difference to help locate the mismatch.
+		inst.storage.Sub(inst.storage, inst2.storage)
+		testEnv.Error(inst.storage)
+		testEnv.Error("Instances don't match")
+		testEnv.Error(inst)
+		testEnv.Error(inst2)
+	}
+}
--- a/data/csv.go
+++ b/data/csv.go
@ -1,57 +0,0 @@
-/* Data - consists of helper functions for parsing different data formats */
-
-package data
-
-import (
-	"encoding/csv"
-	"fmt"
-	"io"
-	"os"
-	"strconv"
-)
-
-//Parses a CSV file, returning the number of columns and rows, the headers, the labels associated with
-//classification, and the data that will be used for training.
-func ParseCsv(filepath string, label int, columns []int) (int, int, []string, []string, []float64) {
-	labels := make([]string, 0)
-	data := make([]float64, 0)
-	headers := make([]string, 0)
-	rows := 0
-
-	file, err := os.Open(filepath)
-	if err != nil {
-		fmt.Println("Error:", err)
-	}
-	defer file.Close()
-
-	reader := csv.NewReader(file)
-
-	headerrow, _ := reader.Read()
-
-	for _, col := range columns {
-		entry := headerrow[col]
-		headers = append(headers, entry)
-	}
-
-	for {
-		record, err := reader.Read()
-		if err == io.EOF {
-			break
-		} else if err != nil {
-			fmt.Println("Error:", err)
-		}
-
-		//
-		labels = append(labels, record[label])
-
-		//Iterate over our rows and append the values to a slice
-		for _, col := range columns {
-			entry := record[col]
-			number, _ := strconv.ParseFloat(entry, 64)
-			data = append(data, number)
-		}
-		rows += 1
-	}
-	cols := len(columns)
-	return cols, rows, headers, labels, data
-}
--- a/evaluation/confusion.go
+++ b/evaluation/confusion.go
@ -0,0 +1,191 @@
+package evaluation
+
+import (
+	"bytes"
+	"fmt"
+	"github.com/sjwhitworth/golearn/base"
+)
+
+// ConfusionMatrix is a nested map of actual and predicted class counts
+type ConfusionMatrix map[string]map[string]int
+
+// GetConfusionMatrix counts, for every row, the pair (class in `ref',
+// class in `gen') and returns the counts as a nested map indexed first
+// by the reference class and then by the generated class.
+// It panics when the two sets have different row counts.
+func GetConfusionMatrix(ref *base.Instances, gen *base.Instances) map[string]map[string]int {
+	if ref.Rows != gen.Rows {
+		panic("Row counts should match")
+	}
+
+	matrix := make(map[string]map[string]int)
+	for row := 0; row < ref.Rows; row++ {
+		actual := ref.GetClass(row)
+		predicted := gen.GetClass(row)
+		// Create the inner map the first time a reference class is seen.
+		counts, seen := matrix[actual]
+		if !seen {
+			counts = make(map[string]int)
+			matrix[actual] = counts
+		}
+		counts[predicted]++
+	}
+	return matrix
+}
+
+// GetTruePositives returns the diagonal entry c[class][class] of the
+// ConfusionMatrix as a float64: the number of entries of the given
+// class that were predicted as that class. A class absent from the
+// matrix yields 0.
+func GetTruePositives(class string, c ConfusionMatrix) float64 {
+	return float64(c[class][class])
+}
+
+// GetFalsePositives returns how many entries of some other class were
+// predicted as the given class, i.e. the sum of c[k][class] over every
+// k different from class.
+func GetFalsePositives(class string, c ConfusionMatrix) float64 {
+	total := 0.0
+	for actual, predictions := range c {
+		if actual != class {
+			total += float64(predictions[class])
+		}
+	}
+	return total
+}
+
+// GetFalseNegatives returns how many entries of the given class were
+// predicted as some other class, i.e. the sum of c[class][k] over every
+// k different from class.
+func GetFalseNegatives(class string, c ConfusionMatrix) float64 {
+	total := 0.0
+	for predicted, count := range c[class] {
+		if predicted != class {
+			total += float64(count)
+		}
+	}
+	return total
+}
+
+// GetTrueNegatives returns how many entries neither belong to the given
+// class nor were predicted as it, i.e. the sum of c[k][l] over every
+// k and l both different from class.
+func GetTrueNegatives(class string, c ConfusionMatrix) float64 {
+	total := 0.0
+	for actual, predictions := range c {
+		if actual == class {
+			continue
+		}
+		for predicted, count := range predictions {
+			if predicted != class {
+				total += float64(count)
+			}
+		}
+	}
+	return total
+}
+
+// GetPrecision returns the fraction of the predictions made for a given
+// class which were correct: TP / (TP + FP).
+// When the class was never predicted the division is 0/0 and the
+// result is NaN.
+func GetPrecision(class string, c ConfusionMatrix) float64 {
+	tp := GetTruePositives(class, c)
+	fp := GetFalsePositives(class, c)
+	predictedAsClass := tp + fp
+	return tp / predictedAsClass
+}
+
+// GetRecall returns the fraction of the occurrences of a given class
+// which were predicted as that class: TP / (TP + FN).
+// When the class never occurs the division is 0/0 and the result
+// is NaN.
+func GetRecall(class string, c ConfusionMatrix) float64 {
+	tp := GetTruePositives(class, c)
+	fn := GetFalseNegatives(class, c)
+	actuallyClass := tp + fn
+	return tp / actuallyClass
+}
+
+// GetF1Score returns the F1 score (F-measure) of a given class: the
+// harmonic mean of its precision and recall, 2*P*R / (P + R).
+func GetF1Score(class string, c ConfusionMatrix) float64 {
+	p := GetPrecision(class, c)
+	r := GetRecall(class, c)
+	product := p * r
+	return 2 * product / (p + r)
+}
+
+// GetAccuracy returns the overall classification accuracy: the sum of
+// the diagonal entries of the ConfusionMatrix divided by the sum of
+// all of its entries.
+func GetAccuracy(c ConfusionMatrix) float64 {
+	var correct, total int
+	for actual, predictions := range c {
+		for predicted, count := range predictions {
+			if actual == predicted {
+				correct += count
+			}
+			total += count
+		}
+	}
+	return float64(correct) / float64(total)
+}
+
+// GetMicroPrecision returns the micro-averaged precision: true and
+// false positives are summed over every class in the matrix first,
+// and the ratio TP / (TP + FP) is taken on the totals.
+func GetMicroPrecision(c ConfusionMatrix) float64 {
+	var tp, fp float64
+	for class := range c {
+		tp += GetTruePositives(class, c)
+		fp += GetFalsePositives(class, c)
+	}
+	return tp / (tp + fp)
+}
+
+// GetMacroPrecision returns the macro-averaged precision: the
+// per-class precision values summed over every class in the matrix
+// and divided by the number of classes.
+func GetMacroPrecision(c ConfusionMatrix) float64 {
+	sum := 0.0
+	for class := range c {
+		sum += GetPrecision(class, c)
+	}
+	classes := float64(len(c))
+	return sum / classes
+}
+
+// GetMicroRecall returns the micro-averaged recall: true positives and
+// false negatives are summed over every class in the matrix first,
+// and the ratio TP / (TP + FN) is taken on the totals.
+func GetMicroRecall(c ConfusionMatrix) float64 {
+	var tp, fn float64
+	for class := range c {
+		tp += GetTruePositives(class, c)
+		fn += GetFalseNegatives(class, c)
+	}
+	return tp / (tp + fn)
+}
+
+// GetMacroRecall returns the macro-averaged recall: the per-class
+// recall values summed over every class in the matrix and divided by
+// the number of classes.
+func GetMacroRecall(c ConfusionMatrix) float64 {
+	sum := 0.0
+	for class := range c {
+		sum += GetRecall(class, c)
+	}
+	classes := float64(len(c))
+	return sum / classes
+}
+
+// GetSummary returns a tab-separated table with one line per class in
+// the ConfusionMatrix, listing the class name, true positives, false
+// positives, true negatives, precision, recall and F1 score, followed
+// by a final line with the overall accuracy.
+// Lines appear in map iteration order, which is not fixed.
+func GetSummary(c ConfusionMatrix) string {
+	var out bytes.Buffer
+	for class := range c {
+		tp := GetTruePositives(class, c)
+		fp := GetFalsePositives(class, c)
+		tn := GetTrueNegatives(class, c)
+		prec := GetPrecision(class, c)
+		rec := GetRecall(class, c)
+		f1 := GetF1Score(class, c)
+
+		out.WriteString(class)
+		out.WriteString("\t")
+		fmt.Fprintf(&out, "%.0f\t%.0f\t%.0f\t%.4f\t%.4f\t%.4f\n", tp, fp, tn, prec, rec, f1)
+	}
+
+	fmt.Fprintf(&out, "Overall accuracy: %.4f\n", GetAccuracy(c))
+
+	return out.String()
+}
--- a/evaluation/confusion_test.go
+++ b/evaluation/confusion_test.go
@ -0,0 +1,104 @@
+package evaluation
+
+import (
+	"math"
+	"testing"
+)
+
+// TestMetrics checks every metric function against a hand-built
+// two-class ConfusionMatrix whose expected values are easy to verify
+// by hand.
+func TestMetrics(testEnv *testing.T) {
+	// Outer key: actual class; inner key: predicted class.
+	// 80 entries of class "a" (75 right, 5 predicted as "b") and
+	// 20 entries of class "b" (10 right, 10 predicted as "a").
+	confusionMat := make(ConfusionMatrix)
+	confusionMat["a"] = make(map[string]int)
+	confusionMat["b"] = make(map[string]int)
+	confusionMat["a"]["a"] = 75
+	confusionMat["a"]["b"] = 5
+	confusionMat["b"]["a"] = 10
+	confusionMat["b"]["b"] = 10
+
+	// True positives: the diagonal entries.
+	tp := GetTruePositives("a", confusionMat)
+	if math.Abs(tp-75) >= 1 {
+		testEnv.Error(tp)
+	}
+	tp = GetTruePositives("b", confusionMat)
+	if math.Abs(tp-10) >= 1 {
+		testEnv.Error(tp)
+	}
+
+	// False negatives: entries of the class predicted as the other one.
+	fn := GetFalseNegatives("a", confusionMat)
+	if math.Abs(fn-5) >= 1 {
+		testEnv.Error(fn)
+	}
+	fn = GetFalseNegatives("b", confusionMat)
+	if math.Abs(fn-10) >= 1 {
+		testEnv.Error(fn)
+	}
+
+	// True negatives: with two classes, the other class's diagonal entry.
+	tn := GetTrueNegatives("a", confusionMat)
+	if math.Abs(tn-10) >= 1 {
+		testEnv.Error(tn)
+	}
+	tn = GetTrueNegatives("b", confusionMat)
+	if math.Abs(tn-75) >= 1 {
+		testEnv.Error(tn)
+	}
+
+	// False positives: entries of the other class predicted as this one.
+	fp := GetFalsePositives("a", confusionMat)
+	if math.Abs(fp-10) >= 1 {
+		testEnv.Error(fp)
+	}
+
+	fp = GetFalsePositives("b", confusionMat)
+	if math.Abs(fp-5) >= 1 {
+		testEnv.Error(fp)
+	}
+
+	// Class "a": precision 75/85, recall 75/80.
+	precision := GetPrecision("a", confusionMat)
+	recall := GetRecall("a", confusionMat)
+
+	if math.Abs(precision-0.88) >= 0.01 {
+		testEnv.Error(precision)
+	}
+
+	if math.Abs(recall-0.94) >= 0.01 {
+		testEnv.Error(recall)
+	}
+
+	// Class "b": precision 10/15, recall 10/20.
+	precision = GetPrecision("b", confusionMat)
+	recall = GetRecall("b", confusionMat)
+	if math.Abs(precision-0.666) >= 0.01 {
+		testEnv.Error(precision)
+	}
+
+	if math.Abs(recall-0.50) >= 0.01 {
+		testEnv.Error(recall)
+	}
+
+	// Micro averages: 85 true positives out of 100 in both cases.
+	precision = GetMicroPrecision(confusionMat)
+	if math.Abs(precision-0.85) >= 0.01 {
+		testEnv.Error(precision)
+	}
+
+	recall = GetMicroRecall(confusionMat)
+	if math.Abs(recall-0.85) >= 0.01 {
+		testEnv.Error(recall)
+	}
+
+	// Macro averages: the mean of the two per-class values above.
+	precision = GetMacroPrecision(confusionMat)
+	if math.Abs(precision-0.775) >= 0.01 {
+		testEnv.Error(precision)
+	}
+
+	recall = GetMacroRecall(confusionMat)
+	if math.Abs(recall-0.719) > 0.01 {
+		testEnv.Error(recall)
+	}
+
+	// F1 for "a" and overall accuracy are checked with a looser
+	// tolerance (0.1) than the metrics above.
+	fmeasure := GetF1Score("a", confusionMat)
+	if math.Abs(fmeasure-0.91) >= 0.1 {
+		testEnv.Error(fmeasure)
+	}
+
+	accuracy := GetAccuracy(confusionMat)
+	if math.Abs(accuracy-0.85) >= 0.1 {
+		testEnv.Error(accuracy)
+	}
+}
--- a/examples/datasets/chim.csv
+++ b/examples/datasets/chim.csv
@ -0,0 +1,61 @@
+A,class
+1.3,c1
+1.3,c3
+1.3,c3
+1.3,c3
+1.3,c3
+1.4,c2
+1.8,c1
+1.8,c2
+1.8,c3
+2.4,c1
+2.4,c1
+2.4,c1
+2.4,c1
+2.4,c1
+2.4,c1
+2.4,c3
+2.4,c3
+6.5,c1
+6.5,c1
+6.5,c1
+6.5,c2
+6.5,c2
+6.5,c3
+6.5,c3
+6.5,c3
+6.5,c3
+8.7,c1
+8.7,c1
+8.7,c1
+8.7,c1
+8.7,c1
+8.7,c1
+8.7,c3
+12.1,c1
+12.1,c1
+12.1,c1
+12.1,c1
+12.1,c1
+12.1,c1
+12.1,c1
+12.1,c2
+12.1,c2
+12.1,c3
+12.1,c3
+12.1,c3
+29.4,c3
+56.2,c1
+56.2,c1
+56.2,c2
+56.2,c2
+56.2,c2
+56.2,c2
+87.1,c2
+87.1,c3
+87.1,c3
+87.1,c3
+89.0,c1
+89.0,c2
+89.0,c3
+89.0,c3
--- a/examples/datasets/iris.csv
+++ b/examples/datasets/iris.csv
@ -147,4 +147,4 @@
 6.3,2.5,5.0,1.9,Iris-virginica
 6.5,3.0,5.2,2.0,Iris-virginica
 6.2,3.4,5.4,2.3,Iris-virginica
-5.9,3.0,5.1,1.8,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica
--- a/examples/datasets/iris_binned.csv
+++ b/examples/datasets/iris_binned.csv
@ -0,0 +1,152 @@
+Sepal length,Sepal width,Petal length,Petal width,Species
+2,3.5,1.4,0.2,Iris-setosa
+1,3,1.4,0.2,Iris-setosa
+1,3.2,1.3,0.2,Iris-setosa
+0,3.1,1.5,0.2,Iris-setosa
+1,3.6,1.4,0.2,Iris-setosa
+3,3.9,1.7,0.4,Iris-setosa
+0,3.4,1.4,0.3,Iris-setosa
+1,3.4,1.5,0.2,Iris-setosa
+0,2.9,1.4,0.2,Iris-setosa
+1,3.1,1.5,0.1,Iris-setosa
+3,3.7,1.5,0.2,Iris-setosa
+1,3.4,1.6,0.2,Iris-setosa
+1,3,1.4,0.1,Iris-setosa
+0,3,1.1,0.1,Iris-setosa
+4,4,1.2,0.2,Iris-setosa
+3,4.4,1.5,0.4,Iris-setosa
+3,3.9,1.3,0.4,Iris-setosa
+2,3.5,1.4,0.3,Iris-setosa
+3,3.8,1.7,0.3,Iris-setosa
+2,3.8,1.5,0.3,Iris-setosa
+3,3.4,1.7,0.2,Iris-setosa
+2,3.7,1.5,0.4,Iris-setosa
+0,3.6,1,0.2,Iris-setosa
+2,3.3,1.7,0.5,Iris-setosa
+1,3.4,1.9,0.2,Iris-setosa
+1,3,1.6,0.2,Iris-setosa
+1,3.4,1.6,0.4,Iris-setosa
+2,3.5,1.5,0.2,Iris-setosa
+2,3.4,1.4,0.2,Iris-setosa
+1,3.2,1.6,0.2,Iris-setosa
+1,3.1,1.6,0.2,Iris-setosa
+3,3.4,1.5,0.4,Iris-setosa
+2,4.1,1.5,0.1,Iris-setosa
+3,4.2,1.4,0.2,Iris-setosa
+1,3.1,1.5,0.1,Iris-setosa
+1,3.2,1.2,0.2,Iris-setosa
+3,3.5,1.3,0.2,Iris-setosa
+1,3.1,1.5,0.1,Iris-setosa
+0,3,1.3,0.2,Iris-setosa
+2,3.4,1.5,0.2,Iris-setosa
+1,3.5,1.3,0.3,Iris-setosa
+0,2.3,1.3,0.3,Iris-setosa
+0,3.2,1.3,0.2,Iris-setosa
+1,3.5,1.6,0.6,Iris-setosa
+2,3.8,1.9,0.4,Iris-setosa
+1,3,1.4,0.3,Iris-setosa
+2,3.8,1.6,0.2,Iris-setosa
+0,3.2,1.4,0.2,Iris-setosa
+2,3.7,1.5,0.2,Iris-setosa
+1,3.3,1.4,0.2,Iris-setosa
+7,3.2,4.7,1.4,Iris-versicolor
+5,3.2,4.5,1.5,Iris-versicolor
+7,3.1,4.9,1.5,Iris-versicolor
+3,2.3,4,1.3,Iris-versicolor
+6,2.8,4.6,1.5,Iris-versicolor
+3,2.8,4.5,1.3,Iris-versicolor
+5,3.3,4.7,1.6,Iris-versicolor
+1,2.4,3.3,1,Iris-versicolor
+6,2.9,4.6,1.3,Iris-versicolor
+2,2.7,3.9,1.4,Iris-versicolor
+1,2,3.5,1,Iris-versicolor
+4,3,4.2,1.5,Iris-versicolor
+4,2.2,4,1,Iris-versicolor
+5,2.9,4.7,1.4,Iris-versicolor
+3,2.9,3.6,1.3,Iris-versicolor
+6,3.1,4.4,1.4,Iris-versicolor
+3,3,4.5,1.5,Iris-versicolor
+4,2.7,4.1,1,Iris-versicolor
+5,2.2,4.5,1.5,Iris-versicolor
+3,2.5,3.9,1.1,Iris-versicolor
+4,3.2,4.8,1.8,Iris-versicolor
+5,2.8,4,1.3,Iris-versicolor
+5,2.5,4.9,1.5,Iris-versicolor
+5,2.8,4.7,1.2,Iris-versicolor
+5,2.9,4.3,1.3,Iris-versicolor
+6,3,4.4,1.4,Iris-versicolor
+6,2.8,4.8,1.4,Iris-versicolor
+6,3,5,1.7,Iris-versicolor
+4,2.9,4.5,1.5,Iris-versicolor
+3,2.6,3.5,1,Iris-versicolor
+3,2.4,3.8,1.1,Iris-versicolor
+3,2.4,3.7,1,Iris-versicolor
+4,2.7,3.9,1.2,Iris-versicolor
+4,2.7,5.1,1.6,Iris-versicolor
+3,3,4.5,1.5,Iris-versicolor
+4,3.4,4.5,1.6,Iris-versicolor
+6,3.1,4.7,1.5,Iris-versicolor
+5,2.3,4.4,1.3,Iris-versicolor
+3,3,4.1,1.3,Iris-versicolor
+3,2.5,4,1.3,Iris-versicolor
+3,2.6,4.4,1.2,Iris-versicolor
+5,3,4.6,1.4,Iris-versicolor
+4,2.6,4,1.2,Iris-versicolor
+1,2.3,3.3,1,Iris-versicolor
+3,2.7,4.2,1.3,Iris-versicolor
+3,3,4.2,1.2,Iris-versicolor
+3,2.9,4.2,1.3,Iris-versicolor
+5,2.9,4.3,1.3,Iris-versicolor
+2,2.5,3,1.1,Iris-versicolor
+3,2.8,4.1,1.3,Iris-versicolor
+5,3.3,6,2.5,Iris-virginica
+4,2.7,5.1,1.9,Iris-virginica
+7,3,5.9,2.1,Iris-virginica
+5,2.9,5.6,1.8,Iris-virginica
+6,3,5.8,2.2,Iris-virginica
+9,3,6.6,2.1,Iris-virginica
+1,2.5,4.5,1.7,Iris-virginica
+8,2.9,6.3,1.8,Iris-virginica
+6,2.5,5.8,1.8,Iris-virginica
+8,3.6,6.1,2.5,Iris-virginica
+6,3.2,5.1,2,Iris-virginica
+5,2.7,5.3,1.9,Iris-virginica
+6,3,5.5,2.1,Iris-virginica
+3,2.5,5,2,Iris-virginica
+4,2.8,5.1,2.4,Iris-virginica
+5,3.2,5.3,2.3,Iris-virginica
+6,3,5.5,1.8,Iris-virginica
+9,3.8,6.7,2.2,Iris-virginica
+9,2.6,6.9,2.3,Iris-virginica
+4,2.2,5,1.5,Iris-virginica
+7,3.2,5.7,2.3,Iris-virginica
+3,2.8,4.9,2,Iris-virginica
+9,2.8,6.7,2,Iris-virginica
+5,2.7,4.9,1.8,Iris-virginica
+6,3.3,5.7,2.1,Iris-virginica
+8,3.2,6,1.8,Iris-virginica
+5,2.8,4.8,1.8,Iris-virginica
+5,3,4.9,1.8,Iris-virginica
+5,2.8,5.6,2.1,Iris-virginica
+8,3,5.8,1.6,Iris-virginica
+8,2.8,6.1,1.9,Iris-virginica
+9,3.8,6.4,2,Iris-virginica
+5,2.8,5.6,2.2,Iris-virginica
+5,2.8,5.1,1.5,Iris-virginica
+5,2.6,5.6,1.4,Iris-virginica
+9,3,6.1,2.3,Iris-virginica
+5,3.4,5.6,2.4,Iris-virginica
+5,3.1,5.5,1.8,Iris-virginica
+4,3,4.8,1.8,Iris-virginica
+7,3.1,5.4,2.1,Iris-virginica
+6,3.1,5.6,2.4,Iris-virginica
+7,3.1,5.1,2.3,Iris-virginica
+4,2.7,5.1,1.9,Iris-virginica
+6,3.2,5.9,2.3,Iris-virginica
+6,3.3,5.7,2.5,Iris-virginica
+6,3,5.2,2.3,Iris-virginica
+5,2.5,5,1.9,Iris-virginica
+6,3,5.2,2,Iris-virginica
+5,3.4,5.4,2.3,Iris-virginica
+4,3,5.1,1.8,Iris-virginica
+
--- a/examples/datasets/iris_headers.csv
+++ b/examples/datasets/iris_headers.csv
@ -0,0 +1,151 @@
+Sepal length, Sepal width,Petal length, Petal width, Species
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.9,3.1,4.9,1.5,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+6.5,2.8,4.6,1.5,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+6.3,3.3,4.7,1.6,Iris-versicolor
+4.9,2.4,3.3,1.0,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.9,3.0,4.2,1.5,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+6.2,2.2,4.5,1.5,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+5.4,3.0,4.5,1.5,Iris-versicolor
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+6.2,2.9,4.3,1.3,Iris-versicolor
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+4.9,2.5,4.5,1.7,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+6.7,2.5,5.8,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+5.7,2.5,5.0,2.0,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+6.0,2.2,5.0,1.5,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+5.6,2.8,4.9,2.0,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+6.2,2.8,4.8,1.8,Iris-virginica
+6.1,3.0,4.9,1.8,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.1,2.6,5.6,1.4,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,3.1,5.5,1.8,Iris-virginica
+6.0,3.0,4.8,1.8,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.0,5.2,2.3,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.2,3.4,5.4,2.3,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica
--- a/examples/datasets/iris_sorted_asc.csv
+++ b/examples/datasets/iris_sorted_asc.csv
@ -0,0 +1,151 @@
+Sepal length, Sepal width,Petal length, Petal width, Species
+4.3,3.0,1.1,0.1,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+4.9,2.4,3.3,1.0,Iris-versicolor
+4.9,2.5,4.5,1.7,Iris-virginica
+4.9,3.0,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.1,3.3,1.7,0.5,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.1,3.5,1.4,0.2,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.2,3.4,1.4,0.2,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.4,3.0,4.5,1.5,Iris-versicolor
+5.4,3.4,1.5,0.4,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+5.5,2.3,4.0,1.3,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+5.5,3.5,1.3,0.2,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.6,2.8,4.9,2.0,Iris-virginica
+5.6,2.9,3.6,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.7,2.5,5.0,2.0,Iris-virginica
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,3.8,1.7,0.3,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+5.8,2.7,5.1,1.9,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+5.8,4.0,1.2,0.2,Iris-setosa
+5.9,3.0,4.2,1.5,Iris-versicolor
+5.9,3.0,5.1,1.8,Iris-virginica
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.0,2.2,5.0,1.5,Iris-virginica
+6.0,2.7,5.1,1.6,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+6.0,3.0,4.8,1.8,Iris-virginica
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.1,2.6,5.6,1.4,Iris-virginica
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+6.1,3.0,4.9,1.8,Iris-virginica
+6.2,2.2,4.5,1.5,Iris-versicolor
+6.2,2.8,4.8,1.8,Iris-virginica
+6.2,2.9,4.3,1.3,Iris-versicolor
+6.2,3.4,5.4,2.3,Iris-virginica
+6.3,2.3,4.4,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.3,2.5,5.0,1.9,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.3,3.3,4.7,1.6,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.4,3.1,5.5,1.8,Iris-virginica
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,2.8,4.6,1.5,Iris-versicolor
+6.5,3.0,5.2,2.0,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.6,2.9,4.6,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.7,2.5,5.8,1.8,Iris-virginica
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.7,3.0,5.2,2.3,Iris-virginica
+6.7,3.1,4.4,1.4,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.7,3.1,5.6,2.4,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.8,3.0,5.5,2.1,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.9,3.1,4.9,1.5,Iris-versicolor
+6.9,3.1,5.1,2.3,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+7.0,3.2,4.7,1.4,Iris-versicolor
+7.1,3.0,5.9,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
--- a/examples/datasets/iris_sorted_desc.csv
+++ b/examples/datasets/iris_sorted_desc.csv
@ -0,0 +1,151 @@
+Sepal length, Sepal width,Petal length, Petal width, Species
+7.9,3.8,6.4,2.0,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.9,3.2,5.7,2.3,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+6.9,3.1,4.9,1.5,Iris-versicolor
+6.8,3.2,5.9,2.3,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+6.7,3.0,5.2,2.3,Iris-virginica
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.7,2.5,5.8,1.8,Iris-virginica
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+6.5,3.2,5.1,2.0,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.5,2.8,4.6,1.5,Iris-versicolor
+6.4,3.2,5.3,2.3,Iris-virginica
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.4,3.1,5.5,1.8,Iris-virginica
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.4,2.8,5.6,2.2,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.3,3.3,6.0,2.5,Iris-virginica
+6.3,3.3,4.7,1.6,Iris-versicolor
+6.3,2.9,5.6,1.8,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+6.2,3.4,5.4,2.3,Iris-virginica
+6.2,2.9,4.3,1.3,Iris-versicolor
+6.2,2.8,4.8,1.8,Iris-virginica
+6.2,2.2,4.5,1.5,Iris-versicolor
+6.1,3.0,4.9,1.8,Iris-virginica
+6.1,3.0,4.6,1.4,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.1,2.6,5.6,1.4,Iris-virginica
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.0,3.0,4.8,1.8,Iris-virginica
+6.0,2.9,4.5,1.5,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+6.0,2.2,5.0,1.5,Iris-virginica
+6.0,2.2,4.0,1.0,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+5.9,3.0,5.1,1.8,Iris-virginica
+5.9,3.0,4.2,1.5,Iris-versicolor
+5.8,4.0,1.2,0.2,Iris-setosa
+5.8,2.8,5.1,2.4,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+5.8,2.7,4.1,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.7,4.4,1.5,0.4,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.7,2.5,5.0,2.0,Iris-virginica
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+5.6,2.8,4.9,2.0,Iris-virginica
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.5,4.2,1.4,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+5.5,2.6,4.4,1.2,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+5.4,3.9,1.7,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.4,3.0,4.5,1.5,Iris-versicolor
+5.3,3.7,1.5,0.2,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.1,3.8,1.9,0.4,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.1,3.5,1.4,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.0,3.6,1.4,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+4.9,3.1,1.5,0.1,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.9,2.5,4.5,1.7,Iris-virginica
+4.9,2.4,3.3,1.0,Iris-versicolor
+4.8,3.4,1.9,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
--- a/examples/instances/instances.go
+++ b/examples/instances/instances.go
@ -0,0 +1,69 @@
+package main
+
+// This example program demonstrates Instances
+
+import (
+	"fmt"
+	base "github.com/sjwhitworth/golearn/base"
+)
+
+func main() {
+
+	// Load the iris dataset (which has a header row) into a set of
+	// Instances using ParseCSVToInstances.
+	rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
+	if err != nil {
+		panic(err)
+	}
+
+	// Printing a set of Instances gives a human-readable summary.
+	// The first section is a line like
+	//     Instances with 150 row(s) and 5 attribute(s)
+	//
+	// It next prints all the attributes
+	//     FloatAttribute(Sepal length)
+	//     FloatAttribute(Sepal width)
+	//     FloatAttribute(Petal length)
+	//     FloatAttribute(Petal width)
+	//     CategoricalAttribute([Iris-setosa Iris-versicolor Iris-virginica])
+	// The final attribute has an asterisk (*) printed before it,
+	// meaning that it is the class variable. It then prints out up to
+	// 30 rows which correspond to those attributes.
+	// 	5.10 3.50 1.40 0.20 Iris-setosa
+	// 	4.90 3.00 1.40 0.20 Iris-setosa
+	fmt.Println(rawData)
+
+	// Two decimal places may not be enough: raise the Precision
+	// field of the first column's FloatAttribute.
+	attr, ok := rawData.GetAttr(0).(*base.FloatAttribute)
+	if !ok {
+		panic("Invalid cast")
+	}
+	attr.Precision = 4
+	// The first column is now printed with more precision
+	fmt.Println(rawData)
+
+	// Individual cells can be overwritten from their string form,
+	// although this API is not very sophisticated.
+	rawData.SetAttrStr(0, 0, "1.00")
+	rawData.SetAttrStr(0, rawData.ClassIndex, "Iris-unusual")
+	fmt.Println(rawData)
+
+	// New Instances can also be built from scratch.
+	// Inside an Instance, everything's stored as float64
+	newData := []float64{1.0, 0.0}
+
+	// Describe the two columns: one float, one categorical class
+	attrs := []base.Attribute{
+		base.NewFloatAttribute(),
+		new(base.CategoricalAttribute),
+	}
+	attrs[0].SetName("Arbitrary Float Quantity")
+	attrs[1].SetName("Class")
+	// Register a standard class value
+	attrs[1].GetSysValFromString("A")
+
+	// Assemble the final one-row instance set and print it
+	newInst := base.NewInstancesFromRaw(attrs, 1, newData)
+	fmt.Println(newInst)
+}
--- a/examples/knnclassifier/knnclassifier_iris.go
+++ b/examples/knnclassifier/knnclassifier_iris.go
@ -0,0 +1,32 @@
+package main
+
+import (
+	"fmt"
+	base "github.com/sjwhitworth/golearn/base"
+	evaluation "github.com/sjwhitworth/golearn/evaluation"
+	knn "github.com/sjwhitworth/golearn/knn"
+)
+
+func main() {
+	// Load the iris dataset (with headers) and randomise the row order
+	rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true)
+	if err != nil {
+		panic(err)
+	}
+	rawData.Shuffle()
+
+	// Set up a KNN classifier using Euclidean distance and 2 neighbours
+	cls := knn.NewKnnClassifier("euclidean", 2)
+
+	// Split into training and test sets, then fit on the training part
+	split := base.InstancesTrainTestSplit(rawData, 0.50)
+	trainData, testData := split[0], split[1]
+	cls.Fit(trainData)
+
+	// Predict a label for every test row and print the predictions
+	predictions := cls.Predict(testData)
+	fmt.Println(predictions)
+
+	// Print precision/recall metrics derived from the confusion matrix
+	confusionMat := evaluation.GetConfusionMatrix(testData, predictions)
+	fmt.Println(evaluation.GetSummary(confusionMat))
+}
--- a/examples/knnclassifier_iris.go
+++ b/examples/knnclassifier_iris.go
@ -1,27 +0,0 @@
-package main
-
-import (
-	"fmt"
-
-	data "github.com/sjwhitworth/golearn/data"
-	knn "github.com/sjwhitworth/golearn/knn"
-	util "github.com/sjwhitworth/golearn/utilities"
-)
-
-func main() {
-	//Parses the infamous Iris data.
-	cols, rows, _, labels, data := data.ParseCsv("datasets/iris.csv", 4, []int{0, 1, 2})
-
-	//Initialises a new KNN classifier
-	cls := knn.NewKnnClassifier("euclidean")
-	cls.Fit(labels, data, rows, cols)
-
-	for {
-		//Creates a random array of N float64s between 0 and 7
-		randArray := util.RandomArray(3, 7)
-
-		//Calculates the Euclidean distance and returns the most popular label
-		labels := cls.Predict(randArray, 3)
-		fmt.Println(labels)
-	}
-}
--- a/examples/knnregressor_random.go
+++ b/examples/knnregressor_random.go
@ -1,32 +0,0 @@
-package main
-
-import (
-	"fmt"
-
-	"github.com/gonum/matrix/mat64"
-	data "github.com/sjwhitworth/golearn/data"
-	knn "github.com/sjwhitworth/golearn/knn"
-	util "github.com/sjwhitworth/golearn/utilities"
-)
-
-func main() {
-	//Parses the infamous Iris data.
-	cols, rows, _, labels, data := data.ParseCsv("datasets/randomdata.csv", 2, []int{0, 1})
-	newlabels := util.ConvertLabelsToFloat(labels)
-
-	//Initialises a new KNN classifier
-	cls := knn.NewKnnRegressor("euclidean")
-	cls.Fit(newlabels, data, rows, cols)
-
-	for {
-		//Creates a random array of N float64s between 0 and Y
-		randArray := util.RandomArray(2, 100)
-
-		//Initialises a vector with this array
-		random := mat64.NewDense(1, 2, randArray)
-
-		//Calculates the Euclidean distance and returns the most popular label
-		outcome := cls.Predict(random, 3)
-		fmt.Println(outcome)
-	}
-}
--- a/ext/lib/liblinear.so
+++ b/ext/lib/liblinear.so
--- a/ext/lib/linear.dll
+++ b/ext/lib/linear.dll
--- a/ext/liblinear_src/blas/blas.a
+++ b/ext/liblinear_src/blas/blas.a
--- a/ext/liblinear_src/blas/daxpy.o
+++ b/ext/liblinear_src/blas/daxpy.o
--- a/ext/liblinear_src/blas/ddot.o
+++ b/ext/liblinear_src/blas/ddot.o
--- a/ext/liblinear_src/blas/dnrm2.o
+++ b/ext/liblinear_src/blas/dnrm2.o
--- a/ext/liblinear_src/blas/dscal.o
+++ b/ext/liblinear_src/blas/dscal.o
--- a/ext/liblinear_src/linear.dll
+++ b/ext/liblinear_src/linear.dll
--- a/ext/liblinear_src/linear.o
+++ b/ext/liblinear_src/linear.o
--- a/ext/liblinear_src/tron.o
+++ b/ext/liblinear_src/tron.o
--- a/filters/binning.go
+++ b/filters/binning.go
@ -0,0 +1,121 @@
+package filters
+
+import (
+	"fmt"
+	base "github.com/sjwhitworth/golearn/base"
+	"math"
+)
+
+// BinningFilter does equal-width binning for numeric
+// Attributes (aka "histogram binning")
+type BinningFilter struct {
+	// Attributes holds the column indexes selected for discretisation.
+	Attributes []int
+	// Instances is the set used to resolve attribute indexes and,
+	// in Build, to compute each column's value range.
+	Instances  *base.Instances
+	// BinCount is the number of bins each attribute is split into.
+	BinCount   int
+	// MinVals and MaxVals map a column index to the smallest and
+	// largest value seen for that column during Build.
+	MinVals    map[int]float64
+	MaxVals    map[int]float64
+	// trained is set by Build once a column range has been computed.
+	trained    bool
+}
+
+// NewBinningFilter returns an untrained BinningFilter bound to
+// `inst' which will split each selected attribute into `bins' bins.
+// The attribute list and min/max maps start out empty.
+func NewBinningFilter(inst *base.Instances, bins int) BinningFilter {
+	return BinningFilter{
+		Attributes: make([]int, 0),
+		Instances:  inst,
+		BinCount:   bins,
+		MinVals:    make(map[int]float64),
+		MaxVals:    make(map[int]float64),
+		trained:    false,
+	}
+}
+
+// AddAttribute looks up the index of attribute `a' in the filter's
+// Instances and registers it for discretisation.
+//
+// IMPORTANT: This function panic()s if the attribute can't be found.
+func (b *BinningFilter) AddAttribute(a base.Attribute) {
+	if idx := b.Instances.GetAttrIndex(a); idx != -1 {
+		b.Attributes = append(b.Attributes, idx)
+		return
+	}
+	panic("invalid attribute")
+}
+
+// AddAllNumericAttributes registers every Float64Type attribute,
+// other than the class attribute, for discretisation.
+func (b *BinningFilter) AddAllNumericAttributes() {
+	for col := 0; col < b.Instances.Cols; col++ {
+		// The class column is never binned; everything else must be numeric
+		if col != b.Instances.ClassIndex && b.Instances.GetAttr(col).GetType() == base.Float64Type {
+			b.Attributes = append(b.Attributes, col)
+		}
+	}
+}
+
+// Build scans the training instances and records, for every
+// registered attribute, the minimum and maximum value observed.
+// The filter is marked as trained once at least one attribute
+// has been processed.
+func (b *BinningFilter) Build() {
+	for _, col := range b.Attributes {
+		lo, hi := math.Inf(1), math.Inf(-1)
+		for row := 0; row < b.Instances.Rows; row++ {
+			v := b.Instances.Get(row, col)
+			if v > hi {
+				hi = v
+			}
+			if v < lo {
+				lo = v
+			}
+		}
+		b.MinVals[col] = lo
+		b.MaxVals[col] = hi
+		b.trained = true
+	}
+}
+
+// Run applies a trained BinningFilter to a set of Instances,
+// discretising any numeric attributes added. Each value is mapped
+// to a bin number in [0, BinCount-1] and the column's attribute is
+// replaced with a CategoricalAttribute whose values are "0".."BinCount-1".
+//
+// IMPORTANT: Run discretises in-place, so make sure to take
+// a copy if the original instances are still needed
+//
+// IMPORTANT: This function panic()s if the filter has not been
+// trained. Call Build() before running this function
+//
+// IMPORTANT: Call Build() after adding any additional attributes.
+// Otherwise, the training structure will be out of date from
+// the values expected and could cause a panic.
+func (b *BinningFilter) Run(on *base.Instances) {
+	if !b.trained {
+		panic("Call Build() beforehand")
+	}
+	// Iterate over the stored attribute indexes themselves. Ranging with
+	// a single variable would yield slice positions (0, 1, ...) and bin
+	// the wrong columns whenever those differ from the stored indexes.
+	for _, attr := range b.Attributes {
+		minVal := b.MinVals[attr]
+		maxVal := b.MaxVals[attr]
+		disc := 0
+		// Casts to float32 to replicate a floating point precision error
+		delta := float32(maxVal - minVal)
+		delta /= float32(b.BinCount)
+		for i := 0; i < on.Rows; i++ {
+			val := on.Get(i, attr)
+			if val <= minVal {
+				// Anything at or below the training minimum goes in the first bin
+				disc = 0
+			} else {
+				disc = int(math.Floor(float64(float32(val-minVal) / delta)))
+				// Clamp values at or above the training maximum to the last bin
+				if disc >= b.BinCount {
+					disc = b.BinCount - 1
+				}
+			}
+			on.Set(i, attr, float64(disc))
+		}
+		// Swap the numeric attribute for a categorical one with the same name
+		newAttribute := new(base.CategoricalAttribute)
+		newAttribute.SetName(on.GetAttr(attr).GetName())
+		for i := 0; i < b.BinCount; i++ {
+			newAttribute.GetSysValFromString(fmt.Sprintf("%d", i))
+		}
+		on.ReplaceAttr(attr, newAttribute)
+	}
+}
--- a/filters/binning_test.go
+++ b/filters/binning_test.go
@ -0,0 +1,28 @@
+package filters
+
+import (
+	base "github.com/sjwhitworth/golearn/base"
+	"math"
+	"testing"
+)
+
+// TestBinning bins the first iris column into 10 bins and compares the
+// result against the reference values in iris_binned.csv. The untouched
+// copy (inst3) is only used to report the original value on failure.
+func TestBinning(testEnv *testing.T) {
+	// Check every load separately so an early failure is not masked
+	// by a later successful call overwriting err.
+	inst1, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
+	if err != nil {
+		testEnv.Fatal(err)
+	}
+	inst2, err := base.ParseCSVToInstances("../examples/datasets/iris_binned.csv", true)
+	if err != nil {
+		testEnv.Fatal(err)
+	}
+	inst3, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
+	if err != nil {
+		testEnv.Fatal(err)
+	}
+	filt := NewBinningFilter(inst1, 10)
+	filt.AddAttribute(inst1.GetAttr(0))
+	filt.Build()
+	filt.Run(inst1)
+	for i := 0; i < inst1.Rows; i++ {
+		val1 := inst1.Get(i, 0)
+		val2 := inst2.Get(i, 0)
+		val3 := inst3.Get(i, 0)
+		if math.Abs(val1-val2) >= 1 {
+			testEnv.Error(val1, val2, val3, i)
+		}
+	}
+}
--- a/filters/chimerge.go
+++ b/filters/chimerge.go
@ -0,0 +1,365 @@
+package filters
+
+import (
+	"fmt"
+	base "github.com/sjwhitworth/golearn/base"
+	"math"
+)
+
+// ChiMergeFilter implements supervised discretisation
+// by merging successive numeric intervals if the difference
+// in their class distribution is not statistically significant.
+// See Bramer, "Principles of Data Mining", 2nd Edition
+//  pp 105--115
+type ChiMergeFilter struct {
+	// Attributes holds the column indexes selected for discretisation.
+	Attributes   []int
+	// Instances is the training set the frequency tables are built from.
+	Instances    *base.Instances
+	// Tables maps a column index to the merged frequency table produced by Build.
+	Tables       map[int][]*FrequencyTableEntry
+	// Significance, MinRows and MaxRows are passed through to chiMerge.
+	Significance float64
+	MinRows      int
+	MaxRows      int
+	// _Trained is set by Build once a table has been computed.
+	_Trained     bool
+}
+
+// NewChiMergeFilter returns an untrained ChiMergeFilter bound to
+// `inst' with the given significance level. The attribute list and
+// table map start out empty; MinRows and MaxRows start at zero.
+func NewChiMergeFilter(inst *base.Instances, significance float64) ChiMergeFilter {
+	return ChiMergeFilter{
+		Attributes:   make([]int, 0),
+		Instances:    inst,
+		Tables:       make(map[int][]*FrequencyTableEntry),
+		Significance: significance,
+		MinRows:      0,
+		MaxRows:      0,
+		_Trained:     false,
+	}
+}
+
+// Build runs chiMerge over ChiMergeFilter.Instances for every
+// registered attribute and stores the resulting table under that
+// attribute's index. The filter is marked as trained once at least
+// one attribute has been processed.
+func (c *ChiMergeFilter) Build() {
+	for _, col := range c.Attributes {
+		c.Tables[col] = chiMerge(c.Instances, col, c.Significance, c.MinRows, c.MaxRows)
+		c._Trained = true
+	}
+}
+
+// Run discretises the set of Instances `on' using the tables
+// computed by Build. Each value is replaced with the index of the
+// last table entry whose Value is strictly below it (0 if none),
+// and the column's attribute is replaced by a CategoricalAttribute
+// whose values are the formatted table entry values.
+//
+// IMPORTANT: ChiMergeFilter discretises in place.
+//
+// IMPORTANT: This function panic()s if Build() has not been called.
+func (c *ChiMergeFilter) Run(on *base.Instances) {
+	if !c._Trained {
+		panic("Call Build() beforehand")
+	}
+	for col, table := range c.Tables {
+		for row := 0; row < on.Rows; row++ {
+			val := on.Get(row, col)
+			bin := 0
+			for j, entry := range table {
+				// Stop at the first entry that is not strictly below val
+				if !(entry.Value < val) {
+					break
+				}
+				bin = j
+			}
+			on.Set(row, col, float64(bin))
+		}
+		// Swap the numeric attribute for a categorical one with the same name
+		replacement := new(base.CategoricalAttribute)
+		replacement.SetName(on.GetAttr(col).GetName())
+		for _, entry := range table {
+			replacement.GetSysValFromString(fmt.Sprintf("%f", entry.Value))
+		}
+		on.ReplaceAttr(col, replacement)
+	}
+}
+
+// AddAttribute registers the numeric Attribute `attr' with the
+// filter by storing its index within the filter's Instances.
+//
+// IMPORTANT: This function panic()s if the attribute is not of
+// Float64Type, or if it can't locate the attribute in the
+// Instances set.
+func (c *ChiMergeFilter) AddAttribute(attr base.Attribute) {
+	if attr.GetType() != base.Float64Type {
+		panic("ChiMerge only works on Float64Attributes")
+	}
+	idx := c.Instances.GetAttrIndex(attr)
+	if idx != -1 {
+		c.Attributes = append(c.Attributes, idx)
+		return
+	}
+	panic("Invalid attribute!")
+}
+
+// FrequencyTableEntry is one row of a ChiMerge frequency table:
+// an attribute value together with the number of times each class
+// label was observed for it.
+type FrequencyTableEntry struct {
+	// Value is the attribute value this row represents.
+	Value     float64
+	// Frequency maps a class label to its observation count.
+	Frequency map[string]int
+}
+
+// String renders the entry as the value (two decimal places)
+// followed by the class frequency map.
+func (t *FrequencyTableEntry) String() string {
+	// %v, not %s: Frequency is a map with int values, which %s cannot format
+	return fmt.Sprintf("%.2f %v", t.Value, t.Frequency)
+}
+
+func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEntry {
+	ret := make([]*FrequencyTableEntry, 0)
+	var attribute *base.FloatAttribute
+	attribute, ok := inst.GetAttr(attr).(*base.FloatAttribute)
+	if !ok {
+		panic("only use Chi-M on numeric stuff")
+	}
+	for i := 0; i < inst.Rows; i++ {
+		value := inst.Get(i, attr)
+		valueConv := attribute.GetUsrVal(value)
+		class := inst.GetClass(i)
+		// Search the frequency table for the value
+		found := false
+		for _, entry := range ret {
+			if entry.Value == valueConv {
+				found = true
+				entry.Frequency[class] += 1
+			}
+		}
+		if !found {
+			newEntry := &FrequencyTableEntry{
+				valueConv,
+				make(map[string]int),
+			}
+			newEntry.Frequency[class] = 1
+			ret = append(ret, newEntry)
+		}
+	}
+
+	return ret
+}
+
+// chiSquaredPdf evaluates the chi-squared probability density
+// function with k degrees of freedom at x. Negative x yields 0.
+func chiSquaredPdf(k float64, x float64) float64 {
+	if x < 0 {
+		return 0
+	}
+	halfK := k / 2
+	numerator := math.Pow(x, halfK-1) * math.Exp(-x/2)
+	denominator := math.Pow(2, halfK) * math.Gamma(halfK)
+	return numerator / denominator
+}
+
+// chiSquaredPercentile approximates the integral of the chi-squared
+// density with k degrees of freedom from 0 to x, i.e. the cumulative
+// probability at x. The interval is cut into 128 equal steps and the
+// sampled density values are combined with the weights 7/32/12/32/14
+// scaled by 2w/45.
+//
+// NOTE(review): for k == 1 the density sampled at 0 is +Inf
+// (math.Pow(0, -0.5)), so the result is +Inf — confirm callers never
+// pass a single degree of freedom.
+func chiSquaredPercentile(k int, x float64) float64 {
+	// Implements Yahya et al.'s "A Numerical Procedure
+	//  for Computing Chi-Square Percentage Points"
+	// InterStat Journal 01/2007; April 25:page:1-8.
+	steps := 32
+	intervals := 4 * steps
+	// w is the width of one sub-interval
+	w := x / (4.0 * float64(steps))
+	// Sample the density at every sub-interval boundary, 0..x inclusive
+	values := make([]float64, intervals+1)
+	for i := 0; i < intervals+1; i++ {
+		c := w * float64(i)
+		v := chiSquaredPdf(float64(k), c)
+		values[i] = v
+	}
+
+	// ret1: the two end points
+	ret1 := values[0] + values[len(values)-1]
+	ret2 := 0.0
+	ret3 := 0.0
+	ret4 := 0.0
+
+	// ret2: indexes 2, 6, 10, ...
+	for i := 2; i < intervals-1; i += 4 {
+		ret2 += values[i]
+	}
+
+	// ret3: interior indexes that are multiples of 4
+	for i := 4; i < intervals-3; i += 4 {
+		ret3 += values[i]
+	}
+
+	// ret4: all odd indexes
+	for i := 1; i < intervals; i += 2 {
+		ret4 += values[i]
+	}
+
+	return (2.0 * w / 45) * (7*ret1 + 12*ret2 + 14*ret3 + 32*ret4)
+}
+
+// chiCountClasses totals the per-class observation counts across
+// every entry of a frequency table, returning a map from class
+// label to overall count.
+func chiCountClasses(entries []*FrequencyTableEntry) map[string]int {
+	totals := make(map[string]int)
+	for _, entry := range entries {
+		for label, count := range entry.Frequency {
+			totals[label] += count
+		}
+	}
+	return totals
+}
+
+// chiComputeStatistic returns the chi-squared statistic for the
+// two-row contingency table formed by entry1 and entry2. For each
+// row and class it adds (observed - expected)^2 / expected, where
+// expected = classTotal * rowTotal / grandTotal and any expected
+// value below 0.5 is replaced by 0.5 in the denominator.
+func chiComputeStatistic(entry1 *FrequencyTableEntry, entry2 *FrequencyTableEntry) float64 {
+
+	// Sum the number of things observed per class
+	classCounter := make(map[string]int)
+	for k := range entry1.Frequency {
+		classCounter[k] += entry1.Frequency[k]
+	}
+	for k := range entry2.Frequency {
+		classCounter[k] += entry2.Frequency[k]
+	}
+
+	// Sum the number of things observed per value
+	entryObservations1 := 0
+	entryObservations2 := 0
+	for k := range entry1.Frequency {
+		entryObservations1 += entry1.Frequency[k]
+	}
+	for k := range entry2.Frequency {
+		entryObservations2 += entry2.Frequency[k]
+	}
+
+	totalObservations := entryObservations1 + entryObservations2
+	// Compute the expected values per class
+	expectedClassValues1 := make(map[string]float64)
+	expectedClassValues2 := make(map[string]float64)
+	for k := range classCounter {
+		expectedClassValues1[k] = float64(classCounter[k])
+		expectedClassValues1[k] *= float64(entryObservations1)
+		expectedClassValues1[k] /= float64(totalObservations)
+	}
+	for k := range classCounter {
+		expectedClassValues2[k] = float64(classCounter[k])
+		expectedClassValues2[k] *= float64(entryObservations2)
+		expectedClassValues2[k] /= float64(totalObservations)
+	}
+
+	// Compute chi-squared value
+	chiSum := 0.0
+	for k := range expectedClassValues1 {
+		// A class missing from entry1.Frequency reads as an observed count of 0
+		numerator := float64(entry1.Frequency[k])
+		numerator -= expectedClassValues1[k]
+		numerator = math.Pow(numerator, 2)
+		denominator := float64(expectedClassValues1[k])
+		// Floor the denominator so tiny expected counts don't dominate
+		if denominator < 0.5 {
+			denominator = 0.5
+		}
+		chiSum += numerator / denominator
+	}
+	for k := range expectedClassValues2 {
+		numerator := float64(entry2.Frequency[k])
+		numerator -= expectedClassValues2[k]
+		numerator = math.Pow(numerator, 2)
+		denominator := float64(expectedClassValues2[k])
+		if denominator < 0.5 {
+			denominator = 0.5
+		}
+		chiSum += numerator / denominator
+	}
+
+	return chiSum
+}
+
+// chiMergeMergeZipAdjacent merges rows minIndex and minIndex+1 of
+// freq into a single entry and returns the table, now one row
+// shorter. The merged entry keeps the Value of row minIndex and the
+// summed class frequencies of both rows.
+//
+// The elements of the slice passed in are overwritten during the
+// merge, so callers should only use the returned slice afterwards.
+func chiMergeMergeZipAdjacent(freq []*FrequencyTableEntry, minIndex int) []*FrequencyTableEntry {
+	mergeEntry1 := freq[minIndex]
+	mergeEntry2 := freq[minIndex+1]
+	// Combine the class counts of the two rows
+	classCounter := make(map[string]int)
+	for k := range mergeEntry1.Frequency {
+		classCounter[k] += mergeEntry1.Frequency[k]
+	}
+	for k := range mergeEntry2.Frequency {
+		classCounter[k] += mergeEntry2.Frequency[k]
+	}
+	newVal := freq[minIndex].Value
+	newEntry := &FrequencyTableEntry{
+		newVal,
+		classCounter,
+	}
+	// These initial values are always overwritten in the branches below
+	lowerSlice := freq
+	upperSlice := freq
+	if minIndex > 0 {
+		lowerSlice = freq[0:minIndex]
+		upperSlice = freq[minIndex+1:]
+	} else {
+		lowerSlice = make([]*FrequencyTableEntry, 0)
+		upperSlice = freq[1:]
+	}
+	// upperSlice aliases freq, so this replaces row minIndex+1 with the
+	// merged entry; row minIndex is dropped by being left out of both slices
+	upperSlice[0] = newEntry
+	freq = append(lowerSlice, upperSlice...)
+	return freq
+}
+
+// chiMergePrintTable writes a tab-separated dump of a frequency
+// table to standard output: a header naming each class, then one
+// line per entry with its value, per-class counts and row total.
+// Intended as a debugging aid.
+func chiMergePrintTable(freq []*FrequencyTableEntry) {
+	classes := chiCountClasses(freq)
+	// Map iteration order is not stable between loops, so fix the
+	// column order once and reuse it for the header and every row.
+	classNames := make([]string, 0, len(classes))
+	for k := range classes {
+		classNames = append(classNames, k)
+	}
+	fmt.Printf("Attribute value\t")
+	for _, k := range classNames {
+		fmt.Printf("\t%s", k)
+	}
+	fmt.Printf("\tTotal\n")
+	for _, f := range freq {
+		fmt.Printf("%.2f\t", f.Value)
+		total := 0
+		for _, k := range classNames {
+			fmt.Printf("\t%d", f.Frequency[k])
+			total += f.Frequency[k]
+		}
+		fmt.Printf("\t%d\n", total)
+	}
+}
+
+// chiMerge produces a value mapping table for one attribute by
+// repeatedly merging the adjacent frequency-table rows with the
+// smallest chi-squared statistic.
+//   inst: The base.Instances which need discretising
+//   attr: The index of the attribute (column) to discretise
+//   sig:  The significance level (e.g. 0.95)
+//   minrows: The minimum number of rows required in the frequency table
+//            (values below 2 are raised to 2)
+//   maxrows: The maximum number of rows allowed in the frequency table
+//            If the number of rows is above this, the adjacent rows
+//            with the smallest chi-squared value are merged regardless
+//            of significance (values not above minrows become minrows+1)
+// Returns the merged frequency table.
+func chiMerge(inst *base.Instances, attr int, sig float64, minrows int, maxrows int) []*FrequencyTableEntry {
+
+	// Parameter sanity checking
+	if !(2 <= minrows) {
+		minrows = 2
+	}
+	if !(minrows < maxrows) {
+		maxrows = minrows + 1
+	}
+	// NOTE(review): a sig of 10 exceeds any probability, so this
+	// presumably forces merging all the way down to minrows — confirm intent
+	if sig == 0 {
+		sig = 10
+	}
+
+	// Build a frequency table
+	freq := ChiMBuildFrequencyTable(attr, inst)
+	// Count the number of classes
+	classes := chiCountClasses(freq)
+	for {
+		// chiMergePrintTable(freq) DEBUG
+		if len(freq) <= minrows {
+			break
+		}
+		minChiVal := math.Inf(1)
+		// There may be more than one index to merge
+		minChiIndexes := make([]int, 0)
+		for i := 0; i < len(freq)-1; i++ {
+			chiVal := chiComputeStatistic(freq[i], freq[i+1])
+			// A new minimum discards the indexes collected so far
+			if chiVal < minChiVal {
+				minChiVal = chiVal
+				minChiIndexes = make([]int, 0)
+			}
+			if chiVal == minChiVal {
+				minChiIndexes = append(minChiIndexes, i)
+			}
+		}
+		// Only merge if:
+		//  We're above the maximum number of rows
+		//  OR the cumulative probability of the smallest chi-squared
+		//     value is below the significance level (sigVal < sig)
+		//   AS LONG AS we're above the minimum row count
+		merge := false
+		if len(freq) > maxrows {
+			merge = true
+		}
+		// Degrees of freedom: number of classes minus one
+		// (two rows are compared at a time)
+		degsOfFree := len(classes) - 1
+		sigVal := chiSquaredPercentile(degsOfFree, minChiVal)
+		if sigVal < sig {
+			merge = true
+		}
+		// If we don't need to merge, then break
+		if !merge {
+			break
+		}
+		// Otherwise merge the rows v, v+1 for every collected index:
+		//  the merged row keeps the value of row v and the combined
+		//  class frequencies. Each earlier merge shortens the table
+		//  by one row, hence the v-i adjustment.
+		for i, v := range minChiIndexes {
+			freq = chiMergeMergeZipAdjacent(freq, v-i)
+		}
+	}
+	return freq
+}
--- a/filters/chimerge_test.go
+++ b/filters/chimerge_test.go
@ -0,0 +1,149 @@
+package filters
+
+import (
+	"fmt"
+	base "github.com/sjwhitworth/golearn/base"
+	"math"
+	"testing"
+)
+
+// TestChiMFreqTable checks selected class counts in the initial
+// frequency table built from the first column of chim.csv.
+func TestChiMFreqTable(testEnv *testing.T) {
+
+	inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
+	if err != nil {
+		panic(err)
+	}
+
+	freq := ChiMBuildFrequencyTable(0, inst)
+
+	if freq[0].Frequency["c1"] != 1 {
+		testEnv.Error("Wrong frequency")
+	}
+	if freq[0].Frequency["c3"] != 4 {
+		// Errorf (not Error) so the directive is expanded, and report
+		// the entry that was actually checked
+		testEnv.Errorf("Wrong frequency %s", freq[0])
+	}
+	if freq[10].Frequency["c2"] != 1 {
+		testEnv.Error("Wrong frequency")
+	}
+}
+
+// TestChiClassCounter checks the overall per-class totals computed
+// from the chim.csv frequency table.
+func TestChiClassCounter(testEnv *testing.T) {
+	inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
+	if err != nil {
+		panic(err)
+	}
+	classes := chiCountClasses(ChiMBuildFrequencyTable(0, inst))
+	expected := []struct {
+		label string
+		count int
+	}{
+		{"c1", 27},
+		{"c2", 12},
+		{"c3", 21},
+	}
+	for _, e := range expected {
+		if classes[e.label] != e.count {
+			testEnv.Error(classes)
+		}
+	}
+}
+
+// TestStatisticValues compares the chi-squared statistic of two
+// pairs of adjacent chim.csv rows against known values.
+func TestStatisticValues(testEnv *testing.T) {
+	inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
+	if err != nil {
+		panic(err)
+	}
+	freq := ChiMBuildFrequencyTable(0, inst)
+	checks := []struct {
+		first, second int
+		want          float64
+	}{
+		{5, 6, 1.89},
+		{1, 2, 1.08},
+	}
+	for _, c := range checks {
+		got := chiComputeStatistic(freq[c.first], freq[c.second])
+		if math.Abs(got-c.want) > 0.01 {
+			testEnv.Error(got)
+		}
+	}
+}
+
+// TestChiSquareDistValues checks chiSquaredPercentile against
+// three known chi-squared critical values.
+func TestChiSquareDistValues(testEnv *testing.T) {
+	cases := []struct {
+		k    int
+		x    float64
+		want float64
+	}{
+		{2, 4.61, 0.90},
+		{3, 7.82, 0.95},
+		{4, 13.28, 0.99},
+	}
+	for _, c := range cases {
+		got := chiSquaredPercentile(c.k, c.x)
+		if math.Abs(got-c.want) > 0.001 {
+			testEnv.Error(got)
+		}
+	}
+}
+
+// TestChiMerge1 runs chiMerge over chim.csv at the 0.90 level and
+// checks the three interval values it should produce.
+func TestChiMerge1(testEnv *testing.T) {
+	// See Bramer, Principles of Data Mining
+	inst, err := base.ParseCSVToInstances("../examples/datasets/chim.csv", true)
+	if err != nil {
+		panic(err)
+	}
+	freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)
+	if len(freq) != 3 {
+		testEnv.Error("Wrong length")
+	}
+	for i, want := range []float64{1.3, 56.2, 87.1} {
+		if freq[i].Value != want {
+			testEnv.Error(freq[i])
+		}
+	}
+}
+
+// TestChiMerge2 runs chiMerge over the first iris column (sorted
+// ascending) at the 0.90 level and checks the five resulting
+// interval values.
+func TestChiMerge2(testEnv *testing.T) {
+	//
+	// See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf
+	//   Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992
+	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
+	if err != nil {
+		panic(err)
+	}
+	attrs := make([]int, 1)
+	attrs[0] = 0
+	inst.Sort(base.Ascending, attrs)
+	freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)
+	if len(freq) != 5 {
+		// Errorf (not Error) so the %d directive is expanded
+		testEnv.Errorf("Wrong length (%d)", len(freq))
+		testEnv.Error(freq)
+	}
+	if freq[0].Value != 4.3 {
+		testEnv.Error(freq[0])
+	}
+	if freq[1].Value != 5.5 {
+		testEnv.Error(freq[1])
+	}
+	if freq[2].Value != 5.8 {
+		testEnv.Error(freq[2])
+	}
+	if freq[3].Value != 6.3 {
+		testEnv.Error(freq[3])
+	}
+	if freq[4].Value != 7.1 {
+		testEnv.Error(freq[4])
+	}
+}
+
+// TestChiMerge3 exercises the full ChiMergeFilter pipeline (add
+// attribute, Build, Run) on the sorted iris data and prints the
+// discretised result; it makes no assertions.
+func TestChiMerge3(testEnv *testing.T) {
+	// See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf
+	//   Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992
+	inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true)
+	if err != nil {
+		panic(err)
+	}
+	// Sort ascending on the first column before discretising it
+	sortCols := []int{0}
+	inst.Sort(base.Ascending, sortCols)
+	filt := NewChiMergeFilter(inst, 0.90)
+	filt.AddAttribute(inst.GetAttr(0))
+	filt.Build()
+	filt.Run(inst)
+	fmt.Println(inst)
+}
--- a/knn/knn.go
+++ b/knn/knn.go
@ -14,44 +14,42 @@ import (
 // The accepted distance functions at this time are 'euclidean' and 'manhattan'.
 type KNNClassifier struct {
 	base.BaseEstimator
-	Labels       []string
-	DistanceFunc string
+	TrainingData      *base.Instances
+	DistanceFunc      string
+	NearestNeighbours int
 }

 // Returns a new classifier
-func NewKnnClassifier(distfunc string) *KNNClassifier {
+func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
 	KNN := KNNClassifier{}
 	KNN.DistanceFunc = distfunc
+	KNN.NearestNeighbours = neighbours
 	return &KNN
 }

-func (KNN *KNNClassifier) Fit(labels []string, numbers []float64, rows int, cols int) {
-	if rows != len(labels) {
-		panic(mat64.ErrShape)
-	}
-
-	KNN.Data = mat64.NewDense(rows, cols, numbers)
-	KNN.Labels = labels
+// Fit stores the training data for later use when predicting.
+func (KNN *KNNClassifier) Fit(trainingData *base.Instances) {
+	KNN.TrainingData = trainingData
 }

 // Returns a classification for the vector, based on a vector input, using the KNN algorithm.
 // See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
-func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
+func (KNN *KNNClassifier) PredictOne(vector []float64) string {

-	convertedVector := util.FloatsToMatrix(vector)
-	// Get the number of rows
-	rows, _ := KNN.Data.Dims()
+	rows := KNN.TrainingData.Rows
 	rownumbers := make(map[int]float64)
 	labels := make([]string, 0)
 	maxmap := make(map[string]int)

+	convertedVector := util.FloatsToMatrix(vector)
+
 	// Check what distance function we are using
 	switch KNN.DistanceFunc {
 	case "euclidean":
 		{
 			euclidean := pairwiseMetrics.NewEuclidean()
 			for i := 0; i < rows; i++ {
-				row := KNN.Data.RowView(i)
+				row := KNN.TrainingData.GetRowVectorWithoutClass(i)
 				rowMat := util.FloatsToMatrix(row)
 				distance := euclidean.Distance(rowMat, convertedVector)
 				rownumbers[i] = distance
@ -61,7 +59,7 @@ func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
 		{
 			manhattan := pairwiseMetrics.NewEuclidean()
 			for i := 0; i < rows; i++ {
-				row := KNN.Data.RowView(i)
+				row := KNN.TrainingData.GetRowVectorWithoutClass(i)
 				rowMat := util.FloatsToMatrix(row)
 				distance := manhattan.Distance(rowMat, convertedVector)
 				rownumbers[i] = distance
@ -70,16 +68,16 @@ func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
 	}

 	sorted := util.SortIntMap(rownumbers)
-	values := sorted[:K]
+	values := sorted[:KNN.NearestNeighbours]

 	for _, elem := range values {
-		// It's when we access this map
-		labels = append(labels, KNN.Labels[elem])
+		label := KNN.TrainingData.GetClass(elem)
+		labels = append(labels, label)

-		if _, ok := maxmap[KNN.Labels[elem]]; ok {
-			maxmap[KNN.Labels[elem]] += 1
+		if _, ok := maxmap[label]; ok {
+			maxmap[label] += 1
 		} else {
-			maxmap[KNN.Labels[elem]] = 1
+			maxmap[label] = 1
 		}
 	}

@ -89,6 +87,14 @@ func (KNN *KNNClassifier) Predict(vector []float64, K int) string {
 	return label
 }

+// Predict classifies every row of `what' with PredictOne and
+// returns the labels in a prediction vector generated from `what'.
+func (KNN *KNNClassifier) Predict(what *base.Instances) *base.Instances {
+	predictions := what.GeneratePredictionVector()
+	for row := 0; row < what.Rows; row++ {
+		label := KNN.PredictOne(what.GetRowVectorWithoutClass(row))
+		predictions.SetAttrStr(row, 0, label)
+	}
+	return predictions
+}
+
 //A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
 type KNNRegressor struct {
 	base.BaseEstimator
@ -112,7 +118,6 @@ func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows int, cols
 	KNN.Values = values
 }

-//Returns an average of the K nearest labels/variables, based on a vector input.
 func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {

 	// Get the number of rows
--- a/knn/knn_test.csv
+++ b/knn/knn_test.csv
@ -0,0 +1,2 @@
+1.2,1.2,1.5,blue
+5,5,5,red
--- a/knn/knn_test.go
+++ b/knn/knn_test.go
@ -1,30 +1,39 @@
 package knn

 import (
-	"testing"
-
+	"github.com/sjwhitworth/golearn/base"
 	. "github.com/smartystreets/goconvey/convey"
+	"testing"
 )

 func TestKnnClassifier(t *testing.T) {
 	Convey("Given labels, a classifier and data", t, func() {
-		labels := []string{"blue", "blue", "red", "red"}
-		data := []float64{1, 1, 1, 1, 1, 1, 3, 3, 3, 6, 6, 6}
-		cls := NewKnnClassifier(labels, data, 4, 3, "euclidean")
+
+		trainingData, err1 := base.ParseCSVToInstances("knn_train.csv", false)
+		testingData, err2 := base.ParseCSVToInstances("knn_test.csv", false)
+
+		if err1 != nil {
+			t.Error(err1)
+			return
+		}
+		if err2 != nil {
+			t.Error(err2)
+			return
+		}
+
+		cls := NewKnnClassifier("euclidean", 2)
+		cls.Fit(trainingData)
+		predictions := cls.Predict(testingData)

 		Convey("When predicting the label for our first vector", func() {
-			// The vector we're going to predict
-			vector := []float64{1.2, 1.2, 1.5}
-			result := cls.Predict(vector, 2)
+			result := predictions.GetClass(0)
 			Convey("The result should be 'blue", func() {
 				So(result, ShouldEqual, "blue")
 			})
 		})

 		Convey("When predicting the label for our first vector", func() {
-			// The vector we're going to predict
-			vector2 := []float64{5, 5, 5}
-			result2 := cls.Predict(vector2, 2)
+			result2 := predictions.GetClass(1)
 			Convey("The result should be 'red", func() {
 				So(result2, ShouldEqual, "red")
 			})
--- a/knn/knn_train.csv
+++ b/knn/knn_train.csv
@ -0,0 +1,4 @@
+1,1,1,blue
+1,1,1,blue
+3,3,3,red
+6,6,6,red