Mirror of https://github.com/sjwhitworth/golearn.git (synced 2025-04-28 13:48:56 +08:00)

Commit 4bda400c66

@@ -43,7 +43,7 @@ func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
 	return attrs
 }
 
-// ParseCsvSniffAttributeNames returns a slice containing the top row
+// ParseCSVSniffAttributeNames returns a slice containing the top row
 // of a given CSV file, or placeholders if hasHeaders is false.
 func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
 	file, err := os.Open(filepath)

@@ -5,12 +5,12 @@ import "testing"
 func TestParseCSVGetRows(testEnv *testing.T) {
 	lineCount := ParseCSVGetRows("../examples/datasets/iris.csv")
 	if lineCount != 150 {
-		testEnv.Error("Should have %d lines, has %d", 150, lineCount)
+		testEnv.Errorf("Should have %d lines, has %d", 150, lineCount)
 	}
 
 	lineCount = ParseCSVGetRows("../examples/datasets/iris_headers.csv")
 	if lineCount != 151 {
-		testEnv.Error("Should have %d lines, has %d", 151, lineCount)
+		testEnv.Errorf("Should have %d lines, has %d", 151, lineCount)
 	}
 
 }
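
The `Error` → `Errorf` changes above and below are not cosmetic: `testing.T.Error` formats its arguments like `fmt.Println`, so the `%d` verbs were printed literally rather than expanded, while `Errorf` formats like `fmt.Printf`. A minimal standalone sketch of the difference, using plain `fmt` instead of a `*testing.T`:

```go
package main

import "fmt"

func main() {
	// t.Error(args...) formats like Println: verbs are not interpreted.
	fmt.Println("Should have %d lines, has %d", 150, 149)
	// Output: Should have %d lines, has %d 150 149

	// t.Errorf(format, args...) formats like Printf: verbs are expanded.
	fmt.Printf("Should have %d lines, has %d\n", 150, 149)
	// Output: Should have 150 lines, has 149
}
```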

@@ -18,14 +18,14 @@ func TestParseCSVGetRows(testEnv *testing.T) {
 func TestParseCCSVGetAttributes(testEnv *testing.T) {
 	attrs := ParseCSVGetAttributes("../examples/datasets/iris_headers.csv", true)
 	if attrs[0].GetType() != Float64Type {
-		testEnv.Error("First attribute should be a float, %s", attrs[0])
+		testEnv.Errorf("First attribute should be a float, %s", attrs[0])
 	}
 	if attrs[0].GetName() != "Sepal length" {
-		testEnv.Error(attrs[0].GetName())
+		testEnv.Errorf(attrs[0].GetName())
 	}
 
 	if attrs[4].GetType() != CategoricalType {
-		testEnv.Error("Final attribute should be categorical, %s", attrs[4])
+		testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
 	}
 	if attrs[4].GetName() != "Species" {
 		testEnv.Error(attrs[4])

@@ -35,19 +35,19 @@ func TestParseCCSVGetAttributes(testEnv *testing.T) {
 func TestParseCsvSniffAttributeTypes(testEnv *testing.T) {
 	attrs := ParseCSVSniffAttributeTypes("../examples/datasets/iris_headers.csv", true)
 	if attrs[0].GetType() != Float64Type {
-		testEnv.Error("First attribute should be a float, %s", attrs[0])
+		testEnv.Errorf("First attribute should be a float, %s", attrs[0])
 	}
 	if attrs[1].GetType() != Float64Type {
-		testEnv.Error("Second attribute should be a float, %s", attrs[1])
+		testEnv.Errorf("Second attribute should be a float, %s", attrs[1])
 	}
 	if attrs[2].GetType() != Float64Type {
-		testEnv.Error("Third attribute should be a float, %s", attrs[2])
+		testEnv.Errorf("Third attribute should be a float, %s", attrs[2])
 	}
 	if attrs[3].GetType() != Float64Type {
-		testEnv.Error("Fourth attribute should be a float, %s", attrs[3])
+		testEnv.Errorf("Fourth attribute should be a float, %s", attrs[3])
 	}
 	if attrs[4].GetType() != CategoricalType {
-		testEnv.Error("Final attribute should be categorical, %s", attrs[4])
+		testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
 	}
 }
 

@@ -12,17 +12,18 @@ import (
 	mat64 "github.com/gonum/matrix/mat64"
 )
 
-// An object that can ingest some data and train on it.
+// An Estimator is object that can ingest some data and train on it.
 type Estimator interface {
 	Fit()
 }
 
-// An object that provides predictions.
+// A Predictor is an object that provides predictions.
 type Predictor interface {
 	Predict()
 }
 
-// An supervised learning object, that is possible of scoring accuracy against a test set.
+// A Model is a supervised learning object, that is
+// possible of scoring accuracy against a test set.
 type Model interface {
 	Score()
 }
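
These three one-method interfaces compose: a learner satisfies all of them by defining `Fit`, `Predict`, and `Score`. A hypothetical sketch (the `majorityClassifier` type is invented for illustration and is not part of golearn):

```go
package main

import "fmt"

// Interface shapes as they appear in the diff above.
type Estimator interface{ Fit() }
type Predictor interface{ Predict() }
type Model interface{ Score() }

// majorityClassifier is a toy type that satisfies all three interfaces.
type majorityClassifier struct{ label string }

func (m *majorityClassifier) Fit()     { m.label = "most-frequent-class" }
func (m *majorityClassifier) Predict() { fmt.Println("predicted:", m.label) }
func (m *majorityClassifier) Score()   { fmt.Println("score: 1.0") }

func main() {
	var e Estimator = &majorityClassifier{}
	e.Fit()
	e.(Predictor).Predict() // type-assert to the other interfaces as needed
	e.(Model).Score()
}
```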

@@ -31,7 +32,7 @@ type BaseEstimator struct {
 	Data *mat64.Dense
 }
 
-// Serialises an estimator to a provided filepath, in gob format.
+// SaveEstimatorToGob serialises an estimator to a provided filepath, in gob format.
 // See http://golang.org/pkg/encoding/gob for further details.
 func SaveEstimatorToGob(path string, e *Estimator) {
 	b := new(bytes.Buffer)
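
The `bytes.Buffer` here feeds an `encoding/gob` encoder from the standard library. A minimal round-trip sketch of the same mechanism; `toyEstimator` is a stand-in struct, not golearn's actual estimator type (note that gob only encodes exported fields):

```go
package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

// toyEstimator stands in for whatever estimator is being serialised.
type toyEstimator struct {
	Weights []float64
}

func main() {
	b := new(bytes.Buffer)
	if err := gob.NewEncoder(b).Encode(toyEstimator{Weights: []float64{0.1, 0.9}}); err != nil {
		panic(err)
	}

	var decoded toyEstimator
	if err := gob.NewDecoder(b).Decode(&decoded); err != nil {
		panic(err)
	}
	fmt.Println(decoded.Weights) // [0.1 0.9]
}
```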

@@ -142,7 +142,7 @@ func NewInstances(attrs []Attribute, rows int) *Instances {
 func CheckNewInstancesFromRaw(attrs []Attribute, rows int, data []float64) error {
 	size := rows * len(attrs)
 	if size < len(data) {
-		return errors.New("base: data length is larger than the rows * attribute space.")
+		return errors.New("base: data length is larger than the rows * attribute space")
 	} else if size > len(data) {
 		return errors.New("base: data is smaller than the rows * attribute space")
 	}

@@ -198,7 +198,6 @@ func InstancesTrainTestSplit(src *Instances, prop float64) (*Instances, *Instanc
 		rawTestMatrix.SetRow(i, rowDat)
 	}
 
-
 	trainingRet := NewInstancesFromDense(src.attributes, len(trainingRows), rawTrainMatrix)
 	testRet := NewInstancesFromDense(src.attributes, len(testingRows), rawTestMatrix)
 	return trainingRet, testRet

@@ -217,7 +216,7 @@ func (inst *Instances) CountAttrValues(a Attribute) map[string]int {
 	for i := 0; i < inst.Rows; i++ {
 		sysVal := inst.Get(i, attrIndex)
 		stringVal := a.GetStringFromSysVal(sysVal)
-		ret[stringVal] += 1
+		ret[stringVal]++
 	}
 	return ret
 }

@@ -320,7 +319,7 @@ func (inst *Instances) GetRowVector(row int) []float64 {
 	return inst.storage.RowView(row)
 }
 
-// GetRowVector returns a row of system representation
+// GetRowVectorWithoutClass returns a row of system representation
 // values at the given row index, excluding the class attribute
 func (inst *Instances) GetRowVectorWithoutClass(row int) []float64 {
 	rawRow := make([]float64, inst.Cols)

@@ -337,7 +336,7 @@ func (inst *Instances) GetClass(row int) string {
 	return attr.GetStringFromSysVal(val)
 }
 
-// GetClassDist returns a map containing the count of each
+// GetClassDistribution returns a map containing the count of each
 // class type (indexed by the class' string representation)
 func (inst *Instances) GetClassDistribution() map[string]int {
 	ret := make(map[string]int)

@@ -351,13 +350,13 @@ func (inst *Instances) GetClassDistribution() map[string]int {
 	return ret
 }
 
-func (Inst *Instances) GetClassAttrPtr() *Attribute {
-	attr := Inst.GetAttr(Inst.ClassIndex)
+func (inst *Instances) GetClassAttrPtr() *Attribute {
+	attr := inst.GetAttr(inst.ClassIndex)
 	return &attr
 }
 
-func (Inst *Instances) GetClassAttr() Attribute {
-	return Inst.GetAttr(Inst.ClassIndex)
+func (inst *Instances) GetClassAttr() Attribute {
+	return inst.GetAttr(inst.ClassIndex)
 }
 
 //

@@ -15,10 +15,10 @@ func shuffleMatrix(returnDatasets []*mat.Dense, dataset mat.Matrix, testSize int
 	shuffledSet := mat.DenseCopyOf(dataset)
 	rowCount, colCount := shuffledSet.Dims()
 	temp := make([]float64, colCount)
 
 	// Fisher–Yates shuffle
 	for i := 0; i < rowCount; i++ {
-		j := numGen.Intn(i+1)
+		j := numGen.Intn(i + 1)
 		if j != i {
 			// Make a "hard" copy to avoid pointer craziness.
 			copy(temp, shuffledSet.RowView(i))
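
For reference, the Fisher–Yates invariant is that each position is swapped with a uniformly chosen index from the not-yet-fixed range. A self-contained sketch over a plain slice of rows rather than a `mat.Dense` (this is the classic backward variant; the library code above iterates forward with `Intn(i + 1)`, which is an equally valid formulation):

```go
package main

import (
	"fmt"
	"math/rand"
)

// fisherYates shuffles rows in place.
func fisherYates(rows [][]float64, numGen *rand.Rand) {
	for i := len(rows) - 1; i > 0; i-- {
		j := numGen.Intn(i + 1) // uniform index in [0, i]
		rows[i], rows[j] = rows[j], rows[i]
	}
}

func main() {
	rows := [][]float64{{1}, {2}, {3}, {4}}
	fisherYates(rows, rand.New(rand.NewSource(42)))
	fmt.Println(rows)
}
```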

@@ -43,29 +43,29 @@ func TrainTestSplit(size interface{}, randomState interface{}, datasets ...*mat.
 	// Input should be one or two matrices.
 	dataCount := len(datasets)
 	if dataCount > 2 {
-		return nil, fmt.Errorf("Expected 1 or 2 datasets, got %d\n", dataCount)
+		return nil, fmt.Errorf("expected 1 or 2 datasets, got %d\n", dataCount)
 	}
 
 	if dataCount == 2 {
 		// Test for consistency.
 		labelCount, labelFeatures := datasets[1].Dims()
 		if labelCount != instanceCount {
-			return nil, fmt.Errorf("Data and labels must have the same number of instances")
+			return nil, fmt.Errorf("data and labels must have the same number of instances")
 		} else if labelFeatures != 1 {
-			return nil, fmt.Errorf("Label matrix must have single feature")
+			return nil, fmt.Errorf("label matrix must have single feature")
 		}
 	}
 
 	var testSize int
 	switch size := size.(type) {
-	// If size is an integer, treat it as the test data instance count.
+	// If size is an integer, treat it as the test data instance count.
 	case int:
 		testSize = size
 	case float64:
 		// If size is a float, treat it as a percentage of the instances to be allocated to the test set.
 		testSize = int(float64(instanceCount)*size + 0.5)
 	default:
-		return nil, fmt.Errorf("Expected a test instance count (int) or percentage (float64)")
+		return nil, fmt.Errorf("expected a test instance count (int) or percentage (float64)")
 	}
 
 	var randSeed int64
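
The `size interface{}` parameter is dispatched with a type switch: an `int` is an absolute test-row count, a `float64` a proportion rounded to the nearest row. A small sketch of the same dispatch in isolation (`resolveTestSize` is a hypothetical helper written for illustration, not a golearn function):

```go
package main

import "fmt"

// resolveTestSize mirrors the type switch in TrainTestSplit above.
func resolveTestSize(size interface{}, instanceCount int) (int, error) {
	switch s := size.(type) {
	case int:
		return s, nil // absolute instance count
	case float64:
		return int(float64(instanceCount)*s + 0.5), nil // proportion, rounded
	default:
		return 0, fmt.Errorf("expected a test instance count (int) or percentage (float64)")
	}
}

func main() {
	n, _ := resolveTestSize(30, 150)
	p, _ := resolveTestSize(0.2, 150)
	fmt.Println(n, p) // 30 30
}
```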

@@ -1,10 +1,10 @@
 package ensemble
 
 import (
+	"fmt"
 	base "github.com/sjwhitworth/golearn/base"
 	meta "github.com/sjwhitworth/golearn/meta"
 	trees "github.com/sjwhitworth/golearn/trees"
-	"fmt"
 )
 
 // RandomForest classifies instances using an ensemble

@@ -16,7 +16,7 @@ type RandomForest struct {
 	Model *meta.BaggedModel
 }
 
-// NewRandomForests generates and return a new random forests
+// NewRandomForest generates and return a new random forests
 // forestSize controls the number of trees that get built
 // features controls the number of features used to build each tree
 func NewRandomForest(forestSize int, features int) *RandomForest {

@@ -29,7 +29,7 @@ func NewRandomForest(forestSize int, features int) *RandomForest {
 	return ret
 }
 
-// Train builds the RandomForest on the specified instances
+// Fit builds the RandomForest on the specified instances
 func (f *RandomForest) Fit(on *base.Instances) {
 	f.Model = new(meta.BaggedModel)
 	f.Model.RandomFeatures = f.Features

@@ -47,4 +47,4 @@ func (f *RandomForest) Predict(with *base.Instances) *base.Instances {
 
 func (f *RandomForest) String() string {
 	return fmt.Sprintf("RandomForest(ForestSize: %d, Features:%d, %s\n)", f.ForestSize, f.Features, f.Model)
-}
+}

@@ -23,7 +23,7 @@ func GetConfusionMatrix(ref *base.Instances, gen *base.Instances) map[string]map
 		referenceClass := ref.GetClass(i)
 		predictedClass := gen.GetClass(i)
 		if _, ok := ret[referenceClass]; ok {
-			ret[referenceClass][predictedClass] += 1
+			ret[referenceClass][predictedClass]++
 		} else {
 			ret[referenceClass] = make(map[string]int)
 			ret[referenceClass][predictedClass] = 1
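
The existence check before the increment is load-bearing here: reading a missing key from a Go map yields the zero value, but writing into a nil inner map panics, so the inner map must be allocated first. A standalone sketch of the idiom with made-up class labels:

```go
package main

import "fmt"

func main() {
	ret := make(map[string]map[string]int)
	pairs := [][2]string{{"setosa", "setosa"}, {"setosa", "virginica"}, {"setosa", "setosa"}}

	for _, p := range pairs {
		ref, pred := p[0], p[1]
		// Allocate the inner map before the first write;
		// assigning into a nil map panics at runtime.
		if _, ok := ret[ref]; !ok {
			ret[ref] = make(map[string]int)
		}
		ret[ref][pred]++
	}
	fmt.Println(ret) // map[setosa:map[setosa:2 virginica:1]]
}
```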

@@ -21,7 +21,7 @@ type ChiMergeFilter struct {
 	_Trained bool
 }
 
-// Create a ChiMergeFilter with some helpful intialisations.
+// NewChiMergeFilter creates a ChiMergeFilter with some helpful initialisations.
 func NewChiMergeFilter(inst *base.Instances, significance float64) ChiMergeFilter {
 	return ChiMergeFilter{
 		make([]int, 0),

@@ -45,16 +45,16 @@ func (c *ChiMergeFilter) Build() {
 
 // AddAllNumericAttributes adds every suitable attribute
 // to the ChiMergeFilter for discretisation
-func (b *ChiMergeFilter) AddAllNumericAttributes() {
-	for i := 0; i < b.Instances.Cols; i++ {
-		if i == b.Instances.ClassIndex {
+func (c *ChiMergeFilter) AddAllNumericAttributes() {
+	for i := 0; i < c.Instances.Cols; i++ {
+		if i == c.Instances.ClassIndex {
 			continue
 		}
-		attr := b.Instances.GetAttr(i)
+		attr := c.Instances.GetAttr(i)
 		if attr.GetType() != base.Float64Type {
 			continue
 		}
-		b.Attributes = append(b.Attributes, i)
+		c.Attributes = append(c.Attributes, i)
 	}
 }
 

@@ -110,7 +110,7 @@ type FrequencyTableEntry struct {
 }
 
 func (t *FrequencyTableEntry) String() string {
-	return fmt.Sprintf("%.2f %s", t.Value, t.Frequency)
+	return fmt.Sprintf("%.2f %v", t.Value, t.Frequency)
 }
 
 func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEntry {
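
The `%s` → `%v` fix matters because `Frequency` is a map with non-string values: `%s` only applies to strings (and `Stringer`s), so the map's int values render as errors, while `%v` uses each value's default format. A quick illustration:

```go
package main

import "fmt"

func main() {
	freq := map[string]int{"c1": 3, "c2": 1}
	fmt.Printf("%s\n", freq) // map[c1:%!s(int=3) c2:%!s(int=1)] — %s is wrong for ints
	fmt.Printf("%v\n", freq) // map[c1:3 c2:1] — %v is the right general-purpose verb
}
```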

@@ -129,7 +129,7 @@ func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEn
 	for _, entry := range ret {
 		if entry.Value == valueConv {
 			found = true
-			entry.Frequency[class] += 1
+			entry.Frequency[class]++
 		}
 	}
 	if !found {

@@ -20,7 +20,7 @@ func TestChiMFreqTable(testEnv *testing.T) {
 		testEnv.Error("Wrong frequency")
 	}
 	if freq[0].Frequency["c3"] != 4 {
-		testEnv.Error("Wrong frequency %s", freq[1])
+		testEnv.Errorf("Wrong frequency %s", freq[1])
 	}
 	if freq[10].Frequency["c2"] != 1 {
 		testEnv.Error("Wrong frequency")

@@ -111,7 +111,7 @@ func TestChiMerge2(testEnv *testing.T) {
 	inst.Sort(base.Ascending, attrs)
 	freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)
 	if len(freq) != 5 {
-		testEnv.Error("Wrong length (%d)", len(freq))
+		testEnv.Errorf("Wrong length (%d)", len(freq))
 		testEnv.Error(freq)
 	}
 	if freq[0].Value != 4.3 {

knn/knn.go | 17

@@ -1,4 +1,4 @@
-// Package KNN implements a K Nearest Neighbors object, capable of both classification
+// Package knn implements a K Nearest Neighbors object, capable of both classification
 // and regression. It accepts data in the form of a slice of float64s, which are then reshaped
 // into a X by Y matrix.
 package knn

@@ -10,7 +10,7 @@ import (
 	util "github.com/sjwhitworth/golearn/utilities"
 )
 
-// A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
+// A KNNClassifier consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
 // The accepted distance functions at this time are 'euclidean' and 'manhattan'.
 type KNNClassifier struct {
 	base.BaseEstimator

@@ -19,7 +19,7 @@ type KNNClassifier struct {
 	NearestNeighbours int
 }
 
-// Returns a new classifier
+// NewKnnClassifier returns a new classifier
 func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
 	KNN := KNNClassifier{}
 	KNN.DistanceFunc = distfunc

@@ -27,12 +27,12 @@ func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
 	return &KNN
 }
 
-// Train stores the training data for llater
+// Fit stores the training data for later
 func (KNN *KNNClassifier) Fit(trainingData *base.Instances) {
 	KNN.TrainingData = trainingData
 }
 
-// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
+// PredictOne returns a classification for the vector, based on a vector input, using the KNN algorithm.
 // See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
 func (KNN *KNNClassifier) PredictOne(vector []float64) string {
 

@@ -75,7 +75,7 @@ func (KNN *KNNClassifier) PredictOne(vector []float64) string {
 		labels = append(labels, label)
 
 		if _, ok := maxmap[label]; ok {
-			maxmap[label] += 1
+			maxmap[label]++
 		} else {
 			maxmap[label] = 1
 		}
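
This block is the majority-vote tally at the heart of KNN classification. Worth noting: because a missing key reads as zero, the `ok` check above is not strictly needed — `maxmap[label]++` alone would behave identically. A compact sketch of the vote (the `majorityVote` helper is written here for illustration, not taken from golearn):

```go
package main

import "fmt"

// majorityVote tallies neighbour labels and returns the most frequent one.
func majorityVote(labels []string) string {
	maxmap := make(map[string]int)
	for _, label := range labels {
		maxmap[label]++ // missing keys read as 0, so no existence check is needed
	}
	best, bestCount := "", -1
	for label, count := range maxmap {
		if count > bestCount {
			best, bestCount = label, count
		}
	}
	return best
}

func main() {
	fmt.Println(majorityVote([]string{"setosa", "virginica", "setosa"})) // setosa
}
```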

@@ -95,14 +95,14 @@ func (KNN *KNNClassifier) Predict(what *base.Instances) *base.Instances {
 	return ret
 }
 
-//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
+// A KNNRegressor consists of a data matrix, associated result variables in the same order as the matrix, and a name.
 type KNNRegressor struct {
 	base.BaseEstimator
 	Values []float64
 	DistanceFunc string
 }
 
-// Mints a new classifier.
+// NewKnnRegressor mints a new classifier.
 func NewKnnRegressor(distfunc string) *KNNRegressor {
 	KNN := KNNRegressor{}
 	KNN.DistanceFunc = distfunc

@@ -119,7 +119,6 @@ func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows int, cols
 }
 
 func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
-
 	// Get the number of rows
 	rows, _ := KNN.Data.Dims()
 	rownumbers := make(map[int]float64)

@@ -49,7 +49,7 @@ func NewProblem(X [][]float64, y []float64, bias float64) *Problem {
 
 	prob.c_prob.x = convert_features(X, bias)
 	c_y := make([]C.int, len(y))
-	for i := 0; i < len(y); i += 1 {
+	for i := 0; i < len(y); i++ {
 		c_y[i] = C.int(y[i])
 	}
 	prob.c_prob.y = &c_y[0]

@@ -70,26 +70,26 @@ func Predict(model *Model, x []float64) float64 {
 }
 func convert_vector(x []float64, bias float64) *C.struct_feature_node {
 	n_ele := 0
-	for i := 0; i < len(x); i += 1 {
+	for i := 0; i < len(x); i++ {
 		if x[i] > 0 {
-			n_ele += 1
+			n_ele++
 		}
 	}
 	n_ele += 2
 
 	c_x := make([]C.struct_feature_node, n_ele)
 	j := 0
-	for i := 0; i < len(x); i += 1 {
+	for i := 0; i < len(x); i++ {
 		if x[i] > 0 {
 			c_x[j].index = C.int(i + 1)
 			c_x[j].value = C.double(x[i])
-			j += 1
+			j++
 		}
 	}
 	if bias > 0 {
 		c_x[j].index = C.int(0)
 		c_x[j].value = C.double(0)
-		j += 1
+		j++
 	}
 	c_x[j].index = C.int(-1)
 	return &c_x[0]

@@ -98,12 +98,12 @@ func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
 	n_samples := len(X)
 	n_elements := 0
 
-	for i := 0; i < n_samples; i += 1 {
-		for j := 0; j < len(X[i]); j += 1 {
+	for i := 0; i < n_samples; i++ {
+		for j := 0; j < len(X[i]); j++ {
 			if X[i][j] != 0.0 {
-				n_elements += 1
+				n_elements++
 			}
-			n_elements += 1 //for bias
+			n_elements++ //for bias
 		}
 	}
 

@@ -113,23 +113,23 @@ func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
 	x := make([]*C.struct_feature_node, n_samples)
 	var c_x **C.struct_feature_node
 
-	for i := 0; i < n_samples; i += 1 {
+	for i := 0; i < n_samples; i++ {
 		x[i] = &x_space[cursor]
 
-		for j := 0; j < len(X[i]); j += 1 {
+		for j := 0; j < len(X[i]); j++ {
 			if X[i][j] != 0.0 {
 				x_space[cursor].index = C.int(j + 1)
 				x_space[cursor].value = C.double(X[i][j])
-				cursor += 1
+				cursor++
 			}
 			if bias > 0 {
 				x_space[cursor].index = C.int(0)
 				x_space[cursor].value = C.double(bias)
-				cursor += 1
+				cursor++
 			}
 		}
 		x_space[cursor].index = C.int(-1)
-		cursor += 1
+		cursor++
 	}
 	c_x = &x[0]
 	return c_x

@@ -79,7 +79,7 @@ func (b *BaggedModel) AddModel(m base.Classifier) {
 	b.Models = append(b.Models, m)
 }
 
-// Train generates and trains each model on a randomised subset of
+// Fit generates and trains each model on a randomised subset of
 // Instances.
 func (b *BaggedModel) Fit(from *base.Instances) {
 	var wait sync.WaitGroup

@@ -153,7 +153,7 @@ func (b *BaggedModel) Predict(from *base.Instances) *base.Instances {
 	}
 
 	// Send all the models to the workers for prediction
-	for i, _ := range b.Models {
+	for i := range b.Models {
 		processpipe <- i
 	}
 	close(processpipe) // Finished sending models to be predicted

@@ -12,7 +12,7 @@ func NewChebyshev() *Chebyshev {
 	return &Chebyshev{}
 }
 
-func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (c *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	r1, c1 := vectorX.Dims()
 	r2, c2 := vectorY.Dims()
 	if r1 != r2 || c1 != c2 {

@@ -15,12 +15,11 @@ func NewCranberra() *Cranberra {
 func cranberraDistanceStep(num float64, denom float64) float64 {
 	if num == .0 && denom == .0 {
 		return .0
-	} else {
-		return num / denom
 	}
+	return num / denom
 }
 
-func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (c *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	r1, c1 := vectorX.Dims()
 	r2, c2 := vectorY.Dims()
 	if r1 != r2 || c1 != c2 {
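
The step function guards the Canberra distance, Σ |xᵢ − yᵢ| / (|xᵢ| + |yᵢ|), against the 0/0 case by defining it as 0. A self-contained sketch over plain slices (the `canberra` helper here is illustrative, not the library's mat64-based implementation):

```go
package main

import (
	"fmt"
	"math"
)

// canberra sums |x-y| / (|x|+|y|) per element, treating 0/0 as 0 —
// the same guard cranberraDistanceStep provides.
func canberra(x, y []float64) float64 {
	sum := 0.0
	for i := range x {
		num := math.Abs(x[i] - y[i])
		denom := math.Abs(x[i]) + math.Abs(y[i])
		if denom != 0 {
			sum += num / denom
		}
	}
	return sum
}

func main() {
	fmt.Println(canberra([]float64{0, 1, 2}, []float64{0, 2, 2})) // 0 + 1/3 + 0 ≈ 0.333
}
```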

@@ -12,19 +12,19 @@ func NewEuclidean() *Euclidean {
 	return &Euclidean{}
 }
 
-// Compute Eucledian inner product.
-func (self *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// InnerProduct computes a Eucledian inner product.
+func (e *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	result := vectorX.Dot(vectorY)
 
 	return result
 }
 
-// Compute Euclidean distance (also known as L2 distance).
-func (self *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes Euclidean distance (also known as L2 distance).
+func (e *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	subVector := mat64.NewDense(0, 0, nil)
 	subVector.Sub(vectorX, vectorY)
 
-	result := self.InnerProduct(subVector, subVector)
+	result := e.InnerProduct(subVector, subVector)
 
 	return math.Sqrt(result)
 }

@@ -12,9 +12,9 @@ func NewManhattan() *Manhattan {
 	return &Manhattan{}
 }
 
-// Manhattan distance, also known as L1 distance.
-// Compute sum of absolute values of elements.
-func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes the Manhattan distance, also known as L1 distance.
+// == the sum of the absolute values of elements.
+func (m *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	r1, c1 := vectorX.Dims()
 	r2, c2 := vectorY.Dims()
 	if r1 != r2 || c1 != c2 {

@@ -10,25 +10,25 @@ type PolyKernel struct {
 	degree int
 }
 
-// Return a d-degree polynomial kernel
+// NewPolyKernel returns a d-degree polynomial kernel
 func NewPolyKernel(degree int) *PolyKernel {
 	return &PolyKernel{degree: degree}
 }
 
-// Compute inner product through kernel trick
+// InnerProduct computes the inner product through a kernel trick
 // K(x, y) = (x^T y + 1)^d
-func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (p *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	result := vectorX.Dot(vectorY)
-	result = math.Pow(result+1, float64(self.degree))
+	result = math.Pow(result+1, float64(p.degree))
 
 	return result
 }
 
-// Compute distance under the polynomial kernel, maybe no need.
-func (self *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes distance under the polynomial kernel (maybe not needed?)
+func (p *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	subVector := mat64.NewDense(0, 0, nil)
 	subVector.Sub(vectorX, vectorY)
-	result := self.InnerProduct(subVector, subVector)
+	result := p.InnerProduct(subVector, subVector)
 
 	return math.Sqrt(result)
 }
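
The formula in the comment, K(x, y) = (xᵀy + 1)^d, is easy to check by hand. A minimal sketch computing the same quantity over plain slices instead of mat64 vectors (`polyInnerProduct` is illustrative, not the library function):

```go
package main

import (
	"fmt"
	"math"
)

// polyInnerProduct computes K(x, y) = (x^T y + 1)^d.
func polyInnerProduct(x, y []float64, degree int) float64 {
	dot := 0.0
	for i := range x {
		dot += x[i] * y[i]
	}
	return math.Pow(dot+1, float64(degree))
}

func main() {
	// x^T y = 1*3 + 2*4 = 11, so K = (11+1)^2 = 144.
	fmt.Println(polyInnerProduct([]float64{1, 2}, []float64{3, 4}, 2)) // 144
}
```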

@@ -10,18 +10,18 @@ type RBFKernel struct {
 	gamma float64
}
 
-// Radial Basis Function Kernel
+// NewRBFKernel returns a representation of a Radial Basis Function Kernel
 func NewRBFKernel(gamma float64) *RBFKernel {
 	return &RBFKernel{gamma: gamma}
 }
 
-// Compute inner product through kernel trick
+// InnerProduct computes the inner product through a kernel trick
 // K(x, y) = exp(-gamma * ||x - y||^2)
-func (self *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (r *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	euclidean := NewEuclidean()
 	distance := euclidean.Distance(vectorX, vectorY)
 
-	result := math.Exp(-self.gamma * math.Pow(distance, 2))
+	result := math.Exp(-r.gamma * math.Pow(distance, 2))
 
 	return result
 }
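
For K(x, y) = exp(−γ‖x − y‖²), identical vectors give 1 and the value decays toward 0 with distance. A slice-based sketch of the same computation (again illustrative, not the mat64 implementation above):

```go
package main

import (
	"fmt"
	"math"
)

// rbfInnerProduct computes K(x, y) = exp(-gamma * ||x - y||^2).
func rbfInnerProduct(x, y []float64, gamma float64) float64 {
	sq := 0.0
	for i := range x {
		d := x[i] - y[i]
		sq += d * d // squared Euclidean distance, no Sqrt needed
	}
	return math.Exp(-gamma * sq)
}

func main() {
	fmt.Println(rbfInnerProduct([]float64{0, 0}, []float64{0, 0}, 0.5)) // 1
	fmt.Println(rbfInnerProduct([]float64{0, 0}, []float64{1, 1}, 0.5)) // exp(-1) ≈ 0.368
}
```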

@@ -2,7 +2,7 @@ package optimisation
 
 import "github.com/gonum/matrix/mat64"
 
-// Batch gradient descent finds the local minimum of a function.
+// BatchGradientDescent finds the local minimum of a function.
 // See http://en.wikipedia.org/wiki/Gradient_descent for more details.
 func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
 	m, _ := y.Dims()

@@ -35,7 +35,7 @@ func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *m
 	return theta
 }
 
-// Stochastic gradient descent updates the parameters of theta on a random row selection from a matrix.
+// StochasticGradientDescent updates the parameters of theta on a random row selection from a matrix.
 // It is faster as it does not compute the cost function over the entire dataset every time.
 // It instead calculates the error parameters over only one row of the dataset at a time.
 // In return, there is a trade off for accuracy. This is minimised by running multiple SGD processes
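
For least squares, one batch update is θ ← θ − (α/m)·Xᵀ(Xθ − y), where m is the row count and α the learning rate. A slice-based sketch of a single step, under the assumption that this is the standard update the mat64 code above performs (`gradientStep` is written here for illustration):

```go
package main

import "fmt"

// gradientStep performs one batch update: theta -= (alpha/m) * X^T (X*theta - y).
func gradientStep(x [][]float64, y, theta []float64, alpha float64) {
	m := len(y)
	grad := make([]float64, len(theta))
	for i := 0; i < m; i++ {
		pred := 0.0
		for j := range theta {
			pred += x[i][j] * theta[j] // prediction for row i
		}
		for j := range theta {
			grad[j] += (pred - y[i]) * x[i][j] // accumulate X^T (X*theta - y)
		}
	}
	for j := range theta {
		theta[j] -= alpha / float64(m) * grad[j]
	}
}

func main() {
	// One bias column plus one feature; the true relationship is y = 2*x.
	x := [][]float64{{1, 1}, {1, 2}, {1, 3}}
	y := []float64{2, 4, 6}
	theta := []float64{0, 0}
	for epoch := 0; epoch < 1000; epoch++ {
		gradientStep(x, y, theta, 0.1)
	}
	fmt.Printf("%.2f %.2f\n", theta[0], theta[1]) // ≈ 0.00 2.00
}
```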

@@ -12,7 +12,7 @@ import (
 type InformationGainRuleGenerator struct {
 }
 
-// GetSplitAttribute returns the non-class Attribute which maximises the
+// GenerateSplitAttribute returns the non-class Attribute which maximises the
 // information gain.
 //
 // IMPORTANT: passing a base.Instances with no Attributes other than the class

@@ -27,7 +27,7 @@ func (r *InformationGainRuleGenerator) GenerateSplitAttribute(f *base.Instances)
 	return r.GetSplitAttributeFromSelection(allAttributes, f)
 }
 
-// GetSplitAttribute from selection returns the class Attribute which maximises
+// GetSplitAttributeFromSelection returns the class Attribute which maximises
 // the information gain amongst consideredAttributes
 //
 // IMPORTANT: passing a zero-length consideredAttributes parameter will panic()

trees/id3.go | 24

@@ -156,18 +156,18 @@ func (d *DecisionTreeNode) Prune(using *base.Instances) {
 	// If you're a leaf, you're already pruned
 	if d.Children == nil {
 		return
-	} else {
-		if d.SplitAttr == nil {
-			return
-		}
-		// Recursively prune children of this node
-		sub := using.DecomposeOnAttributeValues(d.SplitAttr)
-		for k := range d.Children {
-			if sub[k] == nil {
-				continue
-			}
-			d.Children[k].Prune(sub[k])
-		}
 	}
+	if d.SplitAttr == nil {
+		return
+	}
+
+	// Recursively prune children of this node
+	sub := using.DecomposeOnAttributeValues(d.SplitAttr)
+	for k := range d.Children {
+		if sub[k] == nil {
+			continue
+		}
+		d.Children[k].Prune(sub[k])
+	}
 
 	// Get a baseline accuracy

@@ -234,7 +234,7 @@ type ID3DecisionTree struct {
 	PruneSplit float64
 }
 
-// Returns a new ID3DecisionTree with the specified test-prune
+// NewID3DecisionTree returns a new ID3DecisionTree with the specified test-prune
 // ratio. Of the ratio is less than 0.001, the tree isn't pruned
 func NewID3DecisionTree(prune float64) *ID3DecisionTree {
 	return &ID3DecisionTree{

@@ -66,7 +66,7 @@ func NewRandomTree(attrs int) *RandomTree {
 	}
 }
 
-// Train builds a RandomTree suitable for prediction
+// Fit builds a RandomTree suitable for prediction
 func (rt *RandomTree) Fit(from *base.Instances) {
 	rt.Root = InferID3Tree(from, rt.Rule)
 }

@@ -32,7 +32,7 @@ func SortIntMap(m map[int]float64) []int {
 	sm.m = m
 	sm.s = make([]int, len(m))
 	i := 0
-	for key, _ := range m {
+	for key := range m {
 		sm.s[i] = key
 		i++
 	}

@@ -62,7 +62,7 @@ func SortStringMap(m map[string]int) []string {
 	sm.m = m
 	sm.s = make([]string, len(m))
 	i := 0
-	for key, _ := range m {
+	for key := range m {
 		sm.s[i] = key
 		i++
 	}