diff --git a/base/csv.go b/base/csv.go
index 037d907..516859d 100644
--- a/base/csv.go
+++ b/base/csv.go
@@ -43,7 +43,7 @@ func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
 	return attrs
 }
 
-// ParseCsvSniffAttributeNames returns a slice containing the top row
+// ParseCSVSniffAttributeNames returns a slice containing the top row
 // of a given CSV file, or placeholders if hasHeaders is false.
 func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
 	file, err := os.Open(filepath)
diff --git a/base/csv_test.go b/base/csv_test.go
index 354ef02..c936327 100644
--- a/base/csv_test.go
+++ b/base/csv_test.go
@@ -5,12 +5,12 @@ import "testing"
 
 func TestParseCSVGetRows(testEnv *testing.T) {
 	lineCount := ParseCSVGetRows("../examples/datasets/iris.csv")
 	if lineCount != 150 {
-		testEnv.Error("Should have %d lines, has %d", 150, lineCount)
+		testEnv.Errorf("Should have %d lines, has %d", 150, lineCount)
 	}
 
 	lineCount = ParseCSVGetRows("../examples/datasets/iris_headers.csv")
 	if lineCount != 151 {
-		testEnv.Error("Should have %d lines, has %d", 151, lineCount)
+		testEnv.Errorf("Should have %d lines, has %d", 151, lineCount)
 	}
 }
 
@@ -18,14 +18,14 @@ func TestParseCSVGetRows(testEnv *testing.T) {
 func TestParseCCSVGetAttributes(testEnv *testing.T) {
 	attrs := ParseCSVGetAttributes("../examples/datasets/iris_headers.csv", true)
 	if attrs[0].GetType() != Float64Type {
-		testEnv.Error("First attribute should be a float, %s", attrs[0])
+		testEnv.Errorf("First attribute should be a float, %s", attrs[0])
 	}
 	if attrs[0].GetName() != "Sepal length" {
-		testEnv.Error(attrs[0].GetName())
+		testEnv.Errorf("%s", attrs[0].GetName())
 	}
 	if attrs[4].GetType() != CategoricalType {
-		testEnv.Error("Final attribute should be categorical, %s", attrs[4])
+		testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
 	}
 	if attrs[4].GetName() != "Species" {
 		testEnv.Error(attrs[4])
@@ -35,19 +35,19 @@ func TestParseCCSVGetAttributes(testEnv *testing.T) {
 func TestParseCsvSniffAttributeTypes(testEnv *testing.T) {
 	attrs := ParseCSVSniffAttributeTypes("../examples/datasets/iris_headers.csv", true)
 	if attrs[0].GetType() != Float64Type {
-		testEnv.Error("First attribute should be a float, %s", attrs[0])
+		testEnv.Errorf("First attribute should be a float, %s", attrs[0])
 	}
 	if attrs[1].GetType() != Float64Type {
-		testEnv.Error("Second attribute should be a float, %s", attrs[1])
+		testEnv.Errorf("Second attribute should be a float, %s", attrs[1])
 	}
 	if attrs[2].GetType() != Float64Type {
-		testEnv.Error("Third attribute should be a float, %s", attrs[2])
+		testEnv.Errorf("Third attribute should be a float, %s", attrs[2])
 	}
 	if attrs[3].GetType() != Float64Type {
-		testEnv.Error("Fourth attribute should be a float, %s", attrs[3])
+		testEnv.Errorf("Fourth attribute should be a float, %s", attrs[3])
 	}
 	if attrs[4].GetType() != CategoricalType {
-		testEnv.Error("Final attribute should be categorical, %s", attrs[4])
+		testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
 	}
 }
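For reference, here is a minimal sketch of the renamed CSV helpers in use. It relies only on the signatures visible above (`ParseCSVGetRows`, `ParseCSVGetAttributes`, `ParseCSVSniffAttributeNames`); the dataset path mirrors the one the tests use and is an assumption about your working directory.

```go
package main

import (
	"fmt"

	base "github.com/sjwhitworth/golearn/base"
)

func main() {
	const path = "examples/datasets/iris_headers.csv" // assumed location

	// Sniff header names and attribute types from a headered CSV.
	names := base.ParseCSVSniffAttributeNames(path, true)
	attrs := base.ParseCSVGetAttributes(path, true)
	rows := base.ParseCSVGetRows(path)

	fmt.Println(rows, "rows (including the header)")
	for i, a := range attrs {
		fmt.Printf("%d: %s (type %v)\n", i, names[i], a.GetType())
	}
}
```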
diff --git a/base/domain.go b/base/domain.go
index ff247e2..af82ca9 100644
--- a/base/domain.go
+++ b/base/domain.go
@@ -12,17 +12,18 @@ import (
 	mat64 "github.com/gonum/matrix/mat64"
 )
 
-// An object that can ingest some data and train on it.
+// An Estimator is an object that can ingest some data and train on it.
 type Estimator interface {
 	Fit()
 }
 
-// An object that provides predictions.
+// A Predictor is an object that provides predictions.
 type Predictor interface {
 	Predict()
 }
 
-// An supervised learning object, that is possible of scoring accuracy against a test set.
+// A Model is a supervised learning object that is
+// capable of scoring accuracy against a test set.
 type Model interface {
 	Score()
 }
@@ -31,7 +32,7 @@ type BaseEstimator struct {
 	Data *mat64.Dense
 }
 
-// Serialises an estimator to a provided filepath, in gob format.
+// SaveEstimatorToGob serialises an estimator to a provided filepath, in gob format.
 // See http://golang.org/pkg/encoding/gob for further details.
 func SaveEstimatorToGob(path string, e *Estimator) {
 	b := new(bytes.Buffer)
diff --git a/base/instances.go b/base/instances.go
index 124745a..9c396ae 100644
--- a/base/instances.go
+++ b/base/instances.go
@@ -142,7 +142,7 @@ func NewInstances(attrs []Attribute, rows int) *Instances {
 func CheckNewInstancesFromRaw(attrs []Attribute, rows int, data []float64) error {
 	size := rows * len(attrs)
 	if size < len(data) {
-		return errors.New("base: data length is larger than the rows * attribute space.")
+		return errors.New("base: data length is larger than the rows * attribute space")
 	} else if size > len(data) {
 		return errors.New("base: data is smaller than the rows * attribute space")
 	}
@@ -198,7 +198,6 @@ func InstancesTrainTestSplit(src *Instances, prop float64) (*Instances, *Instanc
 		rawTestMatrix.SetRow(i, rowDat)
 	}
 
-
 	trainingRet := NewInstancesFromDense(src.attributes, len(trainingRows), rawTrainMatrix)
 	testRet := NewInstancesFromDense(src.attributes, len(testingRows), rawTestMatrix)
 	return trainingRet, testRet
@@ -217,7 +216,7 @@ func (inst *Instances) CountAttrValues(a Attribute) map[string]int {
 	for i := 0; i < inst.Rows; i++ {
 		sysVal := inst.Get(i, attrIndex)
 		stringVal := a.GetStringFromSysVal(sysVal)
-		ret[stringVal] += 1
+		ret[stringVal]++
 	}
 	return ret
 }
@@ -320,7 +319,7 @@ func (inst *Instances) GetRowVector(row int) []float64 {
 	return inst.storage.RowView(row)
 }
 
-// GetRowVector returns a row of system representation
+// GetRowVectorWithoutClass returns a row of system representation
 // values at the given row index, excluding the class attribute
 func (inst *Instances) GetRowVectorWithoutClass(row int) []float64 {
 	rawRow := make([]float64, inst.Cols)
@@ -337,7 +336,7 @@ func (inst *Instances) GetClass(row int) string {
 	return attr.GetStringFromSysVal(val)
 }
 
-// GetClassDist returns a map containing the count of each
+// GetClassDistribution returns a map containing the count of each
 // class type (indexed by the class' string representation)
 func (inst *Instances) GetClassDistribution() map[string]int {
 	ret := make(map[string]int)
@@ -351,13 +350,13 @@ func (inst *Instances) GetClassDistribution() map[string]int {
 	return ret
 }
 
-func (Inst *Instances) GetClassAttrPtr() *Attribute {
-	attr := Inst.GetAttr(Inst.ClassIndex)
+func (inst *Instances) GetClassAttrPtr() *Attribute {
+	attr := inst.GetAttr(inst.ClassIndex)
 	return &attr
 }
 
-func (Inst *Instances) GetClassAttr() Attribute {
-	return Inst.GetAttr(Inst.ClassIndex)
+func (inst *Instances) GetClassAttr() Attribute {
+	return inst.GetAttr(inst.ClassIndex)
 }
 
 //
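A quick sketch of the two renamed `Instances` helpers together, using only the signatures shown above. Note the diff doesn't show whether `prop` is the train or test share, so treat that as an assumption; `summariseSplit` is a hypothetical helper, not golearn API.

```go
package example

import (
	"fmt"

	base "github.com/sjwhitworth/golearn/base"
)

// summariseSplit splits a dataset and prints the class distribution
// of each half. The meaning of prop (train vs. test share) is an
// assumption; check InstancesTrainTestSplit before relying on it.
func summariseSplit(inst *base.Instances) {
	train, test := base.InstancesTrainTestSplit(inst, 0.30)
	fmt.Println("train:", train.GetClassDistribution())
	fmt.Println("test: ", test.GetClassDistribution())
}
```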
diff --git a/cross_validation/cross_validation.go b/cross_validation/cross_validation.go
index 58e3acf..823db8c 100644
--- a/cross_validation/cross_validation.go
+++ b/cross_validation/cross_validation.go
@@ -15,10 +15,10 @@ func shuffleMatrix(returnDatasets []*mat.Dense, dataset mat.Matrix, testSize int
 	shuffledSet := mat.DenseCopyOf(dataset)
 	rowCount, colCount := shuffledSet.Dims()
 	temp := make([]float64, colCount)
-
+	// Fisher–Yates shuffle
 	for i := 0; i < rowCount; i++ {
-		j := numGen.Intn(i+1)
+		j := numGen.Intn(i + 1)
 
 		if j != i {
 			// Make a "hard" copy to avoid pointer craziness.
 			copy(temp, shuffledSet.RowView(i))
@@ -43,29 +43,29 @@ func TrainTestSplit(size interface{}, randomState interface{}, datasets ...*mat.
 	// Input should be one or two matrices.
 	dataCount := len(datasets)
 	if dataCount > 2 {
-		return nil, fmt.Errorf("Expected 1 or 2 datasets, got %d\n", dataCount)
+		return nil, fmt.Errorf("expected 1 or 2 datasets, got %d", dataCount)
 	}
 
 	if dataCount == 2 {
 		// Test for consistency.
 		labelCount, labelFeatures := datasets[1].Dims()
 		if labelCount != instanceCount {
-			return nil, fmt.Errorf("Data and labels must have the same number of instances")
+			return nil, fmt.Errorf("data and labels must have the same number of instances")
 		} else if labelFeatures != 1 {
-			return nil, fmt.Errorf("Label matrix must have single feature")
+			return nil, fmt.Errorf("label matrix must have a single feature")
 		}
 	}
 
 	var testSize int
 	switch size := size.(type) {
-		// If size is an integer, treat it as the test data instance count.
+	// If size is an integer, treat it as the test data instance count.
 	case int:
 		testSize = size
 	case float64:
 		// If size is a float, treat it as a percentage of the instances to be allocated to the test set.
 		testSize = int(float64(instanceCount)*size + 0.5)
 	default:
-		return nil, fmt.Errorf("Expected a test instance count (int) or percentage (float64)")
+		return nil, fmt.Errorf("expected a test instance count (int) or percentage (float64)")
 	}
 
 	var randSeed int64
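The new comment in `shuffleMatrix` names the Fisher–Yates shuffle. As a standalone illustration of the same idea (this is not golearn's internal code; it operates on a plain slice of rows rather than a `mat.Dense`):

```go
package example

import "math/rand"

// fisherYates permutes rows in place: each row i swaps with a
// uniformly chosen row j <= i, which yields an unbiased permutation
// in a single pass.
func fisherYates(rows [][]float64, rng *rand.Rand) {
	for i := range rows {
		j := rng.Intn(i + 1)
		rows[i], rows[j] = rows[j], rows[i]
	}
}
```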
diff --git a/ensemble/randomforest.go b/ensemble/randomforest.go
index a0a364b..e00129a 100644
--- a/ensemble/randomforest.go
+++ b/ensemble/randomforest.go
@@ -1,10 +1,10 @@
 package ensemble
 
 import (
+	"fmt"
 	base "github.com/sjwhitworth/golearn/base"
 	meta "github.com/sjwhitworth/golearn/meta"
 	trees "github.com/sjwhitworth/golearn/trees"
-	"fmt"
 )
 
 // RandomForest classifies instances using an ensemble
@@ -16,7 +16,7 @@ type RandomForest struct {
 	Model *meta.BaggedModel
 }
 
-// NewRandomForests generates and return a new random forests
+// NewRandomForest generates and returns a new random forest
 // forestSize controls the number of trees that get built
 // features controls the number of features used to build each tree
 func NewRandomForest(forestSize int, features int) *RandomForest {
@@ -29,7 +29,7 @@ func NewRandomForest(forestSize int, features int) *RandomForest {
 	return ret
 }
 
-// Train builds the RandomForest on the specified instances
+// Fit builds the RandomForest on the specified instances
 func (f *RandomForest) Fit(on *base.Instances) {
 	f.Model = new(meta.BaggedModel)
 	f.Model.RandomFeatures = f.Features
@@ -47,4 +47,4 @@ func (f *RandomForest) Predict(with *base.Instances) *base.Instances {
 
 func (f *RandomForest) String() string {
 	return fmt.Sprintf("RandomForest(ForestSize: %d, Features:%d, %s\n)", f.ForestSize, f.Features, f.Model)
-}
\ No newline at end of file
+}
diff --git a/evaluation/confusion.go b/evaluation/confusion.go
index 31a3a48..3b00224 100644
--- a/evaluation/confusion.go
+++ b/evaluation/confusion.go
@@ -23,7 +23,7 @@ func GetConfusionMatrix(ref *base.Instances, gen *base.Instances) map[string]map
 		referenceClass := ref.GetClass(i)
 		predictedClass := gen.GetClass(i)
 		if _, ok := ret[referenceClass]; ok {
-			ret[referenceClass][predictedClass] += 1
+			ret[referenceClass][predictedClass]++
 		} else {
 			ret[referenceClass] = make(map[string]int)
 			ret[referenceClass][predictedClass] = 1
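The renamed `Fit`/`Predict` pair composes naturally with `GetConfusionMatrix`. A sketch under the signatures shown above; the `evaluation` package name and import aliases are assumptions inferred from the file layout, and the forest sizes are arbitrary:

```go
package example

import (
	"fmt"

	base "github.com/sjwhitworth/golearn/base"
	ensemble "github.com/sjwhitworth/golearn/ensemble"
	eval "github.com/sjwhitworth/golearn/evaluation"
)

// evaluateForest trains a 10-tree forest (4 features per tree),
// predicts a held-out set, and tabulates the confusion matrix.
func evaluateForest(train, test *base.Instances) {
	forest := ensemble.NewRandomForest(10, 4)
	forest.Fit(train)
	predictions := forest.Predict(test)
	fmt.Println(eval.GetConfusionMatrix(test, predictions))
}
```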
diff --git a/filters/chimerge.go b/filters/chimerge.go
index 720702a..01a5040 100644
--- a/filters/chimerge.go
+++ b/filters/chimerge.go
@@ -21,7 +21,7 @@ type ChiMergeFilter struct {
 	_Trained bool
 }
 
-// Create a ChiMergeFilter with some helpful intialisations.
+// NewChiMergeFilter creates a ChiMergeFilter with some helpful initialisations.
 func NewChiMergeFilter(inst *base.Instances, significance float64) ChiMergeFilter {
 	return ChiMergeFilter{
 		make([]int, 0),
@@ -45,16 +45,16 @@ func (c *ChiMergeFilter) Build() {
 
 // AddAllNumericAttributes adds every suitable attribute
 // to the ChiMergeFilter for discretisation
-func (b *ChiMergeFilter) AddAllNumericAttributes() {
-	for i := 0; i < b.Instances.Cols; i++ {
-		if i == b.Instances.ClassIndex {
+func (c *ChiMergeFilter) AddAllNumericAttributes() {
+	for i := 0; i < c.Instances.Cols; i++ {
+		if i == c.Instances.ClassIndex {
 			continue
 		}
-		attr := b.Instances.GetAttr(i)
+		attr := c.Instances.GetAttr(i)
 		if attr.GetType() != base.Float64Type {
 			continue
 		}
-		b.Attributes = append(b.Attributes, i)
+		c.Attributes = append(c.Attributes, i)
 	}
 }
 
@@ -110,7 +110,7 @@ type FrequencyTableEntry struct {
 }
 
 func (t *FrequencyTableEntry) String() string {
-	return fmt.Sprintf("%.2f %s", t.Value, t.Frequency)
+	return fmt.Sprintf("%.2f %v", t.Value, t.Frequency)
 }
 
 func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEntry {
@@ -129,7 +129,7 @@ func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEn
 	for _, entry := range ret {
 		if entry.Value == valueConv {
 			found = true
-			entry.Frequency[class] += 1
+			entry.Frequency[class]++
 		}
 	}
 	if !found {
diff --git a/filters/chimerge_test.go b/filters/chimerge_test.go
index 0f49404..029eb23 100644
--- a/filters/chimerge_test.go
+++ b/filters/chimerge_test.go
@@ -20,7 +20,7 @@ func TestChiMFreqTable(testEnv *testing.T) {
 		testEnv.Error("Wrong frequency")
 	}
 	if freq[0].Frequency["c3"] != 4 {
-		testEnv.Error("Wrong frequency %s", freq[1])
+		testEnv.Errorf("Wrong frequency %s", freq[1])
 	}
 	if freq[10].Frequency["c2"] != 1 {
 		testEnv.Error("Wrong frequency")
@@ -111,7 +111,7 @@ func TestChiMerge2(testEnv *testing.T) {
 	inst.Sort(base.Ascending, attrs)
 	freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)
 	if len(freq) != 5 {
-		testEnv.Error("Wrong length (%d)", len(freq))
+		testEnv.Errorf("Wrong length (%d)", len(freq))
 		testEnv.Error(freq)
 	}
 	if freq[0].Value != 4.3 {
diff --git a/knn/knn.go b/knn/knn.go
index d8b10cd..e16ed74 100644
--- a/knn/knn.go
+++ b/knn/knn.go
@@ -1,4 +1,4 @@
-// Package KNN implements a K Nearest Neighbors object, capable of both classification
+// Package knn implements a K Nearest Neighbors object, capable of both classification
 // and regression. It accepts data in the form of a slice of float64s, which are then reshaped
 // into a X by Y matrix.
 package knn
@@ -10,7 +10,7 @@ import (
 	util "github.com/sjwhitworth/golearn/utilities"
 )
 
-// A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
+// A KNNClassifier consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
 // The accepted distance functions at this time are 'euclidean' and 'manhattan'.
 type KNNClassifier struct {
 	base.BaseEstimator
@@ -19,7 +19,7 @@ type KNNClassifier struct {
 	NearestNeighbours int
 }
 
-// Returns a new classifier
+// NewKnnClassifier returns a new classifier
 func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
 	KNN := KNNClassifier{}
 	KNN.DistanceFunc = distfunc
@@ -27,12 +27,12 @@ func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
 	return &KNN
 }
 
-// Train stores the training data for llater
+// Fit stores the training data for later
 func (KNN *KNNClassifier) Fit(trainingData *base.Instances) {
 	KNN.TrainingData = trainingData
}
 
-// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
+// PredictOne returns a classification for a single input vector, using the KNN algorithm.
 // See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
 func (KNN *KNNClassifier) PredictOne(vector []float64) string {
 
@@ -75,7 +75,7 @@ func (KNN *KNNClassifier) PredictOne(vector []float64) string {
 		labels = append(labels, label)
 
 		if _, ok := maxmap[label]; ok {
-			maxmap[label] += 1
+			maxmap[label]++
 		} else {
 			maxmap[label] = 1
 		}
@@ -95,14 +95,14 @@ func (KNN *KNNClassifier) Predict(what *base.Instances) *base.Instances {
 	return ret
 }
 
-//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
+// A KNNRegressor consists of a data matrix, associated result variables in the same order as the matrix, and a name.
 type KNNRegressor struct {
 	base.BaseEstimator
 	Values       []float64
 	DistanceFunc string
 }
 
-// Mints a new classifier.
+// NewKnnRegressor mints a new regressor.
 func NewKnnRegressor(distfunc string) *KNNRegressor {
 	KNN := KNNRegressor{}
 	KNN.DistanceFunc = distfunc
@@ -119,7 +119,6 @@ func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows int, cols
 }
 
 func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
-
 	// Get the number of rows
 	rows, _ := KNN.Data.Dims()
 	rownumbers := make(map[int]float64)
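The KNN classifier's renamed API in use, a sketch built only from the constructor and methods shown above ('euclidean' is one of the two distance functions the doc comment names):

```go
package example

import (
	"fmt"

	base "github.com/sjwhitworth/golearn/base"
	knn "github.com/sjwhitworth/golearn/knn"
)

// classifyWithKNN runs 2-nearest-neighbours under Euclidean distance:
// Fit stores the training set, Predict labels every test instance.
func classifyWithKNN(train, test *base.Instances) {
	cls := knn.NewKnnClassifier("euclidean", 2)
	cls.Fit(train)
	predictions := cls.Predict(test)
	fmt.Println(predictions)
}
```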
diff --git a/linear_models/liblinear.go b/linear_models/liblinear.go
index 94893c5..82c6133 100644
--- a/linear_models/liblinear.go
+++ b/linear_models/liblinear.go
@@ -49,7 +49,7 @@ func NewProblem(X [][]float64, y []float64, bias float64) *Problem {
 	prob.c_prob.x = convert_features(X, bias)
 	c_y := make([]C.int, len(y))
 
-	for i := 0; i < len(y); i += 1 {
+	for i := 0; i < len(y); i++ {
 		c_y[i] = C.int(y[i])
 	}
 	prob.c_prob.y = &c_y[0]
@@ -70,26 +70,26 @@ func Predict(model *Model, x []float64) float64 {
 }
 
 func convert_vector(x []float64, bias float64) *C.struct_feature_node {
 	n_ele := 0
-	for i := 0; i < len(x); i += 1 {
+	for i := 0; i < len(x); i++ {
 		if x[i] > 0 {
-			n_ele += 1
+			n_ele++
 		}
 	}
 	n_ele += 2
 
 	c_x := make([]C.struct_feature_node, n_ele)
 	j := 0
-	for i := 0; i < len(x); i += 1 {
+	for i := 0; i < len(x); i++ {
 		if x[i] > 0 {
 			c_x[j].index = C.int(i + 1)
 			c_x[j].value = C.double(x[i])
-			j += 1
+			j++
 		}
 	}
 
 	if bias > 0 {
 		c_x[j].index = C.int(0)
 		c_x[j].value = C.double(0)
-		j += 1
+		j++
 	}
 	c_x[j].index = C.int(-1)
 	return &c_x[0]
@@ -98,12 +98,12 @@ func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
 	n_samples := len(X)
 	n_elements := 0
 
-	for i := 0; i < n_samples; i += 1 {
-		for j := 0; j < len(X[i]); j += 1 {
+	for i := 0; i < n_samples; i++ {
+		for j := 0; j < len(X[i]); j++ {
 			if X[i][j] != 0.0 {
-				n_elements += 1
+				n_elements++
 			}
-			n_elements += 1 //for bias
+			n_elements++ // for bias
 		}
 	}
 
@@ -113,23 +113,23 @@ func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
 	x := make([]*C.struct_feature_node, n_samples)
 	var c_x **C.struct_feature_node
 
-	for i := 0; i < n_samples; i += 1 {
+	for i := 0; i < n_samples; i++ {
 		x[i] = &x_space[cursor]
 
-		for j := 0; j < len(X[i]); j += 1 {
+		for j := 0; j < len(X[i]); j++ {
 			if X[i][j] != 0.0 {
 				x_space[cursor].index = C.int(j + 1)
 				x_space[cursor].value = C.double(X[i][j])
-				cursor += 1
+				cursor++
 			}
 			if bias > 0 {
 				x_space[cursor].index = C.int(0)
 				x_space[cursor].value = C.double(bias)
-				cursor += 1
+				cursor++
 			}
 		}
 		x_space[cursor].index = C.int(-1)
-		cursor += 1
+		cursor++
 	}
 	c_x = &x[0]
 	return c_x
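For readers unfamiliar with LIBLINEAR's data layout: `convert_vector` above builds a sparse array of (index, value) pairs with 1-based feature indices, an optional bias node at index 0, and an index of -1 as terminator. A pure-Go sketch of the same encoding, faithful to the code shown (including its choice to keep only positive entries; note also that `convert_vector` writes 0 as the bias value where `convert_features` writes `bias`):

```go
package example

// featureNode mirrors LIBLINEAR's sparse entry: a 1-based feature
// index plus its value; index -1 terminates a vector.
type featureNode struct {
	index int
	value float64
}

// sparsify encodes a dense vector the way convert_vector does:
// one node per positive entry, an optional bias node, then the
// index -1 sentinel.
func sparsify(x []float64, bias float64) []featureNode {
	var out []featureNode
	for i, v := range x {
		if v > 0 {
			out = append(out, featureNode{index: i + 1, value: v})
		}
	}
	if bias > 0 {
		out = append(out, featureNode{index: 0, value: bias})
	}
	return append(out, featureNode{index: -1})
}
```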
diff --git a/meta/bagging.go b/meta/bagging.go
index 8813f42..64f979d 100644
--- a/meta/bagging.go
+++ b/meta/bagging.go
@@ -79,7 +79,7 @@ func (b *BaggedModel) AddModel(m base.Classifier) {
 	b.Models = append(b.Models, m)
 }
 
-// Train generates and trains each model on a randomised subset of
+// Fit generates and trains each model on a randomised subset of
 // Instances.
 func (b *BaggedModel) Fit(from *base.Instances) {
 	var wait sync.WaitGroup
@@ -153,7 +153,7 @@ func (b *BaggedModel) Predict(from *base.Instances) *base.Instances {
 	}
 
 	// Send all the models to the workers for prediction
-	for i, _ := range b.Models {
+	for i := range b.Models {
 		processpipe <- i
 	}
 	close(processpipe) // Finished sending models to be predicted
diff --git a/metrics/pairwise/chebyshev.go b/metrics/pairwise/chebyshev.go
index 0068433..86f5663 100644
--- a/metrics/pairwise/chebyshev.go
+++ b/metrics/pairwise/chebyshev.go
@@ -12,7 +12,7 @@ func NewChebyshev() *Chebyshev {
 	return &Chebyshev{}
 }
 
-func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (c *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	r1, c1 := vectorX.Dims()
 	r2, c2 := vectorY.Dims()
 	if r1 != r2 || c1 != c2 {
diff --git a/metrics/pairwise/cranberra.go b/metrics/pairwise/cranberra.go
index 8a972cd..09551de 100644
--- a/metrics/pairwise/cranberra.go
+++ b/metrics/pairwise/cranberra.go
@@ -15,12 +15,11 @@ func NewCranberra() *Cranberra {
 func cranberraDistanceStep(num float64, denom float64) float64 {
 	if num == .0 && denom == .0 {
 		return .0
-	} else {
-		return num / denom
 	}
+	return num / denom
 }
 
-func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (c *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	r1, c1 := vectorX.Dims()
 	r2, c2 := vectorY.Dims()
 	if r1 != r2 || c1 != c2 {
diff --git a/metrics/pairwise/euclidean.go b/metrics/pairwise/euclidean.go
index 882ff3b..8be0931 100644
--- a/metrics/pairwise/euclidean.go
+++ b/metrics/pairwise/euclidean.go
@@ -12,19 +12,19 @@ func NewEuclidean() *Euclidean {
 	return &Euclidean{}
 }
 
-// Compute Eucledian inner product.
-func (self *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// InnerProduct computes the Euclidean inner product.
+func (e *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	result := vectorX.Dot(vectorY)
 
 	return result
 }
 
-// Compute Euclidean distance (also known as L2 distance).
-func (self *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes the Euclidean distance (also known as L2 distance).
+func (e *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	subVector := mat64.NewDense(0, 0, nil)
 	subVector.Sub(vectorX, vectorY)
-	result := self.InnerProduct(subVector, subVector)
+	result := e.InnerProduct(subVector, subVector)
 
 	return math.Sqrt(result)
 }
diff --git a/metrics/pairwise/manhattan.go b/metrics/pairwise/manhattan.go
index 6da0a6e..5664a9d 100644
--- a/metrics/pairwise/manhattan.go
+++ b/metrics/pairwise/manhattan.go
@@ -12,9 +12,9 @@ func NewManhattan() *Manhattan {
 	return &Manhattan{}
 }
 
-// Manhattan distance, also known as L1 distance.
-// Compute sum of absolute values of elements.
-func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes the Manhattan distance, also known as L1 distance:
+// the sum of the absolute values of the element-wise differences.
+func (m *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	r1, c1 := vectorX.Dims()
 	r2, c2 := vectorY.Dims()
 	if r1 != r2 || c1 != c2 {
diff --git a/metrics/pairwise/poly_kernel.go b/metrics/pairwise/poly_kernel.go
index 03e9ef3..a4844bb 100644
--- a/metrics/pairwise/poly_kernel.go
+++ b/metrics/pairwise/poly_kernel.go
@@ -10,25 +10,25 @@ type PolyKernel struct {
 	degree int
 }
 
-// Return a d-degree polynomial kernel
+// NewPolyKernel returns a d-degree polynomial kernel
 func NewPolyKernel(degree int) *PolyKernel {
 	return &PolyKernel{degree: degree}
 }
 
-// Compute inner product through kernel trick
+// InnerProduct computes the inner product through a kernel trick
 // K(x, y) = (x^T y + 1)^d
-func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (p *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	result := vectorX.Dot(vectorY)
-	result = math.Pow(result+1, float64(self.degree))
+	result = math.Pow(result+1, float64(p.degree))
 
 	return result
 }
 
-// Compute distance under the polynomial kernel, maybe no need.
-func (self *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes the distance under the polynomial kernel (may not be needed)
+func (p *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	subVector := mat64.NewDense(0, 0, nil)
 	subVector.Sub(vectorX, vectorY)
-	result := self.InnerProduct(subVector, subVector)
+	result := p.InnerProduct(subVector, subVector)
 
 	return math.Sqrt(result)
 }
diff --git a/metrics/pairwise/rbf_kernel.go b/metrics/pairwise/rbf_kernel.go
index af02463..1f3fc14 100644
--- a/metrics/pairwise/rbf_kernel.go
+++ b/metrics/pairwise/rbf_kernel.go
@@ -10,18 +10,18 @@ type RBFKernel struct {
 	gamma float64
 }
 
-// Radial Basis Function Kernel
+// NewRBFKernel returns a representation of a Radial Basis Function kernel
 func NewRBFKernel(gamma float64) *RBFKernel {
 	return &RBFKernel{gamma: gamma}
 }
 
-// Compute inner product through kernel trick
+// InnerProduct computes the inner product through a kernel trick
 // K(x, y) = exp(-gamma * ||x - y||^2)
-func (self *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (r *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	euclidean := NewEuclidean()
 	distance := euclidean.Distance(vectorX, vectorY)
 
-	result := math.Exp(-self.gamma * math.Pow(distance, 2))
+	result := math.Exp(-r.gamma * math.Pow(distance, 2))
 
 	return result
 }
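The renamed receiver methods on the pairwise metrics are drop-in: a sketch comparing one pair of row vectors under three metrics, using the `Distance(vectorX, vectorY *mat64.Dense) float64` signature shown above (the `pairwise` import path is inferred from the file layout):

```go
package example

import (
	"fmt"

	mat64 "github.com/gonum/matrix/mat64"
	pairwise "github.com/sjwhitworth/golearn/metrics/pairwise"
)

// compareDistances measures the same pair of 1x3 row vectors
// under Euclidean, Manhattan, and Chebyshev distance.
func compareDistances() {
	x := mat64.NewDense(1, 3, []float64{1, 2, 3})
	y := mat64.NewDense(1, 3, []float64{4, 5, 6})

	fmt.Println(pairwise.NewEuclidean().Distance(x, y)) // sqrt(27), about 5.196
	fmt.Println(pairwise.NewManhattan().Distance(x, y)) // 3+3+3 = 9
	fmt.Println(pairwise.NewChebyshev().Distance(x, y)) // max diff = 3
}
```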
diff --git a/optimisation/gradient_descent.go b/optimisation/gradient_descent.go
index 4ddcdd2..43bbd54 100644
--- a/optimisation/gradient_descent.go
+++ b/optimisation/gradient_descent.go
@@ -2,7 +2,7 @@ package optimisation
 
 import "github.com/gonum/matrix/mat64"
 
-// Batch gradient descent finds the local minimum of a function.
+// BatchGradientDescent finds the local minimum of a function.
 // See http://en.wikipedia.org/wiki/Gradient_descent for more details.
 func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
 	m, _ := y.Dims()
@@ -35,7 +35,7 @@ func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *m
 	return theta
 }
 
-// Stochastic gradient descent updates the parameters of theta on a random row selection from a matrix.
+// StochasticGradientDescent updates the parameters of theta on a random row selection from a matrix.
 // It is faster as it does not compute the cost function over the entire dataset every time.
 // It instead calculates the error parameters over only one row of the dataset at a time.
 // In return, there is a trade off for accuracy. This is minimised by running multiple SGD processes
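A toy use of the renamed `BatchGradientDescent`, matching the signature above. The shape conventions (x as m×n design matrix, y as m×1 targets, theta as n×1 parameters) and the assumption that the update is the conventional least-squares gradient step are mine, not confirmed by the diff:

```go
package example

import (
	"fmt"

	mat64 "github.com/gonum/matrix/mat64"
	optimisation "github.com/sjwhitworth/golearn/optimisation"
)

// fitLine tries to recover theta close to 2 for the data y = 2x,
// running 1000 epochs at learning rate 0.01.
func fitLine() {
	x := mat64.NewDense(4, 1, []float64{1, 2, 3, 4})
	y := mat64.NewDense(4, 1, []float64{2, 4, 6, 8})
	theta := mat64.NewDense(1, 1, []float64{0})

	theta = optimisation.BatchGradientDescent(x, y, theta, 0.01, 1000)
	fmt.Println(theta.At(0, 0)) // should approach 2.0
}
```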
diff --git a/trees/entropy.go b/trees/entropy.go
index e7287fc..958107a 100644
--- a/trees/entropy.go
+++ b/trees/entropy.go
@@ -12,7 +12,7 @@ import (
 type InformationGainRuleGenerator struct {
 }
 
-// GetSplitAttribute returns the non-class Attribute which maximises the
+// GenerateSplitAttribute returns the non-class Attribute which maximises the
 // information gain.
 //
 // IMPORTANT: passing a base.Instances with no Attributes other than the class
@@ -27,7 +27,7 @@ func (r *InformationGainRuleGenerator) GenerateSplitAttribute(f *base.Instances)
 	return r.GetSplitAttributeFromSelection(allAttributes, f)
 }
 
-// GetSplitAttribute from selection returns the class Attribute which maximises
+// GetSplitAttributeFromSelection returns the non-class Attribute which maximises
 // the information gain amongst consideredAttributes
 //
 // IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
diff --git a/trees/id3.go b/trees/id3.go
index f7ceab0..5a88faa 100644
--- a/trees/id3.go
+++ b/trees/id3.go
@@ -156,18 +156,18 @@ func (d *DecisionTreeNode) Prune(using *base.Instances) {
 	// If you're a leaf, you're already pruned
 	if d.Children == nil {
 		return
-	} else {
-		if d.SplitAttr == nil {
-			return
-		}
-		// Recursively prune children of this node
-		sub := using.DecomposeOnAttributeValues(d.SplitAttr)
-		for k := range d.Children {
-			if sub[k] == nil {
-				continue
-			}
-			d.Children[k].Prune(sub[k])
+	}
+	if d.SplitAttr == nil {
+		return
+	}
+
+	// Recursively prune children of this node
+	sub := using.DecomposeOnAttributeValues(d.SplitAttr)
+	for k := range d.Children {
+		if sub[k] == nil {
+			continue
 		}
+		d.Children[k].Prune(sub[k])
 	}
 
 	// Get a baseline accuracy
@@ -234,7 +234,7 @@ type ID3DecisionTree struct {
 	PruneSplit float64
 }
 
-// Returns a new ID3DecisionTree with the specified test-prune
+// NewID3DecisionTree returns a new ID3DecisionTree with the specified test-prune
 // ratio. If the ratio is less than 0.001, the tree isn't pruned
 func NewID3DecisionTree(prune float64) *ID3DecisionTree {
 	return &ID3DecisionTree{
diff --git a/trees/random.go b/trees/random.go
index 0a47878..76ba48c 100644
--- a/trees/random.go
+++ b/trees/random.go
@@ -66,7 +66,7 @@ func NewRandomTree(attrs int) *RandomTree {
 	}
 }
 
-// Train builds a RandomTree suitable for prediction
+// Fit builds a RandomTree suitable for prediction
 func (rt *RandomTree) Fit(from *base.Instances) {
 	rt.Root = InferID3Tree(from, rt.Rule)
 }
diff --git a/utilities/utilities.go b/utilities/utilities.go
index 3f2901b..fb15562 100644
--- a/utilities/utilities.go
+++ b/utilities/utilities.go
@@ -32,7 +32,7 @@ func SortIntMap(m map[int]float64) []int {
 	sm.m = m
 	sm.s = make([]int, len(m))
 	i := 0
-	for key, _ := range m {
+	for key := range m {
 		sm.s[i] = key
 		i++
 	}
@@ -62,7 +62,7 @@ func SortStringMap(m map[string]int) []string {
 	sm.m = m
 	sm.s = make([]string, len(m))
 	i := 0
-	for key, _ := range m {
+	for key := range m {
 		sm.s[i] = key
 		i++
 	}
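Finally, the renamed `Fit` on `RandomTree` in use, a sketch grounded in the two calls shown above (`NewRandomTree(attrs int)` and `Fit(from *base.Instances)`); the choice of 2 candidate attributes per split is arbitrary:

```go
package example

import (
	base "github.com/sjwhitworth/golearn/base"
	trees "github.com/sjwhitworth/golearn/trees"
)

// growRandomTree builds a single randomised ID3-style tree,
// considering 2 randomly chosen attributes at each split.
func growRandomTree(train *base.Instances) *trees.RandomTree {
	rt := trees.NewRandomTree(2)
	rt.Fit(train)
	return rt
}
```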