mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-28 13:48:56 +08:00

Merge pull request #49 from njern/golint

Apply Golint and go vet.
Bert Chang 2014-07-18 21:21:35 +08:00
commit 4bda400c66
23 changed files with 112 additions and 114 deletions

View File

@@ -43,7 +43,7 @@ func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
     return attrs
 }
 
-// ParseCsvSniffAttributeNames returns a slice containing the top row
+// ParseCSVSniffAttributeNames returns a slice containing the top row
 // of a given CSV file, or placeholders if hasHeaders is false.
 func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
     file, err := os.Open(filepath)

View File

@@ -5,12 +5,12 @@ import "testing"
 
 func TestParseCSVGetRows(testEnv *testing.T) {
     lineCount := ParseCSVGetRows("../examples/datasets/iris.csv")
     if lineCount != 150 {
-        testEnv.Error("Should have %d lines, has %d", 150, lineCount)
+        testEnv.Errorf("Should have %d lines, has %d", 150, lineCount)
     }
     lineCount = ParseCSVGetRows("../examples/datasets/iris_headers.csv")
     if lineCount != 151 {
-        testEnv.Error("Should have %d lines, has %d", 151, lineCount)
+        testEnv.Errorf("Should have %d lines, has %d", 151, lineCount)
     }
 }
@@ -18,14 +18,14 @@ func TestParseCSVGetRows(testEnv *testing.T) {
 
 func TestParseCCSVGetAttributes(testEnv *testing.T) {
     attrs := ParseCSVGetAttributes("../examples/datasets/iris_headers.csv", true)
     if attrs[0].GetType() != Float64Type {
-        testEnv.Error("First attribute should be a float, %s", attrs[0])
+        testEnv.Errorf("First attribute should be a float, %s", attrs[0])
     }
     if attrs[0].GetName() != "Sepal length" {
-        testEnv.Error(attrs[0].GetName())
+        testEnv.Errorf(attrs[0].GetName())
     }
     if attrs[4].GetType() != CategoricalType {
-        testEnv.Error("Final attribute should be categorical, %s", attrs[4])
+        testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
     }
     if attrs[4].GetName() != "Species" {
         testEnv.Error(attrs[4])
@@ -35,19 +35,19 @@ func TestParseCCSVGetAttributes(testEnv *testing.T) {
 
 func TestParseCsvSniffAttributeTypes(testEnv *testing.T) {
     attrs := ParseCSVSniffAttributeTypes("../examples/datasets/iris_headers.csv", true)
     if attrs[0].GetType() != Float64Type {
-        testEnv.Error("First attribute should be a float, %s", attrs[0])
+        testEnv.Errorf("First attribute should be a float, %s", attrs[0])
     }
     if attrs[1].GetType() != Float64Type {
-        testEnv.Error("Second attribute should be a float, %s", attrs[1])
+        testEnv.Errorf("Second attribute should be a float, %s", attrs[1])
     }
     if attrs[2].GetType() != Float64Type {
-        testEnv.Error("Third attribute should be a float, %s", attrs[2])
+        testEnv.Errorf("Third attribute should be a float, %s", attrs[2])
     }
     if attrs[3].GetType() != Float64Type {
-        testEnv.Error("Fourth attribute should be a float, %s", attrs[3])
+        testEnv.Errorf("Fourth attribute should be a float, %s", attrs[3])
     }
     if attrs[4].GetType() != CategoricalType {
-        testEnv.Error("Final attribute should be categorical, %s", attrs[4])
+        testEnv.Errorf("Final attribute should be categorical, %s", attrs[4])
     }
 }
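
The Error to Errorf rewrites above are what go vet's printf check reports: testing.T.Error just records its arguments, so format verbs such as %d are printed literally, while Errorf passes them through the fmt formatter. A minimal sketch of the difference, using a hypothetical test rather than anything from this commit:

    package base

    import "testing"

    func TestVerbExpansion(t *testing.T) {
        lineCount := 150
        if lineCount != 151 {
            // With t.Error the message would contain a literal "%d";
            // Errorf formats its arguments before reporting the failure.
            t.Errorf("Should have %d lines, has %d", 151, lineCount)
        }
    }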

View File

@@ -12,17 +12,18 @@ import (
     mat64 "github.com/gonum/matrix/mat64"
 )
 
-// An object that can ingest some data and train on it.
+// An Estimator is object that can ingest some data and train on it.
 type Estimator interface {
     Fit()
 }
 
-// An object that provides predictions.
+// A Predictor is an object that provides predictions.
 type Predictor interface {
     Predict()
 }
 
-// An supervised learning object, that is possible of scoring accuracy against a test set.
+// A Model is a supervised learning object, that is
+// possible of scoring accuracy against a test set.
 type Model interface {
     Score()
 }
@@ -31,7 +32,7 @@ type BaseEstimator struct {
     Data *mat64.Dense
 }
 
-// Serialises an estimator to a provided filepath, in gob format.
+// SaveEstimatorToGob serialises an estimator to a provided filepath, in gob format.
 // See http://golang.org/pkg/encoding/gob for further details.
 func SaveEstimatorToGob(path string, e *Estimator) {
     b := new(bytes.Buffer)
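
The comment rewrites in this hunk follow golint's doc-comment convention: a comment on an exported identifier should be a sentence beginning with that identifier's name, because godoc displays the comment verbatim next to it. A before-and-after sketch with an invented function:

    package base

    // golint would report: "comment on exported function SaveModel should
    // be of the form 'SaveModel ...'" for a comment like
    //   // Serialises a model to disk.

    // SaveModel serialises a model to disk, in a hypothetical format.
    func SaveModel(path string) error { return nil }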

View File

@@ -142,7 +142,7 @@ func NewInstances(attrs []Attribute, rows int) *Instances {
 func CheckNewInstancesFromRaw(attrs []Attribute, rows int, data []float64) error {
     size := rows * len(attrs)
     if size < len(data) {
-        return errors.New("base: data length is larger than the rows * attribute space.")
+        return errors.New("base: data length is larger than the rows * attribute space")
     } else if size > len(data) {
         return errors.New("base: data is smaller than the rows * attribute space")
     }
@@ -198,7 +198,6 @@ func InstancesTrainTestSplit(src *Instances, prop float64) (*Instances, *Instanc
         rawTestMatrix.SetRow(i, rowDat)
     }
 
     trainingRet := NewInstancesFromDense(src.attributes, len(trainingRows), rawTrainMatrix)
-
     testRet := NewInstancesFromDense(src.attributes, len(testingRows), rawTestMatrix)
     return trainingRet, testRet
@@ -217,7 +216,7 @@ func (inst *Instances) CountAttrValues(a Attribute) map[string]int {
     for i := 0; i < inst.Rows; i++ {
         sysVal := inst.Get(i, attrIndex)
         stringVal := a.GetStringFromSysVal(sysVal)
-        ret[stringVal] += 1
+        ret[stringVal]++
     }
     return ret
 }
@@ -320,7 +319,7 @@ func (inst *Instances) GetRowVector(row int) []float64 {
     return inst.storage.RowView(row)
 }
 
-// GetRowVector returns a row of system representation
+// GetRowVectorWithoutClass returns a row of system representation
 // values at the given row index, excluding the class attribute
 func (inst *Instances) GetRowVectorWithoutClass(row int) []float64 {
     rawRow := make([]float64, inst.Cols)
@@ -337,7 +336,7 @@ func (inst *Instances) GetClass(row int) string {
     return attr.GetStringFromSysVal(val)
 }
 
-// GetClassDist returns a map containing the count of each
+// GetClassDistribution returns a map containing the count of each
 // class type (indexed by the class' string representation)
 func (inst *Instances) GetClassDistribution() map[string]int {
     ret := make(map[string]int)
@@ -351,13 +350,13 @@ func (inst *Instances) GetClassDistribution() map[string]int {
     return ret
 }
 
-func (Inst *Instances) GetClassAttrPtr() *Attribute {
-    attr := Inst.GetAttr(Inst.ClassIndex)
+func (inst *Instances) GetClassAttrPtr() *Attribute {
+    attr := inst.GetAttr(inst.ClassIndex)
     return &attr
 }
 
-func (Inst *Instances) GetClassAttr() Attribute {
-    return Inst.GetAttr(Inst.ClassIndex)
+func (inst *Instances) GetClassAttr() Attribute {
+    return inst.GetAttr(inst.ClassIndex)
 }
 
 //
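
Two more golint conventions appear in this file: increment statements (ret[stringVal]++ rather than += 1) and consistent receiver names (inst everywhere, rather than a stray Inst). A small sketch of both, with invented names:

    package base

    type labelCounter struct{ seen map[string]int }

    // One short, lowercase receiver name is reused by every method;
    // golint flags receivers that differ from the method set's usual name.
    func (l *labelCounter) add(label string) {
        // golint: "should replace l.seen[label] += 1 with l.seen[label]++"
        l.seen[label]++
    }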

View File

@@ -15,10 +15,10 @@ func shuffleMatrix(returnDatasets []*mat.Dense, dataset mat.Matrix, testSize int
     shuffledSet := mat.DenseCopyOf(dataset)
     rowCount, colCount := shuffledSet.Dims()
     temp := make([]float64, colCount)
 
     // FisherYates shuffle
     for i := 0; i < rowCount; i++ {
-        j := numGen.Intn(i+1)
+        j := numGen.Intn(i + 1)
         if j != i {
             // Make a "hard" copy to avoid pointer craziness.
             copy(temp, shuffledSet.RowView(i))
@@ -43,29 +43,29 @@ func TrainTestSplit(size interface{}, randomState interface{}, datasets ...*mat.
     // Input should be one or two matrices.
     dataCount := len(datasets)
     if dataCount > 2 {
-        return nil, fmt.Errorf("Expected 1 or 2 datasets, got %d\n", dataCount)
+        return nil, fmt.Errorf("expected 1 or 2 datasets, got %d\n", dataCount)
     }
 
     if dataCount == 2 {
         // Test for consistency.
         labelCount, labelFeatures := datasets[1].Dims()
         if labelCount != instanceCount {
-            return nil, fmt.Errorf("Data and labels must have the same number of instances")
+            return nil, fmt.Errorf("data and labels must have the same number of instances")
         } else if labelFeatures != 1 {
-            return nil, fmt.Errorf("Label matrix must have single feature")
+            return nil, fmt.Errorf("label matrix must have single feature")
         }
     }
 
     var testSize int
     switch size := size.(type) {
-        // If size is an integer, treat it as the test data instance count.
+    // If size is an integer, treat it as the test data instance count.
     case int:
         testSize = size
     case float64:
         // If size is a float, treat it as a percentage of the instances to be allocated to the test set.
         testSize = int(float64(instanceCount)*size + 0.5)
     default:
-        return nil, fmt.Errorf("Expected a test instance count (int) or percentage (float64)")
+        return nil, fmt.Errorf("expected a test instance count (int) or percentage (float64)")
     }
 
     var randSeed int64
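
The lowercasing above follows the Go error-string convention enforced by golint: error strings should not be capitalised or end with punctuation or a newline, because callers usually wrap them in a longer message. A minimal sketch with an invented function:

    package split

    import "fmt"

    func checkDatasets(dataCount int) error {
        if dataCount > 2 {
            // Reads naturally when wrapped, e.g.
            // "split failed: expected 1 or 2 datasets, got 3".
            return fmt.Errorf("expected 1 or 2 datasets, got %d", dataCount)
        }
        return nil
    }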

View File

@@ -1,10 +1,10 @@
 package ensemble
 
 import (
+    "fmt"
     base "github.com/sjwhitworth/golearn/base"
     meta "github.com/sjwhitworth/golearn/meta"
     trees "github.com/sjwhitworth/golearn/trees"
-    "fmt"
 )
 
 // RandomForest classifies instances using an ensemble
@@ -16,7 +16,7 @@ type RandomForest struct {
     Model *meta.BaggedModel
 }
 
-// NewRandomForests generates and return a new random forests
+// NewRandomForest generates and return a new random forests
 // forestSize controls the number of trees that get built
 // features controls the number of features used to build each tree
 func NewRandomForest(forestSize int, features int) *RandomForest {
@@ -29,7 +29,7 @@ func NewRandomForest(forestSize int, features int) *RandomForest {
     return ret
 }
 
-// Train builds the RandomForest on the specified instances
+// Fit builds the RandomForest on the specified instances
 func (f *RandomForest) Fit(on *base.Instances) {
     f.Model = new(meta.BaggedModel)
     f.Model.RandomFeatures = f.Features
@@ -47,4 +47,4 @@ func (f *RandomForest) Predict(with *base.Instances) *base.Instances {
 
 func (f *RandomForest) String() string {
     return fmt.Sprintf("RandomForest(ForestSize: %d, Features:%d, %s\n)", f.ForestSize, f.Features, f.Model)
-}
+}
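
The import shuffle at the top of this file is gofmt's sorting at work: within a parenthesised import group, paths are kept in order, and "fmt" sorts ahead of the github.com/... paths. A standalone sketch:

    package main

    // gofmt keeps each import group sorted alphabetically.
    import (
        "fmt"
        "os"
    )

    func main() {
        fmt.Fprintln(os.Stdout, "imports stay sorted")
    }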

View File

@@ -23,7 +23,7 @@ func GetConfusionMatrix(ref *base.Instances, gen *base.Instances) map[string]map
         referenceClass := ref.GetClass(i)
         predictedClass := gen.GetClass(i)
         if _, ok := ret[referenceClass]; ok {
-            ret[referenceClass][predictedClass] += 1
+            ret[referenceClass][predictedClass]++
         } else {
             ret[referenceClass] = make(map[string]int)
             ret[referenceClass][predictedClass] = 1

View File

@@ -21,7 +21,7 @@ type ChiMergeFilter struct {
     _Trained bool
 }
 
-// Create a ChiMergeFilter with some helpful intialisations.
+// NewChiMergeFilter creates a ChiMergeFilter with some helpful initialisations.
 func NewChiMergeFilter(inst *base.Instances, significance float64) ChiMergeFilter {
     return ChiMergeFilter{
         make([]int, 0),
@@ -45,16 +45,16 @@ func (c *ChiMergeFilter) Build() {
 
 // AddAllNumericAttributes adds every suitable attribute
 // to the ChiMergeFilter for discretisation
-func (b *ChiMergeFilter) AddAllNumericAttributes() {
-    for i := 0; i < b.Instances.Cols; i++ {
-        if i == b.Instances.ClassIndex {
+func (c *ChiMergeFilter) AddAllNumericAttributes() {
+    for i := 0; i < c.Instances.Cols; i++ {
+        if i == c.Instances.ClassIndex {
             continue
         }
-        attr := b.Instances.GetAttr(i)
+        attr := c.Instances.GetAttr(i)
         if attr.GetType() != base.Float64Type {
             continue
         }
-        b.Attributes = append(b.Attributes, i)
+        c.Attributes = append(c.Attributes, i)
     }
 }
@@ -110,7 +110,7 @@ type FrequencyTableEntry struct {
 }
 
 func (t *FrequencyTableEntry) String() string {
-    return fmt.Sprintf("%.2f %s", t.Value, t.Frequency)
+    return fmt.Sprintf("%.2f %v", t.Value, t.Frequency)
 }
 
 func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEntry {
@@ -129,7 +129,7 @@ func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEn
         for _, entry := range ret {
             if entry.Value == valueConv {
                 found = true
-                entry.Frequency[class] += 1
+                entry.Frequency[class]++
             }
         }
         if !found {

View File

@@ -20,7 +20,7 @@ func TestChiMFreqTable(testEnv *testing.T) {
         testEnv.Error("Wrong frequency")
     }
     if freq[0].Frequency["c3"] != 4 {
-        testEnv.Error("Wrong frequency %s", freq[1])
+        testEnv.Errorf("Wrong frequency %s", freq[1])
     }
     if freq[10].Frequency["c2"] != 1 {
         testEnv.Error("Wrong frequency")
@@ -111,7 +111,7 @@ func TestChiMerge2(testEnv *testing.T) {
     inst.Sort(base.Ascending, attrs)
     freq := chiMerge(inst, 0, 0.90, 0, inst.Rows)
     if len(freq) != 5 {
-        testEnv.Error("Wrong length (%d)", len(freq))
+        testEnv.Errorf("Wrong length (%d)", len(freq))
         testEnv.Error(freq)
     }
     if freq[0].Value != 4.3 {

View File

@@ -1,4 +1,4 @@
-// Package KNN implements a K Nearest Neighbors object, capable of both classification
+// Package knn implements a K Nearest Neighbors object, capable of both classification
 // and regression. It accepts data in the form of a slice of float64s, which are then reshaped
 // into a X by Y matrix.
 package knn
@@ -10,7 +10,7 @@ import (
     util "github.com/sjwhitworth/golearn/utilities"
 )
 
-// A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
+// A KNNClassifier consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
 // The accepted distance functions at this time are 'euclidean' and 'manhattan'.
 type KNNClassifier struct {
     base.BaseEstimator
@@ -19,7 +19,7 @@ type KNNClassifier struct {
     NearestNeighbours int
 }
 
-// Returns a new classifier
+// NewKnnClassifier returns a new classifier
 func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
     KNN := KNNClassifier{}
     KNN.DistanceFunc = distfunc
@@ -27,12 +27,12 @@ func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
     return &KNN
 }
 
-// Train stores the training data for llater
+// Fit stores the training data for later
 func (KNN *KNNClassifier) Fit(trainingData *base.Instances) {
     KNN.TrainingData = trainingData
 }
 
-// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
+// PredictOne returns a classification for the vector, based on a vector input, using the KNN algorithm.
 // See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
 func (KNN *KNNClassifier) PredictOne(vector []float64) string {
@@ -75,7 +75,7 @@ func (KNN *KNNClassifier) PredictOne(vector []float64) string {
         labels = append(labels, label)
 
         if _, ok := maxmap[label]; ok {
-            maxmap[label] += 1
+            maxmap[label]++
         } else {
             maxmap[label] = 1
         }
@@ -95,14 +95,14 @@ func (KNN *KNNClassifier) Predict(what *base.Instances) *base.Instances {
     return ret
 }
 
-//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
+// A KNNRegressor consists of a data matrix, associated result variables in the same order as the matrix, and a name.
 type KNNRegressor struct {
     base.BaseEstimator
     Values       []float64
     DistanceFunc string
 }
 
-// Mints a new classifier.
+// NewKnnRegressor mints a new classifier.
 func NewKnnRegressor(distfunc string) *KNNRegressor {
     KNN := KNNRegressor{}
     KNN.DistanceFunc = distfunc
@@ -119,7 +119,6 @@ func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows int, cols
 }
 
 func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
-
     // Get the number of rows
     rows, _ := KNN.Data.Dims()
     rownumbers := make(map[int]float64)
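
The renames in this file settle the classifier's public surface: NewKnnClassifier, Fit, Predict and PredictOne. A hedged usage sketch assembled from the signatures visible in this diff; the loader base.ParseCSVToInstances and the exact import paths are assumptions about this revision, not shown in the commit:

    package main

    import (
        "fmt"

        base "github.com/sjwhitworth/golearn/base"
        "github.com/sjwhitworth/golearn/evaluation"
        "github.com/sjwhitworth/golearn/knn"
    )

    func main() {
        // Assumed loader; the iris CSV paths appear in the tests above.
        data, err := base.ParseCSVToInstances("examples/datasets/iris_headers.csv", true)
        if err != nil {
            panic(err)
        }
        train, test := base.InstancesTrainTestSplit(data, 0.25)

        cls := knn.NewKnnClassifier("euclidean", 2) // "euclidean" or "manhattan"
        cls.Fit(train)                              // doc comment now matches the Fit name
        predictions := cls.Predict(test)

        fmt.Println(evaluation.GetConfusionMatrix(test, predictions))
    }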

View File

@@ -49,7 +49,7 @@ func NewProblem(X [][]float64, y []float64, bias float64) *Problem {
     prob.c_prob.x = convert_features(X, bias)
     c_y := make([]C.int, len(y))
 
-    for i := 0; i < len(y); i += 1 {
+    for i := 0; i < len(y); i++ {
         c_y[i] = C.int(y[i])
     }
     prob.c_prob.y = &c_y[0]
@@ -70,26 +70,26 @@ func Predict(model *Model, x []float64) float64 {
 }
 
 func convert_vector(x []float64, bias float64) *C.struct_feature_node {
     n_ele := 0
-    for i := 0; i < len(x); i += 1 {
+    for i := 0; i < len(x); i++ {
         if x[i] > 0 {
-            n_ele += 1
+            n_ele++
         }
     }
     n_ele += 2
     c_x := make([]C.struct_feature_node, n_ele)
     j := 0
-    for i := 0; i < len(x); i += 1 {
+    for i := 0; i < len(x); i++ {
         if x[i] > 0 {
             c_x[j].index = C.int(i + 1)
             c_x[j].value = C.double(x[i])
-            j += 1
+            j++
         }
     }
     if bias > 0 {
         c_x[j].index = C.int(0)
         c_x[j].value = C.double(0)
-        j += 1
+        j++
     }
     c_x[j].index = C.int(-1)
     return &c_x[0]
@@ -98,12 +98,12 @@ func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
     n_samples := len(X)
     n_elements := 0
 
-    for i := 0; i < n_samples; i += 1 {
-        for j := 0; j < len(X[i]); j += 1 {
+    for i := 0; i < n_samples; i++ {
+        for j := 0; j < len(X[i]); j++ {
             if X[i][j] != 0.0 {
-                n_elements += 1
+                n_elements++
             }
-            n_elements += 1 //for bias
+            n_elements++ //for bias
         }
     }
@@ -113,23 +113,23 @@ func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
     x := make([]*C.struct_feature_node, n_samples)
     var c_x **C.struct_feature_node
 
-    for i := 0; i < n_samples; i += 1 {
+    for i := 0; i < n_samples; i++ {
         x[i] = &x_space[cursor]
 
-        for j := 0; j < len(X[i]); j += 1 {
+        for j := 0; j < len(X[i]); j++ {
             if X[i][j] != 0.0 {
                 x_space[cursor].index = C.int(j + 1)
                 x_space[cursor].value = C.double(X[i][j])
-                cursor += 1
+                cursor++
             }
             if bias > 0 {
                 x_space[cursor].index = C.int(0)
                 x_space[cursor].value = C.double(bias)
-                cursor += 1
+                cursor++
             }
         }
         x_space[cursor].index = C.int(-1)
-        cursor += 1
+        cursor++
     }
     c_x = &x[0]
     return c_x

View File

@@ -79,7 +79,7 @@ func (b *BaggedModel) AddModel(m base.Classifier) {
     b.Models = append(b.Models, m)
 }
 
-// Train generates and trains each model on a randomised subset of
+// Fit generates and trains each model on a randomised subset of
 // Instances.
 func (b *BaggedModel) Fit(from *base.Instances) {
     var wait sync.WaitGroup
@@ -153,7 +153,7 @@ func (b *BaggedModel) Predict(from *base.Instances) *base.Instances {
     }
 
     // Send all the models to the workers for prediction
-    for i, _ := range b.Models {
+    for i := range b.Models {
         processpipe <- i
     }
     close(processpipe) // Finished sending models to be predicted
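
The range fix above is another golint simplification: when only the index is needed, the blank second variable is redundant ("should omit 2nd value from range"). Sketch:

    package main

    import "fmt"

    func main() {
        models := []string{"tree-0", "tree-1", "tree-2"}
        // Equivalent to `for i, _ := range models`, minus the blank identifier.
        for i := range models {
            fmt.Println("queueing model", i)
        }
    }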

View File

@@ -12,7 +12,7 @@ func NewChebyshev() *Chebyshev {
     return &Chebyshev{}
 }
 
-func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (c *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     r1, c1 := vectorX.Dims()
     r2, c2 := vectorY.Dims()
     if r1 != r2 || c1 != c2 {

View File

@@ -15,12 +15,11 @@ func NewCranberra() *Cranberra {
 
 func cranberraDistanceStep(num float64, denom float64) float64 {
     if num == .0 && denom == .0 {
         return .0
-    } else {
-        return num / denom
     }
+    return num / denom
 }
 
-func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (c *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     r1, c1 := vectorX.Dims()
     r2, c2 := vectorY.Dims()
     if r1 != r2 || c1 != c2 {
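
Dropping the else above is golint's indent-error-flow advice: when an if block ends in a return, the else is redundant and the main path can stay unindented. Restating the reshaped helper on its own (package name invented, body as in the hunk):

    package demo

    func cranberraDistanceStep(num float64, denom float64) float64 {
        if num == .0 && denom == .0 {
            return .0
        }
        // The early return above replaces the old else branch.
        return num / denom
    }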

View File

@@ -12,19 +12,19 @@ func NewEuclidean() *Euclidean {
     return &Euclidean{}
 }
 
-// Compute Eucledian inner product.
-func (self *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// InnerProduct computes a Eucledian inner product.
+func (e *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     result := vectorX.Dot(vectorY)
     return result
 }
 
-// Compute Euclidean distance (also known as L2 distance).
-func (self *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes Euclidean distance (also known as L2 distance).
+func (e *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     subVector := mat64.NewDense(0, 0, nil)
     subVector.Sub(vectorX, vectorY)
-    result := self.InnerProduct(subVector, subVector)
+    result := e.InnerProduct(subVector, subVector)
     return math.Sqrt(result)
 }
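
The self receivers renamed in these pairwise files run up against golint's rule that a receiver name should reflect its type rather than use generic names such as "self" or "this". Sketch with an invented method:

    package demo

    type Euclidean struct{}

    // A one- or two-letter name derived from the type is the convention.
    func (e *Euclidean) Name() string { return "euclidean" }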

View File

@@ -12,9 +12,9 @@ func NewManhattan() *Manhattan {
     return &Manhattan{}
 }
 
-// Manhattan distance, also known as L1 distance.
-// Compute sum of absolute values of elements.
-func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes the Manhattan distance, also known as L1 distance.
+// == the sum of the absolute values of elements.
+func (m *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     r1, c1 := vectorX.Dims()
     r2, c2 := vectorY.Dims()
     if r1 != r2 || c1 != c2 {

View File

@@ -10,25 +10,25 @@ type PolyKernel struct {
     degree int
 }
 
-// Return a d-degree polynomial kernel
+// NewPolyKernel returns a d-degree polynomial kernel
 func NewPolyKernel(degree int) *PolyKernel {
     return &PolyKernel{degree: degree}
 }
 
-// Compute inner product through kernel trick
+// InnerProduct computes the inner product through a kernel trick
 // K(x, y) = (x^T y + 1)^d
-func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (p *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     result := vectorX.Dot(vectorY)
-    result = math.Pow(result+1, float64(self.degree))
+    result = math.Pow(result+1, float64(p.degree))
     return result
 }
 
-// Compute distance under the polynomial kernel, maybe no need.
-func (self *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+// Distance computes distance under the polynomial kernel (maybe not needed?)
+func (p *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     subVector := mat64.NewDense(0, 0, nil)
     subVector.Sub(vectorX, vectorY)
-    result := self.InnerProduct(subVector, subVector)
+    result := p.InnerProduct(subVector, subVector)
     return math.Sqrt(result)
 }

View File

@@ -10,18 +10,18 @@ type RBFKernel struct {
     gamma float64
 }
 
-// Radial Basis Function Kernel
+// NewRBFKernel returns a representation of a Radial Basis Function Kernel
 func NewRBFKernel(gamma float64) *RBFKernel {
     return &RBFKernel{gamma: gamma}
 }
 
-// Compute inner product through kernel trick
+// InnerProduct computes the inner product through a kernel trick
 // K(x, y) = exp(-gamma * ||x - y||^2)
-func (self *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
+func (r *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
     euclidean := NewEuclidean()
     distance := euclidean.Distance(vectorX, vectorY)
-    result := math.Exp(-self.gamma * math.Pow(distance, 2))
+    result := math.Exp(-r.gamma * math.Pow(distance, 2))
     return result
 }

View File

@@ -2,7 +2,7 @@ package optimisation
 
 import "github.com/gonum/matrix/mat64"
 
-// Batch gradient descent finds the local minimum of a function.
+// BatchGradientDescent finds the local minimum of a function.
 // See http://en.wikipedia.org/wiki/Gradient_descent for more details.
 func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
     m, _ := y.Dims()
@@ -35,7 +35,7 @@ func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *m
     return theta
 }
 
-// Stochastic gradient descent updates the parameters of theta on a random row selection from a matrix.
+// StochasticGradientDescent updates the parameters of theta on a random row selection from a matrix.
 // It is faster as it does not compute the cost function over the entire dataset every time.
 // It instead calculates the error parameters over only one row of the dataset at a time.
 // In return, there is a trade off for accuracy. This is minimised by running multiple SGD processes
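
For context on the renamed doc comments, here is a schematic single-feature sketch of the two update rules they describe; it is illustrative only, not the mat64-based implementation in this file:

    package optimisation

    import "math/rand"

    // batchStep averages the gradient over every sample, as
    // BatchGradientDescent does: theta -= alpha/m * sum((theta*x[i]-y[i])*x[i]).
    func batchStep(x, y []float64, theta, alpha float64) float64 {
        grad := 0.0
        for i := range x {
            grad += (theta*x[i] - y[i]) * x[i]
        }
        return theta - alpha*grad/float64(len(x))
    }

    // sgdStep updates from a single random row, as StochasticGradientDescent
    // does: each step is cheaper but noisier.
    func sgdStep(x, y []float64, theta, alpha float64) float64 {
        i := rand.Intn(len(x))
        return theta - alpha*(theta*x[i]-y[i])*x[i]
    }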

View File

@@ -12,7 +12,7 @@ import (
 type InformationGainRuleGenerator struct {
 }
 
-// GetSplitAttribute returns the non-class Attribute which maximises the
+// GenerateSplitAttribute returns the non-class Attribute which maximises the
 // information gain.
 //
 // IMPORTANT: passing a base.Instances with no Attributes other than the class
@@ -27,7 +27,7 @@ func (r *InformationGainRuleGenerator) GenerateSplitAttribute(f *base.Instances)
     return r.GetSplitAttributeFromSelection(allAttributes, f)
 }
 
-// GetSplitAttribute from selection returns the class Attribute which maximises
+// GetSplitAttributeFromSelection returns the class Attribute which maximises
 // the information gain amongst consideredAttributes
 //
 // IMPORTANT: passing a zero-length consideredAttributes parameter will panic()

View File

@@ -156,18 +156,18 @@ func (d *DecisionTreeNode) Prune(using *base.Instances) {
     // If you're a leaf, you're already pruned
     if d.Children == nil {
         return
-    } else {
-        if d.SplitAttr == nil {
-            return
-        }
-        // Recursively prune children of this node
-        sub := using.DecomposeOnAttributeValues(d.SplitAttr)
-        for k := range d.Children {
-            if sub[k] == nil {
-                continue
-            }
-            d.Children[k].Prune(sub[k])
-        }
     }
+    if d.SplitAttr == nil {
+        return
+    }
+    // Recursively prune children of this node
+    sub := using.DecomposeOnAttributeValues(d.SplitAttr)
+    for k := range d.Children {
+        if sub[k] == nil {
+            continue
+        }
+        d.Children[k].Prune(sub[k])
+    }
 
     // Get a baseline accuracy
@@ -234,7 +234,7 @@ type ID3DecisionTree struct {
     PruneSplit float64
 }
 
-// Returns a new ID3DecisionTree with the specified test-prune
+// NewID3DecisionTree returns a new ID3DecisionTree with the specified test-prune
 // ratio. Of the ratio is less than 0.001, the tree isn't pruned
 func NewID3DecisionTree(prune float64) *ID3DecisionTree {
     return &ID3DecisionTree{

View File

@@ -66,7 +66,7 @@ func NewRandomTree(attrs int) *RandomTree {
     }
 }
 
-// Train builds a RandomTree suitable for prediction
+// Fit builds a RandomTree suitable for prediction
 func (rt *RandomTree) Fit(from *base.Instances) {
     rt.Root = InferID3Tree(from, rt.Rule)
 }

View File

@@ -32,7 +32,7 @@ func SortIntMap(m map[int]float64) []int {
     sm.m = m
     sm.s = make([]int, len(m))
     i := 0
-    for key, _ := range m {
+    for key := range m {
         sm.s[i] = key
         i++
     }
@@ -62,7 +62,7 @@ func SortStringMap(m map[string]int) []string {
     sm.m = m
     sm.s = make([]string, len(m))
     i := 0
-    for key, _ := range m {
+    for key := range m {
        sm.s[i] = key
        i++
    }