
Comments should be of the form "<Struct> ..." or "<MethodName> ..."

This commit is contained in:
Niclas Jern 2014-07-18 13:48:28 +03:00
parent 4d7bc20a36
commit 627a5537d3
15 changed files with 36 additions and 36 deletions
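
For reference, the convention named in the commit message is the standard Go doc-comment style: the comment on an exported identifier begins with that identifier's name. A minimal sketch with invented names, not code from this diff:

package greet

// Greeter holds a greeting prefix.
type Greeter struct {
	Prefix string
}

// Greet returns the greeting for the given name.
func (g Greeter) Greet(name string) string {
	return g.Prefix + ", " + name
}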

View File

@@ -43,7 +43,7 @@ func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
return attrs
}
// ParseCsvSniffAttributeNames returns a slice containing the top row
// ParseCSVSniffAttributeNames returns a slice containing the top row
// of a given CSV file, or placeholders if hasHeaders is false.
func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
file, err := os.Open(filepath)

View File

@@ -12,17 +12,18 @@ import (
mat64 "github.com/gonum/matrix/mat64"
)
// An object that can ingest some data and train on it.
// An Estimator is an object that can ingest some data and train on it.
type Estimator interface {
Fit()
}
// An object that provides predictions.
// A Predictor is an object that provides predictions.
type Predictor interface {
Predict()
}
// An supervised learning object, that is possible of scoring accuracy against a test set.
// A Model is a supervised learning object that is
// capable of scoring accuracy against a test set.
type Model interface {
Score()
}
@@ -31,7 +32,7 @@ type BaseEstimator struct {
Data *mat64.Dense
}
// Serialises an estimator to a provided filepath, in gob format.
// SaveEstimatorToGob serialises an estimator to a provided filepath, in gob format.
// See http://golang.org/pkg/encoding/gob for further details.
func SaveEstimatorToGob(path string, e *Estimator) {
b := new(bytes.Buffer)
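
As background, gob is the Go standard library's binary serialisation format. A minimal, self-contained sketch of encoding and decoding with encoding/gob (standard library only, not golearn code):

package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

type point struct{ X, Y float64 }

func main() {
	var b bytes.Buffer
	// Serialise a value into the buffer...
	if err := gob.NewEncoder(&b).Encode(point{1, 2}); err != nil {
		panic(err)
	}
	// ...then decode it back out.
	var p point
	if err := gob.NewDecoder(&b).Decode(&p); err != nil {
		panic(err)
	}
	fmt.Println(p) // {1 2}
}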

View File

@@ -319,7 +319,7 @@ func (inst *Instances) GetRowVector(row int) []float64 {
return inst.storage.RowView(row)
}
// GetRowVector returns a row of system representation
// GetRowVectorWithoutClass returns a row of system representation
// values at the given row index, excluding the class attribute
func (inst *Instances) GetRowVectorWithoutClass(row int) []float64 {
rawRow := make([]float64, inst.Cols)
@@ -336,7 +336,7 @@ func (inst *Instances) GetClass(row int) string {
return attr.GetStringFromSysVal(val)
}
// GetClassDist returns a map containing the count of each
// GetClassDistribution returns a map containing the count of each
// class type (indexed by the class' string representation)
func (inst *Instances) GetClassDistribution() map[string]int {
ret := make(map[string]int)
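
A hypothetical call, assuming inst holds a three-class dataset (class names invented for illustration):

dist := inst.GetClassDistribution()
// e.g. map[string]int{"setosa": 50, "versicolor": 50, "virginica": 50}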

View File

@@ -1,10 +1,10 @@
package ensemble
import (
"fmt"
base "github.com/sjwhitworth/golearn/base"
meta "github.com/sjwhitworth/golearn/meta"
trees "github.com/sjwhitworth/golearn/trees"
"fmt"
)
// RandomForest classifies instances using an ensemble
@@ -16,7 +16,7 @@ type RandomForest struct {
Model *meta.BaggedModel
}
// NewRandomForests generates and return a new random forests
// NewRandomForest generates and returns a new random forest.
// forestSize controls the number of trees that get built
// features controls the number of features used to build each tree
func NewRandomForest(forestSize int, features int) *RandomForest {
@@ -29,7 +29,7 @@ func NewRandomForest(forestSize int, features int) *RandomForest {
return ret
}
// Train builds the RandomForest on the specified instances
// Fit builds the RandomForest on the specified instances
func (f *RandomForest) Fit(on *base.Instances) {
f.Model = new(meta.BaggedModel)
f.Model.RandomFeatures = f.Features
@@ -47,4 +47,4 @@ func (f *RandomForest) Predict(with *base.Instances) *base.Instances {
func (f *RandomForest) String() string {
return fmt.Sprintf("RandomForest(ForestSize: %d, Features:%d, %s\n)", f.ForestSize, f.Features, f.Model)
}
}
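
Based on the signatures in this file, usage would look roughly like this (train and test are assumed to be *base.Instances values loaded elsewhere):

rf := ensemble.NewRandomForest(10, 4) // 10 trees, 4 features per tree
rf.Fit(train)                         // build the forest
predictions := rf.Predict(test)       // classify unseen instances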

View File

@@ -21,7 +21,7 @@ type ChiMergeFilter struct {
_Trained bool
}
// Create a ChiMergeFilter with some helpful intialisations.
// NewChiMergeFilter creates a ChiMergeFilter with some helpful initialisations.
func NewChiMergeFilter(inst *base.Instances, significance float64) ChiMergeFilter {
return ChiMergeFilter{
make([]int, 0),

View File

@@ -1,4 +1,4 @@
// Package KNN implements a K Nearest Neighbors object, capable of both classification
// Package knn implements a K Nearest Neighbors object, capable of both classification
// and regression. It accepts data in the form of a slice of float64s, which are then reshaped
// into an X by Y matrix.
package knn
@@ -10,7 +10,7 @@ import (
util "github.com/sjwhitworth/golearn/utilities"
)
// A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
// A KNNClassifier consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
// The accepted distance functions at this time are 'euclidean' and 'manhattan'.
type KNNClassifier struct {
base.BaseEstimator
@@ -19,7 +19,7 @@ type KNNClassifier struct {
NearestNeighbours int
}
// Returns a new classifier
// NewKnnClassifier returns a new classifier
func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
KNN := KNNClassifier{}
KNN.DistanceFunc = distfunc
@@ -27,12 +27,12 @@ func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
return &KNN
}
// Train stores the training data for llater
// Fit stores the training data for later
func (KNN *KNNClassifier) Fit(trainingData *base.Instances) {
KNN.TrainingData = trainingData
}
// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
// PredictOne returns a classification for a single input vector, using the KNN algorithm.
// See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
func (KNN *KNNClassifier) PredictOne(vector []float64) string {
@@ -95,14 +95,14 @@ func (KNN *KNNClassifier) Predict(what *base.Instances) *base.Instances {
return ret
}
//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
// A KNNRegressor consists of a data matrix, associated result variables in the same order as the matrix, and a name.
type KNNRegressor struct {
base.BaseEstimator
Values []float64
DistanceFunc string
}
// Mints a new classifier.
// NewKnnRegressor mints a new regressor.
func NewKnnRegressor(distfunc string) *KNNRegressor {
KNN := KNNRegressor{}
KNN.DistanceFunc = distfunc
@@ -119,7 +119,6 @@ func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows int, cols
}
func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
// Get the number of rows
rows, _ := KNN.Data.Dims()
rownumbers := make(map[int]float64)
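
To make the k-NN idea concrete, here is a toy, self-contained sketch using plain slices rather than golearn's mat64-backed implementation: rank the training points by distance to the query and take a majority vote over the k nearest labels.

package main

import (
	"fmt"
	"math"
	"sort"
)

type sample struct {
	features []float64
	label    string
}

// euclidean returns the L2 distance between two equal-length vectors.
func euclidean(a, b []float64) float64 {
	var s float64
	for i := range a {
		d := a[i] - b[i]
		s += d * d
	}
	return math.Sqrt(s)
}

// predictOne classifies x by majority vote among its k nearest neighbours.
func predictOne(train []sample, x []float64, k int) string {
	sort.Slice(train, func(i, j int) bool {
		return euclidean(train[i].features, x) < euclidean(train[j].features, x)
	})
	votes := make(map[string]int)
	for _, s := range train[:k] {
		votes[s.label]++
	}
	best, bestCount := "", 0
	for label, n := range votes {
		if n > bestCount {
			best, bestCount = label, n
		}
	}
	return best
}

func main() {
	train := []sample{
		{[]float64{0, 0}, "a"},
		{[]float64{0, 1}, "a"},
		{[]float64{5, 5}, "b"},
	}
	fmt.Println(predictOne(train, []float64{0.2, 0.3}, 2)) // a
}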

View File

@@ -79,7 +79,7 @@ func (b *BaggedModel) AddModel(m base.Classifier) {
b.Models = append(b.Models, m)
}
// Train generates and trains each model on a randomised subset of
// Fit generates and trains each model on a randomised subset of
// Instances.
func (b *BaggedModel) Fit(from *base.Instances) {
var wait sync.WaitGroup
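
The essence of bagging is that each model trains on a bootstrap sample of the instances, drawn with replacement. A sketch of just the sampling step (illustrative, not golearn's code):

package main

import (
	"fmt"
	"math/rand"
)

// bootstrapIndices returns n row indices sampled with replacement,
// so some rows repeat and others are left out of any given bag.
func bootstrapIndices(n int, rng *rand.Rand) []int {
	idx := make([]int, n)
	for i := range idx {
		idx[i] = rng.Intn(n)
	}
	return idx
}

func main() {
	rng := rand.New(rand.NewSource(1))
	fmt.Println(bootstrapIndices(5, rng)) // five indices in [0, 5); duplicates likely
}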

View File

@@ -12,14 +12,14 @@ func NewEuclidean() *Euclidean {
return &Euclidean{}
}
// Compute Eucledian inner product.
// InnerProduct computes a Euclidean inner product.
func (self *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
result := vectorX.Dot(vectorY)
return result
}
// Compute Euclidean distance (also known as L2 distance).
// Distance computes Euclidean distance (also known as L2 distance).
func (self *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
subVector := mat64.NewDense(0, 0, nil)
subVector.Sub(vectorX, vectorY)

View File

@@ -12,8 +12,8 @@ func NewManhattan() *Manhattan {
return &Manhattan{}
}
// Manhattan distance, also known as L1 distance.
// Compute sum of absolute values of elements.
// Distance computes the Manhattan distance, also known as L1 distance:
// the sum of the absolute values of the element-wise differences.
func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
r1, c1 := vectorX.Dims()
r2, c2 := vectorY.Dims()
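
In plain-slice form (illustrative, not the mat64 version above) the computation is simply:

package main

import (
	"fmt"
	"math"
)

// manhattan sums the absolute element-wise differences of two vectors.
func manhattan(a, b []float64) float64 {
	var sum float64
	for i := range a {
		sum += math.Abs(a[i] - b[i])
	}
	return sum
}

func main() {
	fmt.Println(manhattan([]float64{1, 2}, []float64{4, 0})) // 3 + 2 = 5
}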

View File

@@ -10,12 +10,12 @@ type PolyKernel struct {
degree int
}
// Return a d-degree polynomial kernel
// NewPolyKernel returns a d-degree polynomial kernel
func NewPolyKernel(degree int) *PolyKernel {
return &PolyKernel{degree: degree}
}
// Compute inner product through kernel trick
// InnerProduct computes the inner product through a kernel trick
// K(x, y) = (x^T y + 1)^d
func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
result := vectorX.Dot(vectorY)
@@ -24,7 +24,7 @@ func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense)
return result
}
// Compute distance under the polynomial kernel, maybe no need.
// Distance computes the distance under the polynomial kernel (maybe not needed?)
func (self *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
subVector := mat64.NewDense(0, 0, nil)
subVector.Sub(vectorX, vectorY)
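
A worked check of K(x, y) = (x^T y + 1)^d with plain floats (illustrative, not the mat64-based method above):

package main

import (
	"fmt"
	"math"
)

// polyKernel evaluates K(x, y) = (x·y + 1)^d.
func polyKernel(x, y []float64, d float64) float64 {
	var dot float64
	for i := range x {
		dot += x[i] * y[i]
	}
	return math.Pow(dot+1, d)
}

func main() {
	fmt.Println(polyKernel([]float64{1, 2}, []float64{3, 0}, 2)) // (3 + 1)^2 = 16
}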

View File

@@ -10,12 +10,12 @@ type RBFKernel struct {
gamma float64
}
// Radial Basis Function Kernel
// NewRBFKernel returns a representation of a Radial Basis Function Kernel
func NewRBFKernel(gamma float64) *RBFKernel {
return &RBFKernel{gamma: gamma}
}
// Compute inner product through kernel trick
// InnerProduct computes the inner product through a kernel trick
// K(x, y) = exp(-gamma * ||x - y||^2)
func (self *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
euclidean := NewEuclidean()
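
Likewise, K(x, y) = exp(-gamma * ||x - y||^2) in plain-slice form (illustrative sketch):

package main

import (
	"fmt"
	"math"
)

// rbfKernel evaluates K(x, y) = exp(-gamma * ||x - y||^2).
func rbfKernel(x, y []float64, gamma float64) float64 {
	var sq float64
	for i := range x {
		d := x[i] - y[i]
		sq += d * d
	}
	return math.Exp(-gamma * sq)
}

func main() {
	x := []float64{1, 2}
	fmt.Println(rbfKernel(x, x, 0.5)) // 1, since ||x - x|| == 0
}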

View File

@@ -2,7 +2,7 @@ package optimisation
import "github.com/gonum/matrix/mat64"
// Batch gradient descent finds the local minimum of a function.
// BatchGradientDescent finds the local minimum of a function.
// See http://en.wikipedia.org/wiki/Gradient_descent for more details.
func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
m, _ := y.Dims()
@@ -35,7 +35,7 @@ func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *m
return theta
}
// Stochastic gradient descent updates the parameters of theta on a random row selection from a matrix.
// StochasticGradientDescent updates the parameters of theta on a random row selection from a matrix.
// It is faster as it does not compute the cost function over the entire dataset every time.
// It instead calculates the error parameters over only one row of the dataset at a time.
// In return, there is a trade off for accuracy. This is minimised by running multiple SGD processes
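
To illustrate the batch update rule in the simplest setting, here is a scalar sketch fitting y = theta * x by least squares (illustrative only; the golearn functions above operate on mat64 matrices):

package main

import "fmt"

func main() {
	x := []float64{1, 2, 3, 4}
	y := []float64{2, 4, 6, 8} // generated with theta = 2
	theta, alpha := 0.0, 0.05
	for epoch := 0; epoch < 200; epoch++ {
		var grad float64
		for i := range x {
			grad += (theta*x[i] - y[i]) * x[i] // gradient of the squared error
		}
		theta -= alpha * grad / float64(len(x)) // one batch update per epoch
	}
	fmt.Printf("theta = %.3f\n", theta) // converges towards 2
}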

View File

@@ -12,7 +12,7 @@ import (
type InformationGainRuleGenerator struct {
}
// GetSplitAttribute returns the non-class Attribute which maximises the
// GenerateSplitAttribute returns the non-class Attribute which maximises the
// information gain.
//
// IMPORTANT: passing a base.Instances with no Attributes other than the class
@@ -27,7 +27,7 @@ func (r *InformationGainRuleGenerator) GenerateSplitAttribute(f *base.Instances)
return r.GetSplitAttributeFromSelection(allAttributes, f)
}
// GetSplitAttribute from selection returns the class Attribute which maximises
// GetSplitAttributeFromSelection returns the non-class Attribute which maximises
// the information gain amongst consideredAttributes
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()

View File

@@ -234,7 +234,7 @@ type ID3DecisionTree struct {
PruneSplit float64
}
// Returns a new ID3DecisionTree with the specified test-prune
// NewID3DecisionTree returns a new ID3DecisionTree with the specified test-prune
// ratio. If the ratio is less than 0.001, the tree isn't pruned
func NewID3DecisionTree(prune float64) *ID3DecisionTree {
return &ID3DecisionTree{
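
A hypothetical usage sketch based on the signature and comment above (0.6 is an invented value):

pruned := trees.NewID3DecisionTree(0.6) // ratio >= 0.001, so the tree is pruned
unpruned := trees.NewID3DecisionTree(0) // ratio < 0.001, so no pruning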

View File

@@ -66,7 +66,7 @@ func NewRandomTree(attrs int) *RandomTree {
}
}
// Train builds a RandomTree suitable for prediction
// Fit builds a RandomTree suitable for prediction
func (rt *RandomTree) Fit(from *base.Instances) {
rt.Root = InferID3Tree(from, rt.Rule)
}