mirror of https://github.com/sjwhitworth/golearn.git

Comments should be of the form "<Struct> ..." or "<MethodName> ..."

commit 627a5537d3 (parent 4d7bc20a36)
@@ -43,7 +43,7 @@ func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
 	return attrs
 }
 
-// ParseCsvSniffAttributeNames returns a slice containing the top row
+// ParseCSVSniffAttributeNames returns a slice containing the top row
 // of a given CSV file, or placeholders if hasHeaders is false.
 func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
 	file, err := os.Open(filepath)
@@ -12,17 +12,18 @@ import (
 	mat64 "github.com/gonum/matrix/mat64"
 )
 
-// An object that can ingest some data and train on it.
+// An Estimator is an object that can ingest some data and train on it.
 type Estimator interface {
 	Fit()
 }
 
-// An object that provides predictions.
+// A Predictor is an object that provides predictions.
 type Predictor interface {
 	Predict()
 }
 
-// An supervised learning object, that is possible of scoring accuracy against a test set.
+// A Model is a supervised learning object that is
+// capable of scoring accuracy against a test set.
 type Model interface {
 	Score()
 }
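These three interfaces compose: a single concrete type can satisfy Estimator, Predictor, and Model at once. A minimal sketch, not from this commit, with a hypothetical DummyClassifier:

    type DummyClassifier struct {
        trained bool
    }

    func (d *DummyClassifier) Fit()     { d.trained = true } // satisfies Estimator
    func (d *DummyClassifier) Predict() {}                   // satisfies Predictor
    func (d *DummyClassifier) Score()   {}                   // satisfies Model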
@@ -31,7 +32,7 @@ type BaseEstimator struct {
 	Data *mat64.Dense
 }
 
-// Serialises an estimator to a provided filepath, in gob format.
+// SaveEstimatorToGob serialises an estimator to a provided filepath, in gob format.
 // See http://golang.org/pkg/encoding/gob for further details.
 func SaveEstimatorToGob(path string, e *Estimator) {
 	b := new(bytes.Buffer)
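For reference, the gob round trip the new comment names looks like this; a self-contained sketch with a hypothetical Point type and output path, not golearn's actual estimator type:

    package main

    import (
        "bytes"
        "encoding/gob"
        "fmt"
        "io/ioutil"
    )

    type Point struct{ X, Y float64 }

    func main() {
        // Serialise to an in-memory buffer, then write it out.
        b := new(bytes.Buffer)
        if err := gob.NewEncoder(b).Encode(Point{1, 2}); err != nil {
            panic(err)
        }
        if err := ioutil.WriteFile("point.gob", b.Bytes(), 0644); err != nil {
            panic(err)
        }
        // Decode it back to confirm the round trip.
        raw, err := ioutil.ReadFile("point.gob")
        if err != nil {
            panic(err)
        }
        var p Point
        if err := gob.NewDecoder(bytes.NewReader(raw)).Decode(&p); err != nil {
            panic(err)
        }
        fmt.Println(p) // {1 2}
    }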
@@ -319,7 +319,7 @@ func (inst *Instances) GetRowVector(row int) []float64 {
 	return inst.storage.RowView(row)
 }
 
-// GetRowVector returns a row of system representation
+// GetRowVectorWithoutClass returns a row of system representation
 // values at the given row index, excluding the class attribute
 func (inst *Instances) GetRowVectorWithoutClass(row int) []float64 {
 	rawRow := make([]float64, inst.Cols)
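The exclusion the comment describes amounts to copying every column except the class index; a plain-slice sketch (names illustrative, not golearn's internals):

    func rowWithoutClass(row []float64, classIndex int) []float64 {
        out := make([]float64, 0, len(row)-1)
        for j, v := range row {
            if j != classIndex {
                out = append(out, v) // keep every non-class column
            }
        }
        return out
    }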
@@ -336,7 +336,7 @@ func (inst *Instances) GetClass(row int) string {
 	return attr.GetStringFromSysVal(val)
 }
 
-// GetClassDist returns a map containing the count of each
+// GetClassDistribution returns a map containing the count of each
 // class type (indexed by the class' string representation)
 func (inst *Instances) GetClassDistribution() map[string]int {
 	ret := make(map[string]int)
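The counting pattern behind GetClassDistribution is a single map increment per row's class label; a self-contained sketch over made-up labels:

    func classDistribution(labels []string) map[string]int {
        ret := make(map[string]int)
        for _, class := range labels {
            ret[class]++ // count indexed by the class string
        }
        return ret
    }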
@@ -1,10 +1,10 @@
 package ensemble
 
 import (
+	"fmt"
 	base "github.com/sjwhitworth/golearn/base"
 	meta "github.com/sjwhitworth/golearn/meta"
 	trees "github.com/sjwhitworth/golearn/trees"
-	"fmt"
 )
 
 // RandomForest classifies instances using an ensemble
@@ -16,7 +16,7 @@ type RandomForest struct {
 	Model *meta.BaggedModel
 }
 
-// NewRandomForests generates and return a new random forests
+// NewRandomForest generates and returns a new RandomForest.
 // forestSize controls the number of trees that get built
 // features controls the number of features used to build each tree
 func NewRandomForest(forestSize int, features int) *RandomForest {
@@ -29,7 +29,7 @@ func NewRandomForest(forestSize int, features int) *RandomForest {
 	return ret
 }
 
-// Train builds the RandomForest on the specified instances
+// Fit builds the RandomForest on the specified instances
 func (f *RandomForest) Fit(on *base.Instances) {
 	f.Model = new(meta.BaggedModel)
 	f.Model.RandomFeatures = f.Features
@@ -47,4 +47,4 @@ func (f *RandomForest) Predict(with *base.Instances) *base.Instances {
 
 func (f *RandomForest) String() string {
 	return fmt.Sprintf("RandomForest(ForestSize: %d, Features:%d, %s\n)", f.ForestSize, f.Features, f.Model)
-}
+}
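Putting the renamed API together, a hedged usage sketch based only on the signatures visible in this diff; loadInstances is a hypothetical stand-in for whatever loader produces a *base.Instances:

    func exampleForest() {
        instances := loadInstances("iris.csv") // hypothetical helper
        rf := ensemble.NewRandomForest(10, 4)  // 10 trees, 4 features per tree
        rf.Fit(instances)                      // Fit is the renamed Train
        predictions := rf.Predict(instances)
        fmt.Println(rf, predictions)           // String() formats the forest
    }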
@@ -21,7 +21,7 @@ type ChiMergeFilter struct {
 	_Trained bool
 }
 
-// Create a ChiMergeFilter with some helpful intialisations.
+// NewChiMergeFilter creates a ChiMergeFilter with some helpful initialisations.
 func NewChiMergeFilter(inst *base.Instances, significance float64) ChiMergeFilter {
 	return ChiMergeFilter{
 		make([]int, 0),
knn/knn.go
@@ -1,4 +1,4 @@
-// Package KNN implements a K Nearest Neighbors object, capable of both classification
+// Package knn implements a K Nearest Neighbors object, capable of both classification
 // and regression. It accepts data in the form of a slice of float64s, which are then reshaped
 // into an X by Y matrix.
 package knn
@@ -10,7 +10,7 @@ import (
 	util "github.com/sjwhitworth/golearn/utilities"
 )
 
-// A KNN Classifier. Consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
+// A KNNClassifier consists of a data matrix, associated labels in the same order as the matrix, and a distance function.
 // The accepted distance functions at this time are 'euclidean' and 'manhattan'.
 type KNNClassifier struct {
 	base.BaseEstimator
@@ -19,7 +19,7 @@ type KNNClassifier struct {
 	NearestNeighbours int
 }
 
-// Returns a new classifier
+// NewKnnClassifier returns a new classifier
 func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
 	KNN := KNNClassifier{}
 	KNN.DistanceFunc = distfunc
@@ -27,12 +27,12 @@ func NewKnnClassifier(distfunc string, neighbours int) *KNNClassifier {
 	return &KNN
 }
 
-// Train stores the training data for llater
+// Fit stores the training data for later
 func (KNN *KNNClassifier) Fit(trainingData *base.Instances) {
 	KNN.TrainingData = trainingData
 }
 
-// Returns a classification for the vector, based on a vector input, using the KNN algorithm.
+// PredictOne returns a classification for the vector, based on a vector input, using the KNN algorithm.
 // See http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm.
 func (KNN *KNNClassifier) PredictOne(vector []float64) string {
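A matching usage sketch for the classifier, limited to the signatures shown in this diff; the training set and feature vector are assumed:

    func exampleKNN(training *base.Instances) {
        cls := knn.NewKnnClassifier("euclidean", 2) // 'euclidean' or 'manhattan'
        cls.Fit(training)                           // Fit is the renamed Train
        label := cls.PredictOne([]float64{5.1, 3.5, 1.4, 0.2})
        fmt.Println(label)
    }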
@@ -95,14 +95,14 @@ func (KNN *KNNClassifier) Predict(what *base.Instances) *base.Instances {
 	return ret
 }
 
-//A KNN Regressor. Consists of a data matrix, associated result variables in the same order as the matrix, and a name.
+// A KNNRegressor consists of a data matrix, associated result variables in the same order as the matrix, and a name.
 type KNNRegressor struct {
 	base.BaseEstimator
 	Values []float64
 	DistanceFunc string
 }
 
-// Mints a new classifier.
+// NewKnnRegressor mints a new regressor.
 func NewKnnRegressor(distfunc string) *KNNRegressor {
 	KNN := KNNRegressor{}
 	KNN.DistanceFunc = distfunc
@@ -119,7 +119,6 @@ func (KNN *KNNRegressor) Fit(values []float64, numbers []float64, rows int, cols
 }
 
 func (KNN *KNNRegressor) Predict(vector *mat64.Dense, K int) float64 {
-
 	// Get the number of rows
 	rows, _ := KNN.Data.Dims()
 	rownumbers := make(map[int]float64)
@@ -79,7 +79,7 @@ func (b *BaggedModel) AddModel(m base.Classifier) {
 	b.Models = append(b.Models, m)
 }
 
-// Train generates and trains each model on a randomised subset of
+// Fit generates and trains each model on a randomised subset of
 // Instances.
 func (b *BaggedModel) Fit(from *base.Instances) {
 	var wait sync.WaitGroup
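The Fit above fans training out across goroutines with a sync.WaitGroup; a generic, self-contained sketch of that pattern (the train closures are hypothetical):

    func fitAll(models []func()) {
        var wait sync.WaitGroup
        for _, train := range models {
            wait.Add(1)
            go func(train func()) {
                defer wait.Done() // mark this model finished
                train()
            }(train)
        }
        wait.Wait() // block until every model has trained
    }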
@@ -12,14 +12,14 @@ func NewEuclidean() *Euclidean {
 	return &Euclidean{}
 }
 
-// Compute Eucledian inner product.
+// InnerProduct computes the Euclidean inner product.
 func (self *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	result := vectorX.Dot(vectorY)
 
 	return result
 }
 
-// Compute Euclidean distance (also known as L2 distance).
+// Distance computes Euclidean distance (also known as L2 distance).
 func (self *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	subVector := mat64.NewDense(0, 0, nil)
 	subVector.Sub(vectorX, vectorY)
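A usage sketch for the renamed methods, using only calls visible in this diff and assuming 1×3 row vectors:

    func exampleEuclidean() {
        x := mat64.NewDense(1, 3, []float64{1, 2, 3})
        y := mat64.NewDense(1, 3, []float64{4, 5, 6})
        e := NewEuclidean()
        fmt.Println(e.InnerProduct(x, y)) // 1*4 + 2*5 + 3*6 = 32
        fmt.Println(e.Distance(x, y))     // sqrt(9+9+9) ≈ 5.196
    }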
@@ -12,8 +12,8 @@ func NewManhattan() *Manhattan {
	return &Manhattan{}
 }
 
-// Manhattan distance, also known as L1 distance.
-// Compute sum of absolute values of elements.
+// Distance computes the Manhattan distance, also known as L1 distance:
+// the sum of the absolute values of the element-wise differences.
 func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	r1, c1 := vectorX.Dims()
 	r2, c2 := vectorY.Dims()
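For clarity, the L1 computation on plain float64 slices, independent of mat64; a sketch assuming equal-length inputs and the standard math package:

    func manhattan(x, y []float64) float64 {
        sum := 0.0
        for i := range x {
            sum += math.Abs(x[i] - y[i]) // sum of absolute element-wise differences
        }
        return sum
    }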
@@ -10,12 +10,12 @@ type PolyKernel struct {
 	degree int
 }
 
-// Return a d-degree polynomial kernel
+// NewPolyKernel returns a d-degree polynomial kernel
 func NewPolyKernel(degree int) *PolyKernel {
 	return &PolyKernel{degree: degree}
 }
 
-// Compute inner product through kernel trick
+// InnerProduct computes the inner product through a kernel trick
 // K(x, y) = (x^T y + 1)^d
 func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	result := vectorX.Dot(vectorY)
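The formula in the new comment, K(x, y) = (x^T y + 1)^d, on plain slices; a sketch assuming equal-length inputs:

    func polyKernel(x, y []float64, d int) float64 {
        dot := 0.0
        for i := range x {
            dot += x[i] * y[i] // x^T y
        }
        return math.Pow(dot+1, float64(d)) // (x^T y + 1)^d
    }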
@@ -24,7 +24,7 @@ func (self *PolyKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense)
 	return result
 }
 
-// Compute distance under the polynomial kernel, maybe no need.
+// Distance computes distance under the polynomial kernel (maybe not needed?)
 func (self *PolyKernel) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
 	subVector := mat64.NewDense(0, 0, nil)
 	subVector.Sub(vectorX, vectorY)
|
@ -10,12 +10,12 @@ type RBFKernel struct {
|
||||
gamma float64
|
||||
}
|
||||
|
||||
// Radial Basis Function Kernel
|
||||
// NewRBFKernel returns a representation of a Radial Basis Function Kernel
|
||||
func NewRBFKernel(gamma float64) *RBFKernel {
|
||||
return &RBFKernel{gamma: gamma}
|
||||
}
|
||||
|
||||
// Compute inner product through kernel trick
|
||||
// InnerProduct computes the inner product through a kernel trick
|
||||
// K(x, y) = exp(-gamma * ||x - y||^2)
|
||||
func (self *RBFKernel) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
|
||||
euclidean := NewEuclidean()
|
||||
|
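Likewise, a plain-slice sketch of K(x, y) = exp(-gamma * ||x - y||^2):

    func rbfKernel(x, y []float64, gamma float64) float64 {
        sq := 0.0
        for i := range x {
            d := x[i] - y[i]
            sq += d * d // squared Euclidean norm; no square root needed
        }
        return math.Exp(-gamma * sq)
    }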
@@ -2,7 +2,7 @@ package optimisation
 
 import "github.com/gonum/matrix/mat64"
 
-// Batch gradient descent finds the local minimum of a function.
+// BatchGradientDescent finds the local minimum of a function.
 // See http://en.wikipedia.org/wiki/Gradient_descent for more details.
 func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
 	m, _ := y.Dims()
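The update BatchGradientDescent iterates is theta <- theta - (alpha/m) * X^T (X theta - y). A self-contained single-step sketch for linear regression on plain slices, not golearn's implementation:

    func batchStep(x [][]float64, y, theta []float64, alpha float64) {
        m := len(y)
        grad := make([]float64, len(theta))
        for i := 0; i < m; i++ {
            pred := 0.0
            for j := range theta {
                pred += x[i][j] * theta[j] // prediction for row i
            }
            err := pred - y[i]
            for j := range theta {
                grad[j] += err * x[i][j] // accumulate X^T (X theta - y)
            }
        }
        for j := range theta {
            theta[j] -= alpha * grad[j] / float64(m) // average over all m rows
        }
    }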
@@ -35,7 +35,7 @@ func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *m
 	return theta
 }
 
-// Stochastic gradient descent updates the parameters of theta on a random row selection from a matrix.
+// StochasticGradientDescent updates the parameters of theta on a random row selection from a matrix.
 // It is faster as it does not compute the cost function over the entire dataset every time.
 // It instead calculates the error parameters over only one row of the dataset at a time.
 // In return, there is a trade off for accuracy. This is minimised by running multiple SGD processes
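The stochastic variant applies the same rule to one row at a time; a sketch of the per-row update:

    func sgdStep(xi []float64, yi float64, theta []float64, alpha float64) {
        pred := 0.0
        for j := range theta {
            pred += xi[j] * theta[j] // prediction for this single row
        }
        err := pred - yi
        for j := range theta {
            theta[j] -= alpha * err * xi[j] // no pass over the full dataset
        }
    }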
@@ -12,7 +12,7 @@ import (
 type InformationGainRuleGenerator struct {
 }
 
-// GetSplitAttribute returns the non-class Attribute which maximises the
+// GenerateSplitAttribute returns the non-class Attribute which maximises the
 // information gain.
 //
 // IMPORTANT: passing a base.Instances with no Attributes other than the class
@@ -27,7 +27,7 @@ func (r *InformationGainRuleGenerator) GenerateSplitAttribute(f *base.Instances)
 	return r.GetSplitAttributeFromSelection(allAttributes, f)
 }
 
-// GetSplitAttribute from selection returns the class Attribute which maximises
+// GetSplitAttributeFromSelection returns the non-class Attribute which maximises
 // the information gain amongst consideredAttributes
 //
 // IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
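The quantity both rule generators maximise is information gain: H(parent) minus the size-weighted entropies of the children. A self-contained sketch over class counts (names illustrative):

    func entropy(counts map[string]int) float64 {
        total := 0
        for _, c := range counts {
            total += c
        }
        h := 0.0
        for _, c := range counts {
            if c > 0 {
                p := float64(c) / float64(total)
                h -= p * math.Log2(p)
            }
        }
        return h
    }

    func informationGain(parent map[string]int, children []map[string]int) float64 {
        parentTotal := 0
        for _, c := range parent {
            parentTotal += c
        }
        gain := entropy(parent)
        for _, child := range children {
            childTotal := 0
            for _, c := range child {
                childTotal += c
            }
            // subtract each child's entropy, weighted by its share of rows
            gain -= float64(childTotal) / float64(parentTotal) * entropy(child)
        }
        return gain
    }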
@@ -234,7 +234,7 @@ type ID3DecisionTree struct {
 	PruneSplit float64
 }
 
-// Returns a new ID3DecisionTree with the specified test-prune
+// NewID3DecisionTree returns a new ID3DecisionTree with the specified test-prune
 // ratio. If the ratio is less than 0.001, the tree isn't pruned.
 func NewID3DecisionTree(prune float64) *ID3DecisionTree {
 	return &ID3DecisionTree{
@@ -66,7 +66,7 @@ func NewRandomTree(attrs int) *RandomTree {
 	}
 }
 
-// Train builds a RandomTree suitable for prediction
+// Fit builds a RandomTree suitable for prediction
 func (rt *RandomTree) Fit(from *base.Instances) {
 	rt.Root = InferID3Tree(from, rt.Rule)
 }
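A closing usage sketch from the signatures above; trainingInstances is a hypothetical *base.Instances:

    rt := trees.NewRandomTree(2) // consider 2 attributes per split
    rt.Fit(trainingInstances)    // Fit is the renamed Train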