1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-26 13:49:14 +08:00

Package documentation

This commit is contained in:
Richard Townsend 2014-05-19 12:59:11 +01:00
parent 889fec4419
commit a6072ac9de
5 changed files with 64 additions and 6 deletions

13
ensemble/ensemble.go Normal file
View File

@ -0,0 +1,13 @@
/*
Package ensemble contains classifiers which combine other classifiers.

RandomForest:
	Generates ForestSize bagged decision trees (currently ID3-based)
	each considering a fixed number of random features.

	Built on meta.Bagging
*/
package ensemble

View File

@ -8,7 +8,7 @@ import (
"strings"
)
// BaggedModels train Classifiers on subsets of the original
// BaggedModel trains base.Classifiers on subsets of the original
// Instances and combines the results through voting
type BaggedModel struct {
base.BaseClassifier
@ -17,6 +17,8 @@ type BaggedModel struct {
RandomFeatures int
}
// generateTrainingAttrs selects RandomFeatures number of base.Attributes from
// the provided base.Instances.
func (b *BaggedModel) generateTrainingAttrs(model int, from *base.Instances) []base.Attribute {
ret := make([]base.Attribute, 0)
if b.RandomFeatures == 0 {
@ -51,11 +53,17 @@ func (b *BaggedModel) generateTrainingAttrs(model int, from *base.Instances) []b
return ret
}
// generatePredictionInstances returns a modified version of the
// requested base.Instances, keeping only the base.Attributes that
// were selected when training the given model.
func (b *BaggedModel) generatePredictionInstances(model int, from *base.Instances) *base.Instances {
	return from.SelectAttributes(b.selectedAttributes[model])
}
// generateTrainingInstances generates RandomFeatures number of
// attributes and returns a modified version of base.Instances
// for training the model
func (b *BaggedModel) generateTrainingInstances(model int, from *base.Instances) *base.Instances {
insts := from.SampleWithReplacement(from.Rows)
selected := b.generateTrainingAttrs(model, from)

View File

@ -109,6 +109,8 @@ func InferID3Tree(from *base.Instances, with RuleGenerator) *DecisionTreeNode {
return ret
}
// getNestedString returns the contents of node d
// prefixed by level number of tags (also prints children)
func (d *DecisionTreeNode) getNestedString(level int) string {
buf := bytes.NewBuffer(nil)
tmp := bytes.NewBuffer(nil)
@ -143,6 +145,7 @@ func (d *DecisionTreeNode) String() string {
return d.getNestedString(0)
}
// computeAccuracy is a helper method for Prune()
func computeAccuracy(predictions *base.Instances, from *base.Instances) float64 {
cf := eval.GetConfusionMatrix(from, predictions)
return eval.GetAccuracy(cf)
@ -231,6 +234,8 @@ type ID3DecisionTree struct {
PruneSplit float64
}
// NewID3DecisionTree returns a new ID3DecisionTree with the specified
// test-prune ratio. If the ratio is less than 0.001, the tree isn't pruned.
func NewID3DecisionTree(prune float64) *ID3DecisionTree {
return &ID3DecisionTree{
base.BaseClassifier{},
@ -256,7 +261,7 @@ func (t *ID3DecisionTree) Predict(what *base.Instances) *base.Instances {
return t.Root.Predict(what)
}
// String returns a human-readable version of this ID3 tree
func (t *ID3DecisionTree) String() string {
	// Delegate the tree rendering to the root node's formatter.
	description := fmt.Sprintf("ID3DecisionTree(%s\n)", t.Root)
	return description
}

View File

@ -6,13 +6,14 @@ import (
"math/rand"
)
// RandomTreeRuleGenerator is used to generate decision rules for Random Trees
type RandomTreeRuleGenerator struct {
	// Attributes is the number of randomly-chosen attributes
	// considered at each split (see GenerateSplitAttribute).
	Attributes int
	// internalRule picks the best of the randomly-chosen
	// candidates by Information Gain.
	internalRule InformationGainRuleGenerator
}
// So WEKA returns a couple of possible attributes and evaluates
// the split criteria on each
// GenerateSplitAttribute returns the best attribute out of those randomly chosen
// which maximises Information Gain
func (r *RandomTreeRuleGenerator) GenerateSplitAttribute(f *base.Instances) base.Attribute {
// First step is to generate the random attributes that we'll consider
@ -44,12 +45,16 @@ func (r *RandomTreeRuleGenerator) GenerateSplitAttribute(f *base.Instances) base
return r.internalRule.GetSplitAttributeFromSelection(consideredAttributes, f)
}
// RandomTree builds a decision tree by considering a fixed number
// of randomly-chosen attributes at each node
type RandomTree struct {
	base.BaseClassifier
	// Root is the root node of the decision tree.
	Root *DecisionTreeNode
	// Rule selects the candidate split attribute at each node.
	Rule *RandomTreeRuleGenerator
}
// NewRandomTree returns a new RandomTree which considers attrs randomly
// chosen attributes at each node.
func NewRandomTree(attrs int) *RandomTree {
return &RandomTree{
base.BaseClassifier{},
@ -71,10 +76,13 @@ func (rt *RandomTree) Predict(from *base.Instances) *base.Instances {
return rt.Root.Predict(from)
}
// String returns a human-readable representation of this structure
func (rt *RandomTree) String() string {
	// Formatting is delegated to the root node.
	repr := fmt.Sprintf("RandomTree(%s)", rt.Root)
	return repr
}
// Prune removes nodes from the tree which are detrimental
// to determining the accuracy of the test set (with).
// It simply forwards the call to the root DecisionTreeNode.
func (rt *RandomTree) Prune(with *base.Instances) {
	rt.Root.Prune(with)
}

View File

@ -1,2 +1,26 @@
// Package trees provides a number of tree based ensemble learners.
package trees
/*
Package trees implements decision trees.

ID3DecisionTree:
	Builds a decision tree using the ID3 algorithm
	by picking the Attribute which maximises
	Information Gain at each node.

	Attributes must be CategoricalAttributes at
	present, so discretise beforehand (see
	filters)

RandomTree:
	Builds a decision tree using the ID3 algorithm
	by picking the Attribute amongst those
	randomly selected that maximises Information
	Gain.

	Attributes must be CategoricalAttributes at
	present, so discretise beforehand (see
	filters)
*/
package trees