1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-28 13:48:56 +08:00

Added Comments for Regressor

This commit is contained in:
Ayush 2020-07-18 14:21:50 +05:30
parent 16eac7d86d
commit 08529c42cf
2 changed files with 27 additions and 17 deletions

View File

@ -449,7 +449,7 @@ func cevaluateFromNode(tree CNode, xTest [][]float64, yTest []int64) float64 {
return accuracy
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
// Allocate problem array
_, rows := X.Size()
@ -474,7 +474,7 @@ func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
return problemVec
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 {
// Get the class Attributes
classAttrs := X.AllClassAttributes()

View File

@ -11,7 +11,7 @@ import (
// The "r" prefix to all function names indicates that they were tailored to support regression.
// See cart_classifier for details on functions.
// RNode - Node struct for Decision Tree Regressor
type RNode struct {
Left *RNode
Right *RNode
@ -22,6 +22,7 @@ type RNode struct {
Use_not bool
}
// RTree - Tree struct for Decision Tree Regressor
type RTree struct {
RootNode *RNode
criterion string
@ -29,6 +30,7 @@ type RTree struct {
triedSplits [][]float64
}
// Calculate Mean Absolute Error for a constant prediction
func meanAbsoluteError(y []float64, yBar float64) float64 {
error := 0.0
for _, target := range y {
@ -38,6 +40,7 @@ func meanAbsoluteError(y []float64, yBar float64) float64 {
return error
}
// Find average
func average(y []float64) float64 {
mean := 0.0
for _, value := range y {
@ -47,26 +50,30 @@ func average(y []float64) float64 {
return mean
}
// Turn Mean Absolute Error into impurity function for decision trees.
// maeImpurity turns mean absolute error into an impurity measure for
// decision trees: it returns the MAE of y around its mean, together
// with that mean (the constant prediction for the node).
func maeImpurity(y []float64) (float64, float64) {
	prediction := average(y)
	impurity := meanAbsoluteError(y, prediction)
	return impurity, prediction
}
// Calculate Mean Squared Error for constant prediction
// meanSquaredError returns the mean squared error of y around the
// constant prediction yBar. For an empty slice the result is NaN
// (0/0), matching the previous behavior.
func meanSquaredError(y []float64, yBar float64) float64 {
	// Named "total" rather than "error" to avoid shadowing the
	// predeclared error identifier.
	total := 0.0
	for _, target := range y {
		diff := target - yBar
		// diff*diff instead of math.Pow(diff, 2): clearer and cheaper.
		total += diff * diff
	}
	return total / float64(len(y))
}
// Convert mean squared error into impurity function for decision trees
// mseImpurity turns mean squared error into an impurity measure for
// decision trees: it returns the MSE of y around its mean, together
// with that mean (the constant prediction for the node).
func mseImpurity(y []float64) (float64, float64) {
	prediction := average(y)
	impurity := meanSquaredError(y, prediction)
	return impurity, prediction
}
// Split the data based on threshold and feature for testing information gain
func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) {
var left [][]float64
var lefty []float64
@ -87,6 +94,7 @@ func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64)
return left, right, lefty, righty
}
// Helper function for finding unique values
func rstringInSlice(a float64, list []float64) bool {
for _, b := range list {
if b == a {
@ -96,6 +104,7 @@ func rstringInSlice(a float64, list []float64) bool {
return false
}
// Return only unique values of a feature
func rfindUnique(data []float64) []float64 {
var unique []float64
for i := range data {
@ -106,6 +115,7 @@ func rfindUnique(data []float64) []float64 {
return unique
}
// Extract out a single feature from data
func rgetFeature(data [][]float64, feature int64) []float64 {
var featureVals []float64
for i := range data {
@ -114,6 +124,7 @@ func rgetFeature(data [][]float64, feature int64) []float64 {
return featureVals
}
// Interface for creating new Decision Tree Regressor - calls rbestSplit()
func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree {
var tree RTree
tree.maxDepth = maxDepth
@ -121,6 +132,7 @@ func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree {
return &tree
}
// Validate that the split being tested has not been done before.
func rvalidate(triedSplits [][]float64, feature int64, threshold float64) bool {
for i := range triedSplits {
split := triedSplits[i]
@ -154,6 +166,7 @@ func rNewSlice(n []float64) *rSlice {
return s
}
// Re order data based on a feature for optimizing code
func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) {
s := rNewSlice(featureVal)
sort.Sort(s)
@ -169,9 +182,9 @@ func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]floa
}
return dataSorted, ySorted
}
// Update the left and right data based on change in threshold
func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty []float64, feature int64, threshold float64) ([][]float64, []float64, [][]float64, []float64) {
for right[0][feature] < threshold {
@ -184,14 +197,6 @@ func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty [
return left, lefty, right, righty
}
// sum returns the total of all values in y. Returns 0 for a nil or
// empty slice.
func sum(y []int64) int64 {
	// "total" instead of "sum_": Go naming avoids underscores.
	var total int64
	for _, v := range y {
		total += v
	}
	return total
}
// Extra Method for creating simple to use interface. Many params are either redundant for user but are needed only for recursive logic.
func (tree *RTree) Fit(X base.FixedDataGrid) {
var emptyNode RNode
@ -203,7 +208,7 @@ func (tree *RTree) Fit(X base.FixedDataGrid) {
tree.RootNode = &emptyNode
}
// Essentially the Fit Method
// Essentially the Fit Method - Implements recursive logic
func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, criterion string, maxDepth int64, depth int64) RNode {
depth++
@ -328,11 +333,13 @@ func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, crit
return upperNode
}
// Print Tree for Visualization - calls printTreeFromNode()
// PrintTree writes a human-readable representation of the fitted
// regression tree to standard output, starting from the root node.
func (tree *RTree) PrintTree() {
	printTreeFromNode(*tree.RootNode, "")
}
// Use tree's root node to print out entire tree
func printTreeFromNode(tree RNode, spacing string) float64 {
fmt.Print(spacing + "Feature ")
@ -364,6 +371,7 @@ func printTreeFromNode(tree RNode, spacing string) float64 {
return 0.0
}
// Predict a single data point
func predictSingle(tree RNode, instance []float64) float64 {
if instance[tree.Feature] < tree.Threshold {
if tree.Left == nil {
@ -380,12 +388,14 @@ func predictSingle(tree RNode, instance []float64) float64 {
}
}
// Predict method for multiple data points. Calls predictFromNode()
// Predict returns the model's prediction for every row in X_test.
// The data grid is first flattened into a [][]float64 feature matrix,
// then evaluated by walking the tree from its root node.
func (tree *RTree) Predict(X_test base.FixedDataGrid) []float64 {
	features := regressorConvertInstancesToProblemVec(X_test)
	return predictFromNode(*tree.RootNode, features)
}
// Use tree's root node to predict a value for every test instance
func predictFromNode(tree RNode, test [][]float64) []float64 {
var preds []float64
for i := range test {
@ -395,7 +405,7 @@ func predictFromNode(tree RNode, test [][]float64) []float64 {
return preds
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
// Allocate problem array
_, rows := X.Size()
@ -420,7 +430,7 @@ func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
return problemVec
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) []float64 {
// Get the class Attributes
classAttrs := X.AllClassAttributes()