1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-28 13:48:56 +08:00

Added Comments for Regressor

This commit is contained in:
Ayush 2020-07-18 14:21:50 +05:30
parent 16eac7d86d
commit 08529c42cf
2 changed files with 27 additions and 17 deletions

View File

@ -449,7 +449,7 @@ func cevaluateFromNode(tree CNode, xTest [][]float64, yTest []int64) float64 {
return accuracy
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
// Allocate problem array
_, rows := X.Size()
@ -474,7 +474,7 @@ func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
return problemVec
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 {
// Get the class Attributes
classAttrs := X.AllClassAttributes()

View File

@ -11,7 +11,7 @@ import (
// The "r" prefix to all function names indicates that they were tailored to support regression.
// See cart_classifier for details on functions.
// RNode - Node struct for Decision Tree Regressor
type RNode struct {
Left *RNode
Right *RNode
@ -22,6 +22,7 @@ type RNode struct {
Use_not bool
}
// RTree - Tree struct for Decision Tree Regressor
type RTree struct {
RootNode *RNode
criterion string
@ -29,6 +30,7 @@ type RTree struct {
triedSplits [][]float64
}
// Calculate Mean Absolute Error for a constant prediction
func meanAbsoluteError(y []float64, yBar float64) float64 {
error := 0.0
for _, target := range y {
@ -38,6 +40,7 @@ func meanAbsoluteError(y []float64, yBar float64) float64 {
return error
}
// Find average
func average(y []float64) float64 {
mean := 0.0
for _, value := range y {
@ -47,26 +50,30 @@ func average(y []float64) float64 {
return mean
}
// Turn Mean Absolute Error into impurity function for decision trees.
// maeImpurity turns mean absolute error into an impurity measure for
// decision trees: it returns the MAE of y around its mean, together
// with that mean (the constant prediction for the node).
func maeImpurity(y []float64) (float64, float64) {
	prediction := average(y)
	impurity := meanAbsoluteError(y, prediction)
	return impurity, prediction
}
// Calculate Mean Squared Error for constant prediction
// meanSquaredError returns the mean squared error of y around the
// constant prediction yBar. For an empty slice the result is NaN
// (0/0), matching the previous behavior.
func meanSquaredError(y []float64, yBar float64) float64 {
	// Named "total" rather than "error" to avoid shadowing the
	// predeclared error identifier.
	total := 0.0
	for _, target := range y {
		diff := target - yBar
		// diff*diff instead of math.Pow(diff, 2): clearer and cheaper.
		total += diff * diff
	}
	return total / float64(len(y))
}
// Convert mean squared error into impurity function for decision trees
// mseImpurity turns mean squared error into an impurity measure for
// decision trees: it returns the MSE of y around its mean, together
// with that mean (the constant prediction for the node).
func mseImpurity(y []float64) (float64, float64) {
	prediction := average(y)
	impurity := meanSquaredError(y, prediction)
	return impurity, prediction
}
// Split the data based on threshold and feature for testing information gain
func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) {
var left [][]float64
var lefty []float64
@ -87,6 +94,7 @@ func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64)
return left, right, lefty, righty
}
// Helper function for finding unique values
func rstringInSlice(a float64, list []float64) bool {
for _, b := range list {
if b == a {
@ -96,6 +104,7 @@ func rstringInSlice(a float64, list []float64) bool {
return false
}
// Return only unique values of a feature
func rfindUnique(data []float64) []float64 {
var unique []float64
for i := range data {
@ -106,6 +115,7 @@ func rfindUnique(data []float64) []float64 {
return unique
}
// Extract out a single feature from data
func rgetFeature(data [][]float64, feature int64) []float64 {
var featureVals []float64
for i := range data {
@ -114,6 +124,7 @@ func rgetFeature(data [][]float64, feature int64) []float64 {
return featureVals
}
// Interface for creating new Decision Tree Regressor - calls rbestSplit()
func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree {
var tree RTree
tree.maxDepth = maxDepth
@ -121,6 +132,7 @@ func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree {
return &tree
}
// Validate that the split being tested has not been done before.
func rvalidate(triedSplits [][]float64, feature int64, threshold float64) bool {
for i := range triedSplits {
split := triedSplits[i]
@ -154,6 +166,7 @@ func rNewSlice(n []float64) *rSlice {
return s
}
// Re order data based on a feature for optimizing code
func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) {
s := rNewSlice(featureVal)
sort.Sort(s)
@ -169,9 +182,9 @@ func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]floa
}
return dataSorted, ySorted
}
// Update the left and right data based on change in threshold
func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty []float64, feature int64, threshold float64) ([][]float64, []float64, [][]float64, []float64) {
for right[0][feature] < threshold {
@ -184,14 +197,6 @@ func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty [
return left, lefty, right, righty
}
// sum returns the total of all values in y. Returns 0 for a nil or
// empty slice.
func sum(y []int64) int64 {
	// "total" instead of "sum_": Go naming avoids underscores.
	var total int64
	for _, v := range y {
		total += v
	}
	return total
}
// Extra Method for creating simple to use interface. Many params are either redundant for user but are needed only for recursive logic.
func (tree *RTree) Fit(X base.FixedDataGrid) {
var emptyNode RNode
@ -203,7 +208,7 @@ func (tree *RTree) Fit(X base.FixedDataGrid) {
tree.RootNode = &emptyNode
}
// Essentially the Fit Method
// Essentially the Fit Method - Implements recursive logic
func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, criterion string, maxDepth int64, depth int64) RNode {
depth++
@ -328,11 +333,13 @@ func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, crit
return upperNode
}
// Print Tree for Visualization - calls printTreeFromNode()
// PrintTree writes a human-readable representation of the fitted
// regression tree to standard output, starting from the root node.
func (tree *RTree) PrintTree() {
	printTreeFromNode(*tree.RootNode, "")
}
// Use tree's root node to print out entire tree
func printTreeFromNode(tree RNode, spacing string) float64 {
fmt.Print(spacing + "Feature ")
@ -364,6 +371,7 @@ func printTreeFromNode(tree RNode, spacing string) float64 {
return 0.0
}
// Predict a single data point
func predictSingle(tree RNode, instance []float64) float64 {
if instance[tree.Feature] < tree.Threshold {
if tree.Left == nil {
@ -380,12 +388,14 @@ func predictSingle(tree RNode, instance []float64) float64 {
}
}
// Predict method for multiple data points. Calls predictFromNode()
// Predict returns the model's prediction for every row in X_test.
// The data grid is first flattened into a [][]float64 feature matrix,
// then evaluated by walking the tree from its root node.
func (tree *RTree) Predict(X_test base.FixedDataGrid) []float64 {
	features := regressorConvertInstancesToProblemVec(X_test)
	return predictFromNode(*tree.RootNode, features)
}
// Use tree's root node to predict a value for every test instance
func predictFromNode(tree RNode, test [][]float64) []float64 {
var preds []float64
for i := range test {
@ -395,7 +405,7 @@ func predictFromNode(tree RNode, test [][]float64) []float64 {
return preds
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
// Allocate problem array
_, rows := X.Size()
@ -420,7 +430,7 @@ func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
return problemVec
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) []float64 {
// Get the class Attributes
classAttrs := X.AllClassAttributes()