From 884865294358810caf94588772398bf1aa1d8a5f Mon Sep 17 00:00:00 2001 From: Ayush Date: Thu, 16 Jul 2020 13:37:34 +0530 Subject: [PATCH 01/24] Added Decision Tree Classifier CART implementation of Decision Tree Classifier, based on Gini Impurity or Entropy, as selected by the user. --- trees/cart_classifier.go | 495 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 495 insertions(+) create mode 100644 trees/cart_classifier.go diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go new file mode 100644 index 0000000..c1e4043 --- /dev/null +++ b/trees/cart_classifier.go @@ -0,0 +1,495 @@ +package trees + +import ( + "fmt" + "math" + "sort" + "strings" + + "github.com/sjwhitworth/golearn/base" +) + +// CNode is Node struct for Decision Tree Classifier +type CNode struct { + Left *CNode + Right *CNode + Threshold float64 + Feature int64 + LeftLabel int64 + RightLabel int64 + Use_not bool + maxDepth int64 +} + +// CTree: Tree struct for Decision Tree Classifier + RootNode *CNode + criterion string + maxDepth int64 + labels []int64 + triedSplits [][]float64 +} + +// Calculate Gini Impurity of Target Labels +func giniImpurity(y []int64, labels []int64) (float64, int64) { + nInstances := len(y) + gini := 0.0 + maxLabelCount := 0 + var maxLabel int64 = 0 + for label := range labels { + numLabel := 0 + for target := range y { + if y[target] == labels[label] { + numLabel++ + } + } + p := float64(numLabel) / float64(nInstances) + gini += p * (1 - p) + if numLabel > maxLabelCount { + maxLabel = labels[label] + maxLabelCount = numLabel + } + } + return gini, maxLabel +} + +// Calculate Entropy loss of Target Labels +func entropy(y []int64, labels []int64) (float64, int64) { + nInstances := len(y) + entropy := 0.0 + maxLabelCount := 0 + var maxLabel int64 = 0 + for label := range labels { + numLabel := 0 + for target := range y { + if y[target] == labels[label] { + numLabel++ + } + } + p := float64(numLabel) / float64(nInstances) + + logP := math.Log2(p) + if p == 0 { + logP = 0 + } + entropy += -p * logP + if numLabel > maxLabelCount { + maxLabel = labels[label] + maxLabelCount = numLabel + } + } + return entropy, maxLabel +} + +// Split the data into left node and right node based on feature and threshold - only needed for fresh nodes +func testSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) { + var left [][]float64 + var right [][]float64 + var lefty []int64 + var righty []int64 + + for i := range data { + example := data[i] + if example[feature] < threshold { + left = append(left, example) + lefty = append(lefty, y[i]) + } else { + right = append(right, example) + righty = append(righty, y[i]) + } + } + + return left, right, lefty, righty +} + +// Helper Function to check if data point is unique or not +func stringInSlice(a float64, list []float64) bool { + for _, b := range list { + if b == a { + return true + } + } + return false +} + +// Isolate only unique values. Needed for splitting data. 
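+// For example, findUnique([]float64{2, 1, 2, 3}) returns [2, 1, 3] in
+// first-seen order; bestSplit sorts the result before deriving candidate
+// thresholds from consecutive pairs of unique values.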
+func findUnique(data []float64) []float64 { + var unique []float64 + for i := range data { + if !stringInSlice(data[i], unique) { + unique = append(unique, data[i]) + } + } + return unique +} + +// Isolate only the feature being considered for splitting +func getFeature(data [][]float64, feature int64) []float64 { + var featureVals []float64 + for i := range data { + featureVals = append(featureVals, data[i][feature]) + } + return featureVals +} + +// Function to Create New Decision Tree Classifier +func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) *CTree { + var tree CTree + tree.criterion = strings.ToLower(criterion) + tree.maxDepth = maxDepth + tree.labels = labels + + return &tree +} + +// Make sure that split being considered has not been done before +func validate(triedSplits [][]float64, feature int64, threshold float64) bool { + for i := range triedSplits { + split := triedSplits[i] + featureTried, thresholdTried := split[0], split[1] + if int64(featureTried) == feature && thresholdTried == threshold { + return false + } + } + return true +} + +// Helper struct for re-rdering data +type cSlice struct { + sort.Float64Slice + Idx []int +} + +// Helper function for re-ordering data +func (s cSlice) cSwap(i, j int) { + s.Float64Slice.Swap(i, j) + s.Idx[i], s.Idx[j] = s.Idx[j], s.Idx[i] +} + +// Final Helper Function for re-ordering data +func cNewSlice(n []float64) *cSlice { + s := &cSlice{Float64Slice: sort.Float64Slice(n), Idx: make([]int, len(n))} + + for i := range s.Idx { + s.Idx[i] = i + } + return s +} + +// Reorder the data by feature being considered. Optimizes code by reducing the number of times we have to loop over data for splitting +func reOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) { + s := cNewSlice(featureVal) + sort.Sort(s) + + indexes := s.Idx + + var dataSorted [][]float64 + var ySorted []int64 + + for _, index := range indexes { + dataSorted = append(dataSorted, data[index]) + ySorted = append(ySorted, y[index]) + } + + return dataSorted, ySorted +} + +// Change data in Left Node and Right Node based on change in threshold +func updateSplit(left [][]float64, lefty []int64, right [][]float64, righty []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) { + + for right[0][feature] < threshold { + left = append(left, right[0]) + right = right[1:] + lefty = append(lefty, righty[0]) + righty = righty[1:] + } + + return left, lefty, right, righty +} + +// Fit - Method visible to user to train tree +func (tree *CTree) Fit(X base.FixedDataGrid) { + var emptyNode CNode + + data := classifierConvertInstancesToProblemVec(X) + y := classifierConvertInstancesToLabelVec(X) + emptyNode = bestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) + + tree.RootNode = &emptyNode +} + +// Iterativly find and record the best split - recursive function +func bestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNode CNode, criterion string, maxDepth int64, depth int64) CNode { + + // Ensure that we have not reached maxDepth. 
maxDepth =-1 means split until nodes are pure + depth++ + + if maxDepth != -1 && depth > maxDepth { + return upperNode + } + + numFeatures := len(data[0]) + var bestGini float64 + var origGini float64 + + // Calculate loss based on Criterion Specified by user + if criterion == "gini" { + origGini, upperNode.LeftLabel = giniImpurity(y, labels) + } else if criterion == "entropy" { + origGini, upperNode.LeftLabel = entropy(y, labels) + } else { + panic("Invalid impurity function, choose from GINI or ENTROPY") + } + + bestGini = origGini + + bestLeft := data + bestRight := data + bestLefty := y + bestRighty := y + + numData := len(data) + + bestLeftGini := bestGini + bestRightGini := bestGini + + upperNode.Use_not = true + + var leftN CNode + var rightN CNode + // Iterate over all features + for i := 0; i < numFeatures; i++ { + featureVal := getFeature(data, int64(i)) + unique := findUnique(featureVal) + sort.Float64s(unique) + numUnique := len(unique) + + sortData, sortY := reOrderData(featureVal, data, y) + + firstTime := true + + var left, right [][]float64 + var lefty, righty []int64 + // Iterate over all possible thresholds for that feature + for j := range unique { + if j != (numUnique - 1) { + threshold := (unique[j] + unique[j+1]) / 2 + // Ensure that same split has not been made before + if validate(tree.triedSplits, int64(i), threshold) { + // We need to split data from fresh when considering new feature for the first time. + // Otherwise, we need to update the split by moving data points from left to right. + if firstTime { + left, right, lefty, righty = testSplit(sortData, int64(i), sortY, threshold) + firstTime = false + } else { + left, lefty, right, righty = updateSplit(left, lefty, right, righty, int64(i), threshold) + } + + var leftGini float64 + var rightGini float64 + var leftLabels int64 + var rightLabels int64 + + if criterion == "gini" { + leftGini, leftLabels = giniImpurity(lefty, labels) + rightGini, rightLabels = giniImpurity(righty, labels) + } else if criterion == "entropy" { + leftGini, leftLabels = entropy(lefty, labels) + rightGini, rightLabels = entropy(righty, labels) + } + // Calculate weighted gini impurity of child nodes + subGini := (leftGini * float64(len(left)) / float64(numData)) + (rightGini * float64(len(right)) / float64(numData)) + + // If we find a split that reduces impurity + if subGini < bestGini { + bestGini = subGini + bestLeft = left + bestRight = right + bestLefty = lefty + bestRighty = righty + upperNode.Threshold = threshold + upperNode.Feature = int64(i) + + upperNode.LeftLabel = leftLabels + upperNode.RightLabel = rightLabels + + bestLeftGini = leftGini + bestRightGini = rightGini + } + } + + } + } + } + // If no split was found, we don't want to use this node, so we will flag it + if bestGini == origGini { + upperNode.Use_not = false + return upperNode + } + // Until nodes are not pure + if bestGini > 0 { + + // If left node is pure, no need to split on left side again + if bestLeftGini > 0 { + tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) + // Recursive splitting logic + leftN = bestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth) + if leftN.Use_not == true { + upperNode.Left = &leftN + } + + } + // If right node is pure, no need to split on right side again + if bestRightGini > 0 { + tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) + // Recursive splitting logic + rightN = bestSplit(tree, bestRight, 
bestRighty, labels, rightN, criterion, maxDepth, depth) + if rightN.Use_not == true { + upperNode.Right = &rightN + } + + } + + } + // Return the node - contains all information regarding feature and threshold. + return upperNode +} + +// PrintTree : this function prints out entire tree for visualization - visible to user +func (tree *CTree) PrintTree() { + rootNode := *tree.RootNode + printTreeFromNode(rootNode, "") +} + +// Tree struct has root node. That is used to print tree - invisible to user but called from PrintTree +func printTreeFromNode(tree CNode, spacing string) float64 { + + fmt.Print(spacing + "Feature ") + fmt.Print(tree.Feature) + fmt.Print(" < ") + fmt.Println(tree.Threshold) + + if tree.Left == nil { + fmt.Println(spacing + "---> True") + fmt.Print(" " + spacing + "PREDICT ") + fmt.Println(tree.LeftLabel) + } + if tree.Right == nil { + fmt.Println(spacing + "---> FALSE") + fmt.Print(" " + spacing + "PREDICT ") + fmt.Println(tree.RightLabel) + } + + if tree.Left != nil { + fmt.Println(spacing + "---> True") + printTreeFromNode(*tree.Left, spacing+" ") + } + + if tree.Right != nil { + fmt.Println(spacing + "---> False") + printTreeFromNode(*tree.Right, spacing+" ") + } + + return 0.0 +} + +// Predict a single data point by traversing the entire tree +func predictSingle(tree CNode, instance []float64) int64 { + if instance[tree.Feature] < tree.Threshold { + if tree.Left == nil { + return tree.LeftLabel + } else { + return predictSingle(*tree.Left, instance) + } + } else { + if tree.Right == nil { + return tree.RightLabel + } else { + return predictSingle(*tree.Right, instance) + } + } +} + +// Predict is visible to user. Given test data, they receive predictions for every datapoint. +func (tree *CTree) Predict(test [][]float64) []int64 { + root := *tree.RootNode + + return predictFromNode(root, test) +} + +// This function uses the rootnode from Predict. It is invisible to user, but called from predict method. +func predictFromNode(tree CNode, test [][]float64) []int64 { + var preds []int64 + for i := range test { + iPred := predictSingle(tree, test[i]) + preds = append(preds, iPred) + } + return preds +} + +// Given Test data and label, return the accuracy of the classifier. Data has to be in float slice format before feeding. +func (tree *CTree) Evaluate(xTest [][]float64, yTest []int64) float64 { + rootNode := *tree.RootNode + return evaluateFromNode(rootNode, xTest, yTest) +} + +func evaluateFromNode(tree CNode, xTest [][]float64, yTest []int64) float64 { + preds := predictFromNode(tree, xTest) + accuracy := 0.0 + for i := range preds { + if preds[i] == yTest[i] { + accuracy++ + } + } + accuracy /= float64(len(yTest)) + return accuracy +} + +// Helper function to convert base.FixedDataGrid into required format. 
Called in Fit +func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 { + // Allocate problem array + _, rows := X.Size() + problemVec := make([][]float64, rows) + + // Retrieve numeric non-class Attributes + numericAttrs := base.NonClassFloatAttributes(X) + numericAttrSpecs := base.ResolveAttributes(X, numericAttrs) + + // Convert each row + X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) { + // Allocate a new row + probRow := make([]float64, len(numericAttrSpecs)) + // Read out the row + for i, _ := range numericAttrSpecs { + probRow[i] = base.UnpackBytesToFloat(row[i]) + } + // Add the row + problemVec[rowNo] = probRow + return true, nil + }) + return problemVec +} + +// Helper function to convert base.FixedDataGrid into required format. Called in Fit +func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 { + // Get the class Attributes + classAttrs := X.AllClassAttributes() + // Only support 1 class Attribute + if len(classAttrs) != 1 { + panic(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs))) + } + // ClassAttribute must be numeric + if _, ok := classAttrs[0].(*base.FloatAttribute); !ok { + panic(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0])) + } + // Allocate return structure + _, rows := X.Size() + // labelVec := make([]float64, rows) + labelVec := make([]int64, rows) + // Resolve class Attribute specification + classAttrSpecs := base.ResolveAttributes(X, classAttrs) + X.MapOverRows(classAttrSpecs, func(row [][]byte, rowNo int) (bool, error) { + labelVec[rowNo] = int64(base.UnpackBytesToFloat(row[0])) + return true, nil + }) + return labelVec +} From d1228c55083966c4e3aa8b99abe3968e28ab3c18 Mon Sep 17 00:00:00 2001 From: Ayush Date: Sat, 18 Jul 2020 10:47:22 +0530 Subject: [PATCH 02/24] Adding Integration For Fixed Data Grid in Predict And Evaluate --- linear_models/logistic.go | 1 + trees/cart_classifier.go | 71 ++++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/linear_models/logistic.go b/linear_models/logistic.go index 96c3206..14ff0d2 100644 --- a/linear_models/logistic.go +++ b/linear_models/logistic.go @@ -3,6 +3,7 @@ package linear_models import ( "errors" "fmt" + "github.com/sjwhitworth/golearn/base" ) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index c1e4043..90139bf 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -22,6 +22,7 @@ type CNode struct { } // CTree: Tree struct for Decision Tree Classifier +type CTree struct { RootNode *CNode criterion string maxDepth int64 @@ -81,7 +82,7 @@ func entropy(y []int64, labels []int64) (float64, int64) { } // Split the data into left node and right node based on feature and threshold - only needed for fresh nodes -func testSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) { +func ctestSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) { var left [][]float64 var right [][]float64 var lefty []int64 @@ -102,7 +103,7 @@ func testSplit(data [][]float64, feature int64, y []int64, threshold float64) ([ } // Helper Function to check if data point is unique or not -func stringInSlice(a float64, list []float64) bool { +func cstringInSlice(a float64, list []float64) bool { for _, b := range list { if b == a { return true @@ -112,10 +113,10 @@ func stringInSlice(a float64, list []float64) bool { } // Isolate only 
unique values. Needed for splitting data. -func findUnique(data []float64) []float64 { +func cfindUnique(data []float64) []float64 { var unique []float64 for i := range data { - if !stringInSlice(data[i], unique) { + if !cstringInSlice(data[i], unique) { unique = append(unique, data[i]) } } @@ -123,7 +124,7 @@ func findUnique(data []float64) []float64 { } // Isolate only the feature being considered for splitting -func getFeature(data [][]float64, feature int64) []float64 { +func cgetFeature(data [][]float64, feature int64) []float64 { var featureVals []float64 for i := range data { featureVals = append(featureVals, data[i][feature]) @@ -142,7 +143,7 @@ func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) } // Make sure that split being considered has not been done before -func validate(triedSplits [][]float64, feature int64, threshold float64) bool { +func cvalidate(triedSplits [][]float64, feature int64, threshold float64) bool { for i := range triedSplits { split := triedSplits[i] featureTried, thresholdTried := split[0], split[1] @@ -176,7 +177,7 @@ func cNewSlice(n []float64) *cSlice { } // Reorder the data by feature being considered. Optimizes code by reducing the number of times we have to loop over data for splitting -func reOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) { +func creOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) { s := cNewSlice(featureVal) sort.Sort(s) @@ -194,7 +195,7 @@ func reOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64 } // Change data in Left Node and Right Node based on change in threshold -func updateSplit(left [][]float64, lefty []int64, right [][]float64, righty []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) { +func cupdateSplit(left [][]float64, lefty []int64, right [][]float64, righty []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) { for right[0][feature] < threshold { left = append(left, right[0]) @@ -212,13 +213,13 @@ func (tree *CTree) Fit(X base.FixedDataGrid) { data := classifierConvertInstancesToProblemVec(X) y := classifierConvertInstancesToLabelVec(X) - emptyNode = bestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) + emptyNode = cbestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) tree.RootNode = &emptyNode } // Iterativly find and record the best split - recursive function -func bestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNode CNode, criterion string, maxDepth int64, depth int64) CNode { +func cbestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNode CNode, criterion string, maxDepth int64, depth int64) CNode { // Ensure that we have not reached maxDepth. 
maxDepth =-1 means split until nodes are pure depth++ @@ -258,12 +259,12 @@ func bestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNod var rightN CNode // Iterate over all features for i := 0; i < numFeatures; i++ { - featureVal := getFeature(data, int64(i)) - unique := findUnique(featureVal) + featureVal := cgetFeature(data, int64(i)) + unique := cfindUnique(featureVal) sort.Float64s(unique) numUnique := len(unique) - sortData, sortY := reOrderData(featureVal, data, y) + sortData, sortY := creOrderData(featureVal, data, y) firstTime := true @@ -274,14 +275,14 @@ func bestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNod if j != (numUnique - 1) { threshold := (unique[j] + unique[j+1]) / 2 // Ensure that same split has not been made before - if validate(tree.triedSplits, int64(i), threshold) { + if cvalidate(tree.triedSplits, int64(i), threshold) { // We need to split data from fresh when considering new feature for the first time. // Otherwise, we need to update the split by moving data points from left to right. if firstTime { - left, right, lefty, righty = testSplit(sortData, int64(i), sortY, threshold) + left, right, lefty, righty = ctestSplit(sortData, int64(i), sortY, threshold) firstTime = false } else { - left, lefty, right, righty = updateSplit(left, lefty, right, righty, int64(i), threshold) + left, lefty, right, righty = cupdateSplit(left, lefty, right, righty, int64(i), threshold) } var leftGini float64 @@ -332,7 +333,7 @@ func bestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNod if bestLeftGini > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) // Recursive splitting logic - leftN = bestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth) + leftN = cbestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth) if leftN.Use_not == true { upperNode.Left = &leftN } @@ -342,7 +343,7 @@ func bestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNod if bestRightGini > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) // Recursive splitting logic - rightN = bestSplit(tree, bestRight, bestRighty, labels, rightN, criterion, maxDepth, depth) + rightN = cbestSplit(tree, bestRight, bestRighty, labels, rightN, criterion, maxDepth, depth) if rightN.Use_not == true { upperNode.Right = &rightN } @@ -357,11 +358,11 @@ func bestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNod // PrintTree : this function prints out entire tree for visualization - visible to user func (tree *CTree) PrintTree() { rootNode := *tree.RootNode - printTreeFromNode(rootNode, "") + cprintTreeFromNode(rootNode, "") } // Tree struct has root node. 
That is used to print tree - invisible to user but called from PrintTree -func printTreeFromNode(tree CNode, spacing string) float64 { +func cprintTreeFromNode(tree CNode, spacing string) float64 { fmt.Print(spacing + "Feature ") fmt.Print(tree.Feature) @@ -381,59 +382,61 @@ func printTreeFromNode(tree CNode, spacing string) float64 { if tree.Left != nil { fmt.Println(spacing + "---> True") - printTreeFromNode(*tree.Left, spacing+" ") + cprintTreeFromNode(*tree.Left, spacing+" ") } if tree.Right != nil { fmt.Println(spacing + "---> False") - printTreeFromNode(*tree.Right, spacing+" ") + cprintTreeFromNode(*tree.Right, spacing+" ") } return 0.0 } // Predict a single data point by traversing the entire tree -func predictSingle(tree CNode, instance []float64) int64 { +func cpredictSingle(tree CNode, instance []float64) int64 { if instance[tree.Feature] < tree.Threshold { if tree.Left == nil { return tree.LeftLabel } else { - return predictSingle(*tree.Left, instance) + return cpredictSingle(*tree.Left, instance) } } else { if tree.Right == nil { return tree.RightLabel } else { - return predictSingle(*tree.Right, instance) + return cpredictSingle(*tree.Right, instance) } } } // Predict is visible to user. Given test data, they receive predictions for every datapoint. -func (tree *CTree) Predict(test [][]float64) []int64 { +func (tree *CTree) Predict(X_test base.FixedDataGrid) []int64 { root := *tree.RootNode - - return predictFromNode(root, test) + test := classifierConvertInstancesToProblemVec(X_test) + return cpredictFromNode(root, test) } // This function uses the rootnode from Predict. It is invisible to user, but called from predict method. -func predictFromNode(tree CNode, test [][]float64) []int64 { +func cpredictFromNode(tree CNode, test [][]float64) []int64 { var preds []int64 for i := range test { - iPred := predictSingle(tree, test[i]) + iPred := cpredictSingle(tree, test[i]) preds = append(preds, iPred) } return preds } // Given Test data and label, return the accuracy of the classifier. Data has to be in float slice format before feeding. 
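// Accuracy is the fraction of test rows whose predicted label matches the
// true label: (correct predictions) / (total predictions).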
-func (tree *CTree) Evaluate(xTest [][]float64, yTest []int64) float64 {
+func (tree *CTree) Evaluate(test base.FixedDataGrid) float64 {
 	rootNode := *tree.RootNode
-	return evaluateFromNode(rootNode, xTest, yTest)
+	xTest := classifierConvertInstancesToProblemVec(test)
+	yTest := classifierConvertInstancesToLabelVec(test)
+	return cevaluateFromNode(rootNode, xTest, yTest)
 }
 
-func evaluateFromNode(tree CNode, xTest [][]float64, yTest []int64) float64 {
-	preds := predictFromNode(tree, xTest)
+func cevaluateFromNode(tree CNode, xTest [][]float64, yTest []int64) float64 {
+	preds := cpredictFromNode(tree, xTest)
 	accuracy := 0.0
 	for i := range preds {
 		if preds[i] == yTest[i] {
 			accuracy++
 		}
 	}
 	accuracy /= float64(len(yTest))
 	return accuracy
 }

From 16eac7d86d464f6059fc842fbc1e845c07af5ff2 Mon Sep 17 00:00:00 2001
From: Ayush
Date: Sat, 18 Jul 2020 12:26:50 +0530
Subject: [PATCH 03/24] Adding Regression Trees

---
 trees/cart_classifier.go |   2 +
 trees/cart_regressor.go  | 446 +++++++++++++++++++++++++++++++++++++++
 trees/tmp                | Bin 413 -> 409 bytes
 3 files changed, 448 insertions(+)
 create mode 100644 trees/cart_regressor.go

diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go
index 90139bf..373fdad 100644
--- a/trees/cart_classifier.go
+++ b/trees/cart_classifier.go
@@ -9,6 +9,8 @@ import (
 	"github.com/sjwhitworth/golearn/base"
 )
 
+// The "c" prefix to function names indicates that they were tailored for classification
+
 // CNode is Node struct for Decision Tree Classifier
 type CNode struct {
 	Left *CNode
diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go
new file mode 100644
index 0000000..2a962e8
--- /dev/null
+++ b/trees/cart_regressor.go
@@ -0,0 +1,446 @@
+package trees
+
+import (
+	"fmt"
+	"math"
+	"sort"
+	"strings"
+
+	"github.com/sjwhitworth/golearn/base"
+)
+
+// The "r" prefix to all function names indicates that they were tailored to support regression.
+
+// See cart_classifier for details on functions.
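+// Unlike CNode, the leaves of an RNode tree store a constant numeric
+// prediction: the mean of the target values that reach them. For example,
+// for y = {1.0, 3.0} the best constant prediction is 2.0, so mseImpurity(y)
+// below returns (1.0, 2.0): an MSE of 1.0 and a mean of 2.0.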
+type RNode struct { + Left *RNode + Right *RNode + Threshold float64 + Feature int64 + LeftPred float64 + RightPred float64 + Use_not bool +} + +type RTree struct { + RootNode *RNode + criterion string + maxDepth int64 + triedSplits [][]float64 +} + +func meanAbsoluteError(y []float64, yBar float64) float64 { + error := 0.0 + for _, target := range y { + error += math.Abs(target - yBar) + } + error /= float64(len(y)) + return error +} + +func average(y []float64) float64 { + mean := 0.0 + for _, value := range y { + mean += value + } + mean /= float64(len(y)) + return mean +} + +func maeImpurity(y []float64) (float64, float64) { + yHat := average(y) + return meanAbsoluteError(y, yHat), yHat +} + +func meanSquaredError(y []float64, yBar float64) float64 { + error := 0.0 + for _, target := range y { + item_error := target - yBar + error += math.Pow(item_error, 2) + } + error /= float64(len(y)) + return error +} + +func mseImpurity(y []float64) (float64, float64) { + yHat := average(y) + return meanSquaredError(y, yHat), yHat +} + +func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) { + var left [][]float64 + var lefty []float64 + var right [][]float64 + var righty []float64 + + for i := range data { + example := data[i] + if example[feature] < threshold { + left = append(left, example) + lefty = append(lefty, y[i]) + } else { + right = append(right, example) + righty = append(righty, y[i]) + } + } + + return left, right, lefty, righty +} + +func rstringInSlice(a float64, list []float64) bool { + for _, b := range list { + if b == a { + return true + } + } + return false +} + +func rfindUnique(data []float64) []float64 { + var unique []float64 + for i := range data { + if !rstringInSlice(data[i], unique) { + unique = append(unique, data[i]) + } + } + return unique +} + +func rgetFeature(data [][]float64, feature int64) []float64 { + var featureVals []float64 + for i := range data { + featureVals = append(featureVals, data[i][feature]) + } + return featureVals +} + +func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree { + var tree RTree + tree.maxDepth = maxDepth + tree.criterion = strings.ToLower(criterion) + return &tree +} + +func rvalidate(triedSplits [][]float64, feature int64, threshold float64) bool { + for i := range triedSplits { + split := triedSplits[i] + featureTried, thresholdTried := split[0], split[1] + if int64(featureTried) == feature && thresholdTried == threshold { + return false + } + } + return true +} + +// Helper struct for re-rdering data +type rSlice struct { + sort.Float64Slice + Idx []int +} + +// Helper function for re-ordering data +func (s rSlice) rSwap(i, j int) { + s.Float64Slice.Swap(i, j) + s.Idx[i], s.Idx[j] = s.Idx[j], s.Idx[i] +} + +// Final Helper Function for re-ordering data +func rNewSlice(n []float64) *rSlice { + s := &rSlice{Float64Slice: sort.Float64Slice(n), Idx: make([]int, len(n))} + + for i := range s.Idx { + s.Idx[i] = i + } + return s +} + +func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) { + s := rNewSlice(featureVal) + sort.Sort(s) + + indexes := s.Idx + + var dataSorted [][]float64 + var ySorted []float64 + + for _, index := range indexes { + dataSorted = append(dataSorted, data[index]) + ySorted = append(ySorted, y[index]) + } + + return dataSorted, ySorted + +} + +func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty []float64, feature int64, threshold float64) 
([][]float64, []float64, [][]float64, []float64) { + + for right[0][feature] < threshold { + left = append(left, right[0]) + right = right[1:] + lefty = append(lefty, righty[0]) + righty = righty[1:] + } + + return left, lefty, right, righty +} + +func sum(y []int64) int64 { + var sum_ int64 = 0 + for i := range y { + sum_ += y[i] + } + return sum_ +} + +// Extra Method for creating simple to use interface. Many params are either redundant for user but are needed only for recursive logic. +func (tree *RTree) Fit(X base.FixedDataGrid) { + var emptyNode RNode + data := regressorConvertInstancesToProblemVec(X) + y := regressorConvertInstancesToLabelVec(X) + + emptyNode = rbestSplit(*tree, data, y, emptyNode, tree.criterion, tree.maxDepth, 0) + + tree.RootNode = &emptyNode +} + +// Essentially the Fit Method +func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, criterion string, maxDepth int64, depth int64) RNode { + + depth++ + + if depth > maxDepth && maxDepth != -1 { + return upperNode + } + + numFeatures := len(data[0]) + var bestLoss float64 + var origLoss float64 + + if criterion == "mae" { + origLoss, upperNode.LeftPred = maeImpurity(y) + } else { + origLoss, upperNode.LeftPred = mseImpurity(y) + } + + bestLoss = origLoss + + bestLeft := data + bestRight := data + bestLefty := y + bestRighty := y + + numData := len(data) + + bestLeftLoss := bestLoss + bestRightLoss := bestLoss + + upperNode.Use_not = true + + var leftN RNode + var rightN RNode + // Iterate over all features + for i := 0; i < numFeatures; i++ { + featureVal := rgetFeature(data, int64(i)) + unique := rfindUnique(featureVal) + sort.Float64s(unique) + numUnique := len(unique) + + sortData, sortY := rreOrderData(featureVal, data, y) + + firstTime := true + + var left, right [][]float64 + var lefty, righty []float64 + + for j := range unique { + if j != (numUnique - 1) { + threshold := (unique[j] + unique[j+1]) / 2 + if rvalidate(tree.triedSplits, int64(i), threshold) { + if firstTime { + left, right, lefty, righty = rtestSplit(sortData, int64(i), sortY, threshold) + firstTime = false + } else { + left, lefty, right, righty = rupdateSplit(left, lefty, right, righty, int64(i), threshold) + } + + var leftLoss float64 + var rightLoss float64 + var leftPred float64 + var rightPred float64 + + if criterion == "mae" { + leftLoss, leftPred = maeImpurity(lefty) + rightLoss, rightPred = maeImpurity(righty) + } else { + leftLoss, leftPred = mseImpurity(lefty) + rightLoss, rightPred = mseImpurity(righty) + } + + subLoss := (leftLoss * float64(len(left)) / float64(numData)) + (rightLoss * float64(len(right)) / float64(numData)) + + if subLoss < bestLoss { + bestLoss = subLoss + bestLeft = left + bestRight = right + bestLefty = lefty + bestRighty = righty + upperNode.Threshold = threshold + upperNode.Feature = int64(i) + + upperNode.LeftPred = leftPred + upperNode.RightPred = rightPred + + bestLeftLoss = leftLoss + bestRightLoss = rightLoss + } + } + + } + } + } + + if bestLoss == origLoss { + upperNode.Use_not = false + return upperNode + } + + if bestLoss > 0 { + + if bestLeftLoss > 0 { + tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) + leftN = rbestSplit(tree, bestLeft, bestLefty, leftN, criterion, maxDepth, depth) + if leftN.Use_not == true { + upperNode.Left = &leftN + } + + } + if bestRightLoss > 0 { + tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) + rightN = rbestSplit(tree, bestRight, bestRighty, 
rightN, criterion, maxDepth, depth) + if rightN.Use_not == true { + upperNode.Right = &rightN + } + + } + + } + + return upperNode +} + +func (tree *RTree) PrintTree() { + rootNode := *tree.RootNode + printTreeFromNode(rootNode, "") +} + +func printTreeFromNode(tree RNode, spacing string) float64 { + + fmt.Print(spacing + "Feature ") + fmt.Print(tree.Feature) + fmt.Print(" < ") + fmt.Println(tree.Threshold) + + if tree.Left == nil { + fmt.Println(spacing + "---> True") + fmt.Print(" " + spacing + "PREDICT ") + fmt.Println(tree.LeftPred) + } + if tree.Right == nil { + fmt.Println(spacing + "---> FALSE") + fmt.Print(" " + spacing + "PREDICT ") + fmt.Println(tree.RightPred) + } + + if tree.Left != nil { + fmt.Println(spacing + "---> True") + printTreeFromNode(*tree.Left, spacing+" ") + } + + if tree.Right != nil { + fmt.Println(spacing + "---> False") + printTreeFromNode(*tree.Right, spacing+" ") + } + + return 0.0 +} + +func predictSingle(tree RNode, instance []float64) float64 { + if instance[tree.Feature] < tree.Threshold { + if tree.Left == nil { + return tree.LeftPred + } else { + return predictSingle(*tree.Left, instance) + } + } else { + if tree.Right == nil { + return tree.RightPred + } else { + return predictSingle(*tree.Right, instance) + } + } +} + +func (tree *RTree) Predict(X_test base.FixedDataGrid) []float64 { + root := *tree.RootNode + test := regressorConvertInstancesToProblemVec(X_test) + return predictFromNode(root, test) +} + +func predictFromNode(tree RNode, test [][]float64) []float64 { + var preds []float64 + for i := range test { + i_pred := predictSingle(tree, test[i]) + preds = append(preds, i_pred) + } + return preds +} + +// Helper function to convert base.FixedDataGrid into required format. Called in Fit +func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 { + // Allocate problem array + _, rows := X.Size() + problemVec := make([][]float64, rows) + + // Retrieve numeric non-class Attributes + numericAttrs := base.NonClassFloatAttributes(X) + numericAttrSpecs := base.ResolveAttributes(X, numericAttrs) + + // Convert each row + X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) { + // Allocate a new row + probRow := make([]float64, len(numericAttrSpecs)) + // Read out the row + for i, _ := range numericAttrSpecs { + probRow[i] = base.UnpackBytesToFloat(row[i]) + } + // Add the row + problemVec[rowNo] = probRow + return true, nil + }) + return problemVec +} + +// Helper function to convert base.FixedDataGrid into required format. 
Called in Fit
+func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) []float64 {
+	// Get the class Attributes
+	classAttrs := X.AllClassAttributes()
+	// Only support 1 class Attribute
+	if len(classAttrs) != 1 {
+		panic(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs)))
+	}
+	// ClassAttribute must be numeric
+	if _, ok := classAttrs[0].(*base.FloatAttribute); !ok {
+		panic(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0]))
+	}
+	// Allocate return structure
+	_, rows := X.Size()
+	labelVec := make([]float64, rows)
+	// Resolve class Attribute specification
+	classAttrSpecs := base.ResolveAttributes(X, classAttrs)
+	X.MapOverRows(classAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
+		labelVec[rowNo] = base.UnpackBytesToFloat(row[0])
+		return true, nil
+	})
+	return labelVec
+}
diff --git a/trees/tmp b/trees/tmp
index 28c93c507c8869a97a9ff1d9eecbd160475a62e3..af98d1a33b82338d7466955c2c6aafb41cd3496c 100644
GIT binary patch
(two base85-encoded binary deltas omitted)

From: Ayush
Date: Sat, 18 Jul 2020 14:21:50 +0530
Subject: [PATCH 04/24] Added Comments for Regressor

---
 trees/cart_classifier.go |  4 ++--
 trees/cart_regressor.go  | 40 +++++++++++++++++++++---------------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go
index 373fdad..f9cb6a1 100644
--- a/trees/cart_classifier.go
+++ b/trees/cart_classifier.go
@@ -449,7 +449,7 @@ func cevaluateFromNode(tree CNode, xTest [][]float64, yTest []int64) float64 {
 	return accuracy
 }
 
-// Helper function to convert base.FixedDataGrid into required format. Called in Fit
+// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
 func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
 	// Allocate problem array
 	_, rows := X.Size()
@@ -474,7 +474,7 @@ func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
 	return problemVec
 }
 
-// Helper function to convert base.FixedDataGrid into required format. Called in Fit
+// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
 func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 {
 	// Get the class Attributes
 	classAttrs := X.AllClassAttributes()
diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go
index 2a962e8..7ec044a 100644
--- a/trees/cart_regressor.go
+++ b/trees/cart_regressor.go
@@ -11,7 +11,7 @@ import (
 
 // The "r" prefix to all function names indicates that they were tailored to support regression.
 
-// See cart_classifier for details on functions.
+// RNode - Node struct for Decision Tree Regressor type RNode struct { Left *RNode Right *RNode @@ -22,6 +22,7 @@ type RNode struct { Use_not bool } +// RTree - Tree struct for Decision Tree Regressor type RTree struct { RootNode *RNode criterion string @@ -29,6 +30,7 @@ type RTree struct { triedSplits [][]float64 } +// Calculate Mean Absolute Error for a constant prediction func meanAbsoluteError(y []float64, yBar float64) float64 { error := 0.0 for _, target := range y { @@ -38,6 +40,7 @@ func meanAbsoluteError(y []float64, yBar float64) float64 { return error } +// Find average func average(y []float64) float64 { mean := 0.0 for _, value := range y { @@ -47,26 +50,30 @@ func average(y []float64) float64 { return mean } +// Turn Mean Absolute Error into impurity function for decision trees. func maeImpurity(y []float64) (float64, float64) { yHat := average(y) return meanAbsoluteError(y, yHat), yHat } +// Calculate Mean Squared Error for constant prediction func meanSquaredError(y []float64, yBar float64) float64 { error := 0.0 for _, target := range y { - item_error := target - yBar - error += math.Pow(item_error, 2) + itemError := target - yBar + error += math.Pow(itemError, 2) } error /= float64(len(y)) return error } +// Convert mean squared error into impurity function for decision trees func mseImpurity(y []float64) (float64, float64) { yHat := average(y) return meanSquaredError(y, yHat), yHat } +// Split the data based on threshold and feature for testing information gain func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) { var left [][]float64 var lefty []float64 @@ -87,6 +94,7 @@ func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64) return left, right, lefty, righty } +// Helper function for finding unique values func rstringInSlice(a float64, list []float64) bool { for _, b := range list { if b == a { @@ -96,6 +104,7 @@ func rstringInSlice(a float64, list []float64) bool { return false } +// Return only unique values of a feature func rfindUnique(data []float64) []float64 { var unique []float64 for i := range data { @@ -106,6 +115,7 @@ func rfindUnique(data []float64) []float64 { return unique } +// Extract out a single feature from data func rgetFeature(data [][]float64, feature int64) []float64 { var featureVals []float64 for i := range data { @@ -114,6 +124,7 @@ func rgetFeature(data [][]float64, feature int64) []float64 { return featureVals } +// Interface for creating new Decision Tree Regressor - cals rbestSplit() func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree { var tree RTree tree.maxDepth = maxDepth @@ -121,6 +132,7 @@ func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree { return &tree } +// Validate that the split being tested has not been done before. 
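+// Each recursive call records its chosen (feature, threshold) pair in
+// triedSplits, and rvalidate skips any candidate pair that was already
+// used higher up the tree.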
func rvalidate(triedSplits [][]float64, feature int64, threshold float64) bool { for i := range triedSplits { split := triedSplits[i] @@ -154,6 +166,7 @@ func rNewSlice(n []float64) *rSlice { return s } +// Re order data based on a feature for optimizing code func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) { s := rNewSlice(featureVal) sort.Sort(s) @@ -169,9 +182,9 @@ func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]floa } return dataSorted, ySorted - } +// Update the left and right data based on change in threshold func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty []float64, feature int64, threshold float64) ([][]float64, []float64, [][]float64, []float64) { for right[0][feature] < threshold { @@ -184,14 +197,6 @@ func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty [ return left, lefty, right, righty } -func sum(y []int64) int64 { - var sum_ int64 = 0 - for i := range y { - sum_ += y[i] - } - return sum_ -} - // Extra Method for creating simple to use interface. Many params are either redundant for user but are needed only for recursive logic. func (tree *RTree) Fit(X base.FixedDataGrid) { var emptyNode RNode @@ -203,7 +208,7 @@ func (tree *RTree) Fit(X base.FixedDataGrid) { tree.RootNode = &emptyNode } -// Essentially the Fit Method +// Essentially the Fit Method - Impelements recursive logic func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, criterion string, maxDepth int64, depth int64) RNode { depth++ @@ -328,11 +333,13 @@ func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, crit return upperNode } +// Print Tree for Visualtion - calls printTreeFromNode() func (tree *RTree) PrintTree() { rootNode := *tree.RootNode printTreeFromNode(rootNode, "") } +// Use tree's root node to print out entire tree func printTreeFromNode(tree RNode, spacing string) float64 { fmt.Print(spacing + "Feature ") @@ -364,6 +371,7 @@ func printTreeFromNode(tree RNode, spacing string) float64 { return 0.0 } +// Predict a single data point func predictSingle(tree RNode, instance []float64) float64 { if instance[tree.Feature] < tree.Threshold { if tree.Left == nil { @@ -380,12 +388,14 @@ func predictSingle(tree RNode, instance []float64) float64 { } } +// Predict method for multiple data points. Calls predictFromNode() func (tree *RTree) Predict(X_test base.FixedDataGrid) []float64 { root := *tree.RootNode test := regressorConvertInstancesToProblemVec(X_test) return predictFromNode(root, test) } +// Use tree's root node to print out entire tree func predictFromNode(tree RNode, test [][]float64) []float64 { var preds []float64 for i := range test { @@ -395,7 +405,7 @@ func predictFromNode(tree RNode, test [][]float64) []float64 { return preds } -// Helper function to convert base.FixedDataGrid into required format. Called in Fit +// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 { // Allocate problem array _, rows := X.Size() @@ -420,7 +430,7 @@ func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 { return problemVec } -// Helper function to convert base.FixedDataGrid into required format. Called in Fit +// Helper function to convert base.FixedDataGrid into required format. 
Called in Fit, Predict func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) []float64 { // Get the class Attributes classAttrs := X.AllClassAttributes() From c0837595238c35be0f0dfad8f05061811412007e Mon Sep 17 00:00:00 2001 From: Ayush Date: Wed, 22 Jul 2020 14:34:59 +0530 Subject: [PATCH 05/24] Adding Changes --- trees/cart_classifier.go | 59 +++++++++++++++++---------------- trees/cart_regressor.go | 70 +++++++++++++++++++++------------------- 2 files changed, 68 insertions(+), 61 deletions(-) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index f9cb6a1..29646fb 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -4,6 +4,7 @@ import ( "fmt" "math" "sort" + "strconv" "strings" "github.com/sjwhitworth/golearn/base" @@ -23,8 +24,8 @@ type CNode struct { maxDepth int64 } -// CTree: Tree struct for Decision Tree Classifier -type CTree struct { +// CARTDecisionTreeClassifier: Tree struct for Decision Tree Classifier +type CARTDecisionTreeClassifier struct { RootNode *CNode criterion string maxDepth int64 @@ -135,8 +136,8 @@ func cgetFeature(data [][]float64, feature int64) []float64 { } // Function to Create New Decision Tree Classifier -func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) *CTree { - var tree CTree +func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) *CARTDecisionTreeClassifier { + var tree CARTDecisionTreeClassifier tree.criterion = strings.ToLower(criterion) tree.maxDepth = maxDepth tree.labels = labels @@ -210,7 +211,7 @@ func cupdateSplit(left [][]float64, lefty []int64, right [][]float64, righty []i } // Fit - Method visible to user to train tree -func (tree *CTree) Fit(X base.FixedDataGrid) { +func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) { var emptyNode CNode data := classifierConvertInstancesToProblemVec(X) @@ -221,7 +222,7 @@ func (tree *CTree) Fit(X base.FixedDataGrid) { } // Iterativly find and record the best split - recursive function -func cbestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNode CNode, criterion string, maxDepth int64, depth int64) CNode { +func cbestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, labels []int64, upperNode CNode, criterion string, maxDepth int64, depth int64) CNode { // Ensure that we have not reached maxDepth. maxDepth =-1 means split until nodes are pure depth++ @@ -358,41 +359,43 @@ func cbestSplit(tree CTree, data [][]float64, y []int64, labels []int64, upperNo } // PrintTree : this function prints out entire tree for visualization - visible to user -func (tree *CTree) PrintTree() { +func (tree *CARTDecisionTreeClassifier) String() string { rootNode := *tree.RootNode - cprintTreeFromNode(rootNode, "") + return cprintTreeFromNode(rootNode, "") } -// Tree struct has root node. 
That is used to print tree - invisible to user but called from PrintTree -func cprintTreeFromNode(tree CNode, spacing string) float64 { - - fmt.Print(spacing + "Feature ") - fmt.Print(tree.Feature) - fmt.Print(" < ") - fmt.Println(tree.Threshold) +func cprintTreeFromNode(tree CNode, spacing string) string { + returnString := "" + returnString += spacing + "Feature " + returnString += strconv.FormatInt(tree.Feature, 10) + returnString += " < " + returnString += fmt.Sprintf("%.3f", tree.Threshold) + returnString += "\n" if tree.Left == nil { - fmt.Println(spacing + "---> True") - fmt.Print(" " + spacing + "PREDICT ") - fmt.Println(tree.LeftLabel) + returnString += spacing + "---> True" + "\n" + returnString += " " + spacing + "PREDICT " + returnString += strconv.FormatInt(tree.LeftLabel, 10) + "\n" + } if tree.Right == nil { - fmt.Println(spacing + "---> FALSE") - fmt.Print(" " + spacing + "PREDICT ") - fmt.Println(tree.RightLabel) + + returnString += spacing + "---> False" + "\n" + returnString += " " + spacing + "PREDICT " + returnString += strconv.FormatInt(tree.RightLabel, 10) + "\n" } if tree.Left != nil { - fmt.Println(spacing + "---> True") - cprintTreeFromNode(*tree.Left, spacing+" ") + returnString += spacing + "---> True" + "\n" + returnString += cprintTreeFromNode(*tree.Left, spacing+" ") } if tree.Right != nil { - fmt.Println(spacing + "---> False") - cprintTreeFromNode(*tree.Right, spacing+" ") + returnString += spacing + "---> False" + "\n" + returnString += cprintTreeFromNode(*tree.Right, spacing+" ") } - return 0.0 + return returnString } // Predict a single data point by traversing the entire tree @@ -413,7 +416,7 @@ func cpredictSingle(tree CNode, instance []float64) int64 { } // Predict is visible to user. Given test data, they receive predictions for every datapoint. -func (tree *CTree) Predict(X_test base.FixedDataGrid) []int64 { +func (tree *CARTDecisionTreeClassifier) Predict(X_test base.FixedDataGrid) []int64 { root := *tree.RootNode test := classifierConvertInstancesToProblemVec(X_test) return cpredictFromNode(root, test) @@ -430,7 +433,7 @@ func cpredictFromNode(tree CNode, test [][]float64) []int64 { } // Given Test data and label, return the accuracy of the classifier. Data has to be in float slice format before feeding. 
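// With PrintTree replaced by String in this patch, both tree types satisfy
// fmt.Stringer, so a fitted tree can be rendered via fmt.Println(tree).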
-func (tree *CTree) Evaluate(test base.FixedDataGrid) float64 { +func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float64 { rootNode := *tree.RootNode xTest := classifierConvertInstancesToProblemVec(test) yTest := classifierConvertInstancesToLabelVec(test) diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go index 7ec044a..48e61d0 100644 --- a/trees/cart_regressor.go +++ b/trees/cart_regressor.go @@ -4,6 +4,7 @@ import ( "fmt" "math" "sort" + "strconv" "strings" "github.com/sjwhitworth/golearn/base" @@ -22,8 +23,8 @@ type RNode struct { Use_not bool } -// RTree - Tree struct for Decision Tree Regressor -type RTree struct { +// CARTDecisionTreeRegressor - Tree struct for Decision Tree Regressor +type CARTDecisionTreeRegressor struct { RootNode *RNode criterion string maxDepth int64 @@ -125,8 +126,8 @@ func rgetFeature(data [][]float64, feature int64) []float64 { } // Interface for creating new Decision Tree Regressor - cals rbestSplit() -func NewDecisionTreeRegressor(criterion string, maxDepth int64) *RTree { - var tree RTree +func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTreeRegressor { + var tree CARTDecisionTreeRegressor tree.maxDepth = maxDepth tree.criterion = strings.ToLower(criterion) return &tree @@ -198,7 +199,7 @@ func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty [ } // Extra Method for creating simple to use interface. Many params are either redundant for user but are needed only for recursive logic. -func (tree *RTree) Fit(X base.FixedDataGrid) { +func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) { var emptyNode RNode data := regressorConvertInstancesToProblemVec(X) y := regressorConvertInstancesToLabelVec(X) @@ -209,7 +210,7 @@ func (tree *RTree) Fit(X base.FixedDataGrid) { } // Essentially the Fit Method - Impelements recursive logic -func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, criterion string, maxDepth int64, depth int64) RNode { +func rbestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, upperNode RNode, criterion string, maxDepth int64, depth int64) RNode { depth++ @@ -334,72 +335,75 @@ func rbestSplit(tree RTree, data [][]float64, y []float64, upperNode RNode, crit } // Print Tree for Visualtion - calls printTreeFromNode() -func (tree *RTree) PrintTree() { +func (tree *CARTDecisionTreeRegressor) String() string { rootNode := *tree.RootNode - printTreeFromNode(rootNode, "") + return rprintTreeFromNode(rootNode, "") } -// Use tree's root node to print out entire tree -func printTreeFromNode(tree RNode, spacing string) float64 { - - fmt.Print(spacing + "Feature ") - fmt.Print(tree.Feature) - fmt.Print(" < ") - fmt.Println(tree.Threshold) +func rprintTreeFromNode(tree RNode, spacing string) string { + returnString := "" + returnString += spacing + "Feature " + returnString += strconv.FormatInt(tree.Feature, 10) + returnString += " < " + returnString += fmt.Sprintf("%.3f", tree.Threshold) + returnString += "\n" if tree.Left == nil { - fmt.Println(spacing + "---> True") - fmt.Print(" " + spacing + "PREDICT ") - fmt.Println(tree.LeftPred) + returnString += spacing + "---> True" + "\n" + returnString += " " + spacing + "PREDICT " + returnString += fmt.Sprintf("%.3f", tree.LeftPred) + "\n" } if tree.Right == nil { - fmt.Println(spacing + "---> FALSE") - fmt.Print(" " + spacing + "PREDICT ") - fmt.Println(tree.RightPred) + + returnString += spacing + "---> False" + "\n" + returnString += " " + spacing + "PREDICT " + 
returnString += fmt.Sprintf("%.3f", tree.RightPred) + "\n" } if tree.Left != nil { - fmt.Println(spacing + "---> True") - printTreeFromNode(*tree.Left, spacing+" ") + // fmt.Println(spacing + "---> True") + returnString += spacing + "---> True" + "\n" + returnString += rprintTreeFromNode(*tree.Left, spacing+" ") } if tree.Right != nil { - fmt.Println(spacing + "---> False") - printTreeFromNode(*tree.Right, spacing+" ") + // fmt.Println(spacing + "---> False") + returnString += spacing + "---> False" + "\n" + returnString += rprintTreeFromNode(*tree.Right, spacing+" ") } - return 0.0 + return returnString } // Predict a single data point -func predictSingle(tree RNode, instance []float64) float64 { +func rpredictSingle(tree RNode, instance []float64) float64 { if instance[tree.Feature] < tree.Threshold { if tree.Left == nil { return tree.LeftPred } else { - return predictSingle(*tree.Left, instance) + return rpredictSingle(*tree.Left, instance) } } else { if tree.Right == nil { return tree.RightPred } else { - return predictSingle(*tree.Right, instance) + return rpredictSingle(*tree.Right, instance) } } } // Predict method for multiple data points. Calls predictFromNode() -func (tree *RTree) Predict(X_test base.FixedDataGrid) []float64 { +func (tree *CARTDecisionTreeRegressor) Predict(X_test base.FixedDataGrid) []float64 { root := *tree.RootNode test := regressorConvertInstancesToProblemVec(X_test) - return predictFromNode(root, test) + return rpredictFromNode(root, test) } // Use tree's root node to print out entire tree -func predictFromNode(tree RNode, test [][]float64) []float64 { +func rpredictFromNode(tree RNode, test [][]float64) []float64 { var preds []float64 for i := range test { - i_pred := predictSingle(tree, test[i]) + i_pred := rpredictSingle(tree, test[i]) preds = append(preds, i_pred) } return preds From b16b60fcb56f801070b89a4867e69422a78dd81a Mon Sep 17 00:00:00 2001 From: Ayush Date: Thu, 23 Jul 2020 16:45:31 +0530 Subject: [PATCH 06/24] Adding Example script for CART --- examples/datasets/boston_house_prices.csv | 1460 +++++++++++++++++++++ examples/datasets/titanic.csv | 889 +++++++++++++ examples/trees/cart.go | 61 + trees/cart_classifier.go | 2 +- 4 files changed, 2411 insertions(+), 1 deletion(-) create mode 100644 examples/datasets/boston_house_prices.csv create mode 100644 examples/datasets/titanic.csv create mode 100644 examples/trees/cart.go diff --git a/examples/datasets/boston_house_prices.csv b/examples/datasets/boston_house_prices.csv new file mode 100644 index 0000000..a330ca0 --- /dev/null +++ b/examples/datasets/boston_house_prices.csv @@ -0,0 +1,1460 @@ +7,208500 +6,181500 +7,223500 +7,140000 +8,250000 +5,143000 +8,307000 +7,200000 +7,129900 +5,118000 +5,129500 +9,345000 +5,144000 +7,279500 +6,157000 +7,132000 +6,149000 +4,90000 +5,159000 +5,139000 +8,325300 +7,139400 +8,230000 +5,129900 +5,154000 +8,256300 +5,134800 +8,306000 +5,207500 +4,68500 +4,40000 +5,149350 +8,179900 +5,165500 +9,277500 +8,309000 +5,145000 +5,153000 +5,109000 +4,82000 +6,160000 +5,170000 +5,144000 +5,130250 +5,141000 +9,319900 +7,239686 +8,249700 +4,113000 +5,127000 +6,177000 +6,114500 +5,110000 +9,385000 +5,130000 +6,180500 +8,172500 +7,196500 +10,438780 +5,124900 +6,158000 +5,101000 +8,202500 +7,140000 +7,219500 +8,317000 +7,180000 +7,226000 +4,80000 +7,225000 +7,244000 +4,129500 +7,185000 +5,144900 +3,107400 +4,91000 +4,135750 +5,127000 +4,136500 +5,110000 +6,193500 +6,153500 +8,245000 +5,126500 +7,168500 +8,260000 +6,174000 +6,164500 +3,85000 +4,123600 +4,109900 +5,98600 
+5,163500 +6,133900 +6,204750 +6,185000 +7,214000 +4,94750 +5,83000 +4,128950 +6,205000 +6,178000 +5,118964 +7,198900 +7,169500 +8,250000 +4,100000 +5,115000 +5,115000 +6,190000 +6,136900 +7,180000 +7,383970 +6,217000 +6,259500 +6,176000 +5,139000 +5,155000 +7,320000 +6,163990 +6,180000 +4,100000 +6,136000 +6,153900 +6,181000 +6,84500 +6,128000 +5,87000 +6,155000 +5,150000 +7,226000 +6,244000 +5,150750 +8,220000 +5,180000 +7,174000 +5,143000 +7,171000 +8,230000 +6,231500 +4,115000 +7,260000 +5,166000 +7,204000 +5,125000 +6,130000 +5,105000 +7,222500 +7,141000 +5,115000 +5,122000 +8,372402 +6,190000 +6,235000 +6,125000 +6,79000 +5,109500 +8,269500 +7,254900 +7,320000 +6,162500 +9,412500 +7,220000 +4,103200 +6,152000 +5,127500 +5,190000 +8,325624 +7,183500 +8,228000 +5,128500 +6,215000 +7,239000 +6,163000 +6,184000 +6,243000 +6,211000 +5,172500 +9,501837 +5,100000 +6,177000 +7,200100 +5,120000 +7,200000 +5,127000 +10,475000 +7,173000 +5,135000 +5,153337 +8,286000 +8,315000 +7,184000 +7,192000 +7,130000 +5,127000 +6,148500 +7,311872 +8,235000 +6,104000 +8,274900 +4,140000 +6,171500 +6,112000 +6,149000 +5,110000 +7,180500 +5,143900 +4,141000 +7,277000 +6,145000 +5,98000 +6,186000 +7,252678 +5,156000 +6,161750 +5,134450 +7,210000 +4,107000 +7,311500 +7,167240 +7,204900 +6,200000 +6,179900 +4,97000 +10,386250 +5,112000 +7,290000 +6,106000 +5,125000 +7,192500 +6,148000 +8,403000 +6,94500 +5,128200 +6,216500 +6,89500 +7,185500 +7,194500 +8,318000 +6,113000 +8,262500 +5,110500 +5,79000 +6,120000 +7,205000 +7,241500 +6,137000 +6,140000 +7,180000 +6,277000 +3,76500 +8,235000 +6,173000 +6,158000 +5,145000 +7,230000 +6,207500 +7,220000 +7,231500 +5,97000 +6,176000 +8,276000 +6,151000 +5,130000 +5,73000 +6,175500 +6,185000 +5,179500 +5,120500 +6,148000 +8,266000 +7,241500 +8,290000 +6,139000 +5,124500 +7,205000 +7,201000 +4,141000 +9,415298 +7,192000 +7,228500 +6,185000 +7,207500 +8,244600 +6,179200 +7,164700 +6,159000 +4,88000 +5,122000 +6,153575 +8,233230 +5,135900 +5,131000 +7,235000 +6,167000 +6,142500 +5,152000 +7,239000 +6,175000 +6,158500 +5,157000 +8,267000 +7,205000 +5,149900 +7,295000 +8,305900 +7,225000 +6,89500 +4,82500 +9,360000 +6,165600 +6,132000 +5,119900 +7,375000 +7,178000 +7,188500 +7,260000 +8,270000 +7,260000 +7,187500 +9,342643 +8,354000 +7,301000 +3,126175 +7,242000 +5,87000 +8,324000 +6,145250 +6,214500 +5,78000 +5,119000 +5,139000 +8,284000 +7,207000 +6,192000 +5,228950 +9,377426 +7,214000 +7,202500 +6,155000 +8,202900 +4,82000 +3,87500 +9,266000 +5,85000 +6,140200 +6,151500 +6,157500 +7,154000 +9,437154 +9,318061 +7,190000 +5,95000 +6,105900 +6,140000 +6,177500 +6,173000 +5,134000 +5,130000 +8,280000 +6,156000 +5,145000 +7,198500 +6,118000 +6,190000 +5,147000 +6,159000 +6,165000 +5,132000 +5,162000 +6,172400 +4,134432 +6,125000 +5,123000 +7,219500 +1,61000 +5,148000 +8,340000 +9,394432 +6,179000 +5,127000 +7,187750 +7,213500 +6,76000 +6,240000 +8,192000 +5,81000 +6,125000 +7,191000 +10,426000 +5,119000 +6,215000 +5,106500 +4,100000 +5,109000 +5,129000 +5,123000 +5,169500 +5,67000 +7,241000 +8,245500 +7,164990 +5,108000 +8,258000 +6,168000 +4,150000 +6,115000 +6,177000 +7,280000 +8,339750 +5,60000 +5,145000 +7,222000 +5,115000 +7,228000 +7,181134 +6,149500 +6,239000 +5,126000 +5,142000 +7,206300 +6,215000 +5,113000 +8,315000 +6,139000 +7,135000 +7,275000 +4,109008 +7,195400 +6,175000 +6,85400 +6,79900 +5,122500 +6,181000 +4,81000 +7,212000 +6,116000 +6,119000 +5,90350 +6,110000 +10,555000 +4,118000 +5,162900 +7,172500 +7,210000 +6,127500 +6,190000 +7,199900 +6,119500 +3,120000 
+6,110000 +7,280000 +6,204000 +8,210000 +5,188000 +7,175500 +5,98000 +4,256000 +8,161000 +5,110000 +8,263435 +7,155000 +5,62383 +6,188700 +5,124000 +7,178740 +7,167000 +5,146500 +8,250000 +6,187000 +8,212000 +7,190000 +6,148000 +8,440000 +8,251000 +5,132500 +6,208900 +9,380000 +8,297000 +4,89471 +9,326000 +9,374000 +7,155000 +6,164000 +5,132500 +5,147000 +5,156000 +5,175000 +5,160000 +4,86000 +5,115000 +6,133000 +6,172785 +5,155000 +5,91300 +4,34900 +8,430000 +7,184000 +5,130000 +5,120000 +6,113000 +7,226700 +5,140000 +7,289000 +6,147000 +5,124500 +8,215000 +6,208300 +7,161000 +5,124500 +5,164900 +7,202665 +5,129900 +6,134000 +5,96500 +10,402861 +6,158000 +7,265000 +6,211000 +7,234000 +4,106250 +6,150000 +6,159000 +10,184750 +7,315750 +7,176000 +5,132000 +9,446261 +4,86000 +6,200624 +6,175000 +6,128000 +5,107500 +1,39300 +8,178000 +5,107500 +7,188000 +4,111250 +5,158000 +8,272000 +9,315000 +8,248000 +7,213250 +7,133000 +7,179665 +7,229000 +6,210000 +5,129500 +5,125000 +7,263000 +6,140000 +5,112500 +8,255500 +4,108000 +7,284000 +5,113000 +5,141000 +4,108000 +7,175000 +7,234000 +5,121500 +5,170000 +5,108000 +6,185000 +7,268000 +6,128000 +9,325000 +7,214000 +8,316600 +5,135960 +5,142600 +6,120000 +7,224500 +7,170000 +5,139000 +5,118500 +7,145000 +5,164500 +7,146000 +5,131500 +6,181900 +8,253293 +6,118500 +10,325000 +4,133000 +8,369900 +6,130000 +5,137000 +5,143000 +5,79500 +7,185900 +10,451950 +5,138000 +6,140000 +5,110000 +8,319000 +6,114504 +7,194201 +5,217500 +6,151000 +8,275000 +6,141000 +8,220000 +7,151000 +7,221000 +7,205000 +5,152000 +5,225000 +8,359100 +4,118500 +9,313000 +6,148000 +8,261500 +5,147000 +4,75500 +6,137500 +6,183200 +6,105500 +9,314813 +8,305000 +3,67000 +6,240000 +5,135000 +6,168500 +6,165150 +6,160000 +5,139900 +6,153000 +5,135000 +6,168500 +5,124000 +8,209500 +7,82500 +5,139400 +6,144000 +6,200000 +2,60000 +5,93000 +5,85000 +8,264561 +8,274000 +7,226000 +8,345000 +5,152000 +9,370878 +6,143250 +5,98300 +6,155000 +6,155000 +4,84500 +7,205950 +4,108000 +7,191000 +6,135000 +8,350000 +6,88000 +5,145500 +7,149000 +6,97500 +5,167000 +7,197900 +8,402000 +6,110000 +4,137500 +8,423000 +8,230500 +6,129000 +6,193500 +5,168000 +4,137500 +6,173500 +6,103600 +6,165000 +6,257500 +6,140000 +6,148500 +4,87000 +5,109500 +8,372500 +5,128500 +6,143000 +5,159434 +6,173000 +9,285000 +7,221000 +7,207500 +7,227875 +7,148800 +8,392000 +6,194700 +6,141000 +10,755000 +7,335000 +5,108480 +5,141500 +6,176000 +5,89000 +5,123500 +5,138500 +7,196000 +8,312500 +7,140000 +8,361919 +5,140000 +7,213000 +4,55000 +7,302000 +8,254000 +7,179540 +5,109900 +3,52000 +4,102776 +8,189000 +4,129000 +6,130500 +6,165000 +7,159500 +5,157000 +7,341000 +5,128500 +8,275000 +6,143000 +4,124500 +4,135000 +9,320000 +4,120500 +6,222000 +7,194500 +5,110000 +4,103000 +8,236500 +7,187500 +7,222500 +5,131400 +5,108000 +7,163000 +3,93500 +8,239900 +5,179000 +7,190000 +5,132000 +6,142000 +7,179000 +5,175000 +8,180000 +8,299800 +7,236000 +7,265979 +7,260400 +4,98000 +4,96500 +7,162000 +6,217000 +8,275500 +6,156000 +6,172500 +8,212000 +6,158900 +7,179400 +8,290000 +6,127500 +5,100000 +7,215200 +8,337000 +8,270000 +9,264132 +7,196500 +6,160000 +7,216837 +8,538000 +5,134900 +4,102000 +6,107000 +5,114500 +8,395000 +6,162000 +7,221500 +5,142500 +5,144000 +6,135000 +7,176000 +6,175900 +7,187100 +5,165500 +6,128000 +6,161500 +5,139000 +7,233000 +4,107900 +6,187500 +7,160200 +6,146800 +7,269790 +8,225000 +7,194500 +6,171000 +6,143500 +5,110000 +9,485000 +5,175000 +6,200000 +4,109900 +7,189000 +9,582933 +5,118000 +7,227680 +5,135500 
+5,223500 +5,159950 +5,106000 +6,181000 +6,144500 +5,55993 +6,157900 +5,116000 +7,224900 +5,137000 +8,271000 +6,155000 +7,224000 +7,183000 +4,93000 +7,225000 +6,139500 +8,232600 +10,385000 +5,109500 +7,189000 +5,185000 +7,147400 +6,166000 +7,151000 +7,237000 +6,167000 +5,139950 +4,128000 +5,153500 +6,100000 +5,144000 +5,130500 +6,140000 +5,157500 +6,174900 +5,141000 +5,153900 +5,171000 +7,213000 +5,133500 +6,240000 +6,187000 +6,131500 +8,215000 +7,164000 +6,158000 +5,170000 +5,127000 +6,147000 +6,174000 +7,152000 +6,250000 +7,189950 +5,131500 +6,152000 +5,132500 +7,250580 +5,148500 +8,248900 +4,129000 +5,169000 +7,236000 +5,109500 +6,200500 +5,116000 +5,133000 +5,66500 +8,303477 +4,132250 +9,350000 +5,148000 +5,136500 +5,157000 +7,187500 +6,178000 +4,118500 +5,100000 +9,328900 +5,145000 +5,135500 +8,268000 +6,149500 +5,122900 +6,172500 +6,154500 +5,165000 +5,118858 +6,140000 +4,106500 +5,142953 +9,611657 +5,135000 +4,110000 +5,153000 +7,180000 +7,240000 +5,125500 +5,128000 +8,255000 +7,250000 +5,131000 +6,174000 +5,154300 +5,143500 +5,88000 +5,145000 +6,173733 +4,75000 +2,35311 +4,135000 +7,238000 +6,176500 +6,201000 +5,145900 +6,169990 +6,193000 +6,207500 +5,175000 +8,285000 +7,176000 +8,236500 +7,222000 +8,201000 +5,117500 +9,320000 +7,190000 +7,242000 +4,79900 +7,184900 +7,253000 +7,239799 +7,244400 +6,150900 +7,214000 +4,150000 +5,143000 +6,137500 +5,124900 +5,143000 +8,270000 +7,192500 +6,197500 +5,129000 +5,119900 +5,133900 +5,172000 +6,127500 +6,145000 +6,124000 +5,132000 +7,185000 +7,155000 +5,116500 +6,272000 +6,155000 +9,239000 +7,214900 +6,178900 +5,160000 +5,135000 +3,37900 +6,140000 +4,135000 +7,173000 +6,99500 +7,182000 +7,167500 +7,165000 +4,85500 +7,199900 +4,110000 +5,139000 +7,178400 +8,336000 +7,159895 +8,255900 +5,126000 +5,125000 +6,117000 +9,395192 +6,195000 +7,197000 +8,348000 +8,168000 +6,187000 +6,173900 +10,337500 +4,121600 +5,136500 +6,185000 +3,91000 +7,206000 +3,82000 +5,86000 +8,232000 +5,136905 +7,181000 +5,149900 +6,163500 +4,88000 +7,240000 +5,102000 +5,135000 +5,100000 +6,165000 +5,85000 +6,119200 +8,227000 +7,203000 +8,187500 +7,160000 +7,213490 +4,176000 +7,194000 +5,87000 +7,191000 +8,287000 +5,112500 +5,167500 +8,293077 +5,105000 +6,118000 +5,160000 +7,197000 +8,310000 +7,230000 +5,119750 +4,84000 +9,315500 +8,287000 +4,97000 +4,80000 +5,155000 +6,173000 +6,196000 +7,262280 +8,278000 +3,139600 +9,556581 +5,145000 +5,115000 +4,84900 +7,176485 +7,200141 +6,165000 +5,144500 +8,255000 +6,180000 +7,185850 +7,248000 +9,335000 +6,220000 +8,213500 +3,81000 +5,90000 +6,110500 +5,154000 +7,328000 +6,178000 +6,167900 +6,151400 +5,135000 +5,135000 +6,154000 +5,91500 +6,159500 +7,194000 +7,219500 +5,170000 +5,138800 +6,155900 +5,126000 +6,145000 +5,133000 +7,192000 +6,160000 +6,187500 +6,147000 +4,83500 +8,252000 +7,137500 +8,197000 +3,92900 +7,160000 +6,136500 +5,146000 +5,129000 +6,176432 +6,127000 +8,170000 +4,128000 +7,157000 +2,60000 +5,119500 +5,135000 +6,159500 +5,106000 +8,325000 +7,179900 +7,274725 +6,181000 +8,280000 +6,188000 +7,205000 +5,129900 +5,134500 +5,117000 +8,318000 +8,184100 +5,130000 +5,140000 +5,133700 +6,118400 +7,212900 +4,112000 +5,118000 +7,163900 +4,115000 +7,174000 +7,259000 +7,215000 +5,140000 +4,135000 +5,93500 +6,117500 +8,239500 +6,169000 +6,102000 +6,119000 +5,94000 +6,196000 +5,144000 +5,139000 +5,197500 +8,424870 +5,80000 +4,80000 +5,149000 +6,180000 +7,174500 +7,116900 +7,143000 +6,124000 +5,149900 +6,230000 +6,120500 +7,201800 +5,218000 +5,179900 +7,230000 +8,235128 +6,185000 +6,146000 +6,224000 +5,129000 +4,108959 +5,194000 
+7,233170 +8,245350 +6,173000 +6,235000 +10,625000 +6,171000 +6,163000 +7,171900 +5,200500 +6,239000 +8,285000 +5,119500 +6,115000 +5,154900 +5,93000 +7,250000 +8,392500 +10,745000 +5,120000 +5,186700 +5,104900 +3,95000 +8,262000 +7,195000 +7,189000 +4,168000 +8,174000 +5,125000 +6,165000 +6,158000 +6,176000 +7,219210 +7,144000 +7,178000 +4,148000 +4,116050 +7,197900 +5,117000 +7,213000 +5,153500 +7,271900 +4,107000 +6,200000 +5,140000 +8,290000 +6,189000 +8,164000 +4,113000 +4,145000 +5,134500 +5,125000 +6,112000 +8,229456 +4,80500 +6,91500 +5,115000 +5,134000 +6,143000 +5,137900 +7,184000 +6,145000 +6,214000 +5,147000 +9,367294 +5,127000 +5,190000 +5,132500 +4,101800 +5,142000 +5,130000 +5,138887 +7,175500 +7,195000 +6,142500 +8,265900 +7,224900 +7,248328 +7,170000 +10,465000 +8,230000 +6,178000 +7,186500 +6,169900 +6,129500 +5,119000 +7,244000 +7,171750 +5,130000 +7,294000 +7,165400 +6,127500 +8,301500 +5,99900 +7,190000 +6,151000 +6,181000 +5,128900 +4,161500 +6,180500 +6,181000 +7,183900 +7,122000 +9,378500 +8,381000 +5,144000 +7,260000 +6,185750 +5,137000 +6,177000 +5,139000 +5,137000 +6,162000 +6,197900 +8,237000 +4,68400 +7,227000 +7,180000 +5,150500 +6,139000 +6,169000 +6,132500 +6,143000 +5,190000 +8,278000 +8,281000 +5,180500 +5,119500 +5,107500 +7,162900 +5,115000 +5,138500 +5,155000 +6,140000 +10,160000 +5,154000 +7,225000 +6,177500 +8,290000 +7,232000 +7,130000 +9,325000 +7,202500 +5,138000 +5,147000 +6,179200 +7,335000 +7,203000 +8,302000 +9,333168 +4,119000 +6,206900 +8,295493 +7,208900 +8,275000 +4,111000 +6,156500 +3,72500 +7,190000 +4,82500 +8,147000 +4,55000 +3,79000 +5,130500 +6,256000 +7,176500 +8,227000 +5,132500 +4,100000 +5,125500 +5,125000 +6,167900 +5,135000 +4,52500 +7,200000 +5,128500 +4,123000 +6,155000 +8,228500 +6,177000 +7,155835 +4,108500 +7,262500 +8,283463 +7,215000 +8,122000 +5,200000 +6,171000 +6,134900 +8,410000 +7,235000 +7,170000 +5,110000 +5,149900 +6,177500 +9,315000 +5,189000 +7,260000 +4,104900 +6,156932 +7,144152 +7,216000 +7,193000 +5,127000 +6,144000 +8,232000 +4,105000 +6,165500 +7,274300 +10,466500 +7,250000 +8,239000 +6,91000 +5,117000 +6,83000 +5,167500 +3,58500 +6,237500 +7,157000 +5,112000 +6,105000 +4,125500 +7,250000 +6,136000 +9,377500 +6,131000 +7,235000 +5,124000 +5,123000 +6,163000 +7,246578 +8,281213 +5,160000 +5,137500 +5,138000 +6,137450 +6,120000 +6,193000 +7,193879 +8,282922 +3,105000 +8,275000 +5,133000 +5,112000 +4,125500 +7,215000 +7,230000 +6,140000 +4,90000 +8,257000 +6,207000 +7,175900 +4,122500 +8,340000 +5,124000 +6,223000 +6,179900 +6,127500 +6,136500 +6,274970 +5,144000 +6,142000 +7,271000 +5,140000 +5,119000 +6,182900 +5,192140 +6,143750 +4,64500 +6,186500 +5,160000 +6,174000 +4,120500 +8,394617 +6,149700 +7,197000 +6,191000 +6,149300 +10,310000 +6,121000 +7,179600 +6,129000 +5,157900 +8,240000 +4,112000 +5,92000 +5,136000 +8,287090 +5,145000 +5,84500 +7,185000 +6,175000 +6,210000 +7,266500 +5,142125 +5,147500 \ No newline at end of file diff --git a/examples/datasets/titanic.csv b/examples/datasets/titanic.csv new file mode 100644 index 0000000..bb66baf --- /dev/null +++ b/examples/datasets/titanic.csv @@ -0,0 +1,889 @@ +0,3,1,2 +1,1,0,0 +1,3,0,2 +1,1,0,2 +0,3,1,2 +0,3,1,1 +0,1,1,2 +0,3,1,2 +1,3,0,2 +1,2,0,0 +1,3,0,2 +1,1,0,2 +0,3,1,2 +0,3,1,2 +0,3,0,2 +1,2,0,2 +0,3,1,1 +1,2,1,2 +0,3,0,2 +1,3,0,0 +0,2,1,2 +1,2,1,2 +1,3,0,1 +1,1,1,2 +0,3,0,2 +1,3,0,2 +0,3,1,0 +0,1,1,2 +1,3,0,1 +0,3,1,2 +0,1,1,0 +1,1,0,0 +1,3,0,1 +0,2,1,2 +0,1,1,0 +0,1,1,2 +1,3,1,0 +0,3,1,2 +0,3,0,2 +1,3,0,0 +0,3,0,2 +0,2,0,2 +0,3,1,0 +1,2,0,0 
+1,3,0,1 +0,3,1,2 +0,3,1,1 +1,3,0,1 +0,3,1,0 +0,3,0,2 +0,3,1,2 +0,3,1,2 +1,1,0,0 +1,2,0,2 +0,1,1,0 +1,1,1,2 +1,2,0,2 +0,3,1,0 +1,2,0,2 +0,3,1,2 +0,3,1,0 +0,1,1,2 +0,3,1,2 +0,1,1,0 +1,3,1,0 +1,2,0,2 +0,3,1,2 +1,3,0,2 +0,3,1,2 +0,2,1,2 +0,3,0,2 +0,2,1,2 +0,3,1,0 +1,3,1,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,2,1,2 +1,3,0,2 +0,3,1,2 +1,3,1,2 +1,3,0,1 +0,1,1,2 +1,2,0,2 +1,3,0,2 +0,3,1,2 +0,3,1,2 +1,1,0,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +0,1,1,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +0,1,1,0 +1,1,1,0 +1,2,0,2 +0,2,1,2 +0,3,0,2 +0,3,1,2 +0,1,1,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,3,0,2 +1,3,1,2 +0,3,1,2 +1,3,0,1 +0,1,1,2 +0,3,0,0 +0,3,1,2 +0,3,0,2 +0,3,0,0 +0,3,1,2 +0,3,1,1 +0,2,1,2 +0,1,1,0 +0,3,0,2 +0,2,1,2 +0,3,1,2 +0,2,1,0 +1,2,0,2 +0,1,1,2 +1,3,1,0 +0,3,1,1 +1,3,1,2 +1,3,0,0 +0,3,1,2 +0,3,1,0 +0,3,1,2 +0,3,0,2 +1,2,0,2 +0,2,1,2 +0,2,1,0 +1,1,0,2 +0,1,1,2 +0,3,1,2 +0,1,1,0 +0,3,0,0 +1,3,0,2 +1,3,0,2 +0,3,1,1 +0,2,1,2 +0,2,1,2 +1,3,1,2 +0,3,0,2 +0,2,1,2 +0,2,1,2 +0,2,1,2 +1,1,0,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +0,1,1,0 +1,3,0,1 +0,3,1,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,2,0,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,3,1,2 +1,1,0,2 +0,3,0,2 +0,1,1,2 +0,3,1,2 +0,1,1,2 +0,3,1,1 +1,3,0,2 +0,3,1,2 +0,1,1,0 +0,3,1,2 +0,3,1,2 +0,1,0,0 +0,2,1,2 +0,3,1,2 +0,3,0,2 +0,2,1,0 +0,3,1,2 +1,2,1,2 +1,3,0,2 +0,1,1,2 +1,3,0,1 +1,1,1,2 +0,3,1,1 +0,3,1,2 +1,2,0,2 +0,2,1,2 +1,3,0,2 +1,2,1,2 +1,1,0,0 +1,1,0,0 +0,3,1,1 +0,3,1,2 +1,3,0,1 +0,2,0,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +0,3,1,0 +1,3,1,2 +0,3,0,2 +0,3,1,2 +1,3,1,0 +1,3,0,1 +1,1,1,0 +0,3,1,2 +1,2,0,2 +0,3,1,2 +0,2,1,2 +0,3,1,1 +1,1,0,0 +1,3,0,2 +0,2,1,2 +1,1,0,0 +0,2,1,2 +1,3,1,2 +0,2,1,2 +0,3,1,2 +0,3,1,2 +1,1,1,2 +0,3,1,2 +1,2,1,2 +0,3,1,2 +0,2,1,2 +0,3,0,2 +1,1,0,2 +0,3,1,2 +0,2,1,2 +1,3,0,2 +0,2,1,2 +0,3,0,2 +0,2,1,2 +1,2,0,2 +0,2,1,2 +0,2,1,2 +0,3,0,0 +1,3,0,1 +0,2,1,2 +0,3,1,2 +0,3,1,0 +0,1,1,1 +0,3,0,2 +1,2,0,2 +1,1,1,2 +0,2,1,2 +0,3,1,2 +0,3,0,2 +0,1,1,2 +0,3,1,2 +0,3,0,2 +1,3,0,0 +1,1,0,0 +1,1,0,2 +1,1,0,0 +1,2,0,2 +0,3,1,1 +1,3,1,2 +0,1,1,2 +0,1,1,2 +0,3,0,1 +0,2,1,2 +0,3,1,2 +1,3,1,2 +1,1,0,2 +1,1,0,2 +0,1,1,2 +1,3,1,2 +1,2,0,2 +0,1,1,0 +1,3,0,1 +1,1,0,2 +0,3,0,2 +0,2,1,2 +0,3,1,1 +1,3,0,2 +0,3,1,1 +0,3,1,2 +0,3,1,2 +1,3,1,2 +0,1,1,2 +0,3,1,0 +1,3,1,2 +0,3,1,2 +1,2,1,2 +1,3,0,1 +1,1,0,2 +1,1,0,0 +0,2,1,0 +0,3,0,2 +0,3,1,2 +0,1,1,0 +0,3,1,0 +0,1,0,2 +1,1,1,2 +1,1,0,0 +1,3,0,1 +1,3,1,1 +0,3,1,2 +1,2,0,1 +0,3,1,2 +1,1,1,2 +1,1,0,0 +1,1,0,0 +0,2,1,0 +1,1,0,0 +1,1,0,0 +1,1,0,0 +0,2,0,2 +0,3,1,2 +0,2,1,2 +1,3,0,2 +1,2,0,2 +0,2,1,2 +1,1,0,2 +1,1,0,0 +0,3,1,2 +0,3,1,2 +1,2,0,1 +1,2,0,2 +0,3,1,2 +1,1,0,0 +0,3,1,2 +1,2,0,2 +1,3,0,2 +1,1,0,0 +1,3,0,1 +0,1,1,2 +0,1,1,2 +0,3,1,2 +1,1,0,2 +0,3,1,2 +0,1,1,2 +1,1,0,0 +1,3,1,2 +0,1,1,2 +1,2,1,2 +1,1,0,2 +0,2,1,2 +0,2,1,2 +0,2,1,2 +1,2,0,2 +1,2,0,2 +1,3,0,2 +1,3,1,2 +0,3,1,2 +0,3,1,2 +0,1,1,2 +0,3,1,0 +0,3,1,2 +0,3,1,0 +0,3,1,2 +1,1,0,2 +0,2,0,2 +1,3,0,1 +1,3,0,1 +0,3,1,2 +0,2,1,0 +0,3,0,0 +0,3,1,2 +0,3,1,1 +0,3,1,2 +1,1,0,0 +1,3,0,0 +1,3,0,1 +1,1,0,0 +1,1,1,0 +0,3,1,2 +0,3,1,2 +0,1,1,0 +0,3,0,2 +1,1,0,0 +1,3,0,2 +0,1,1,0 +0,3,1,0 +0,3,1,2 +1,1,0,0 +1,3,0,0 +0,3,1,2 +1,1,0,2 +0,3,1,2 +0,2,1,2 +0,3,1,2 +1,2,0,2 +0,3,1,1 +1,2,0,0 +1,1,1,2 +1,3,1,2 +0,3,1,2 +1,1,0,0 +1,3,0,2 +0,3,1,2 +0,3,0,2 +0,2,1,2 +0,2,1,2 +1,2,0,2 +1,3,1,2 +0,3,1,2 +0,3,0,2 +0,3,1,2 +0,3,0,2 +0,2,1,2 +0,3,1,2 +1,2,1,2 +0,3,1,2 +0,3,0,2 +0,3,1,2 +0,3,1,1 +1,1,0,1 +0,2,1,2 +1,3,1,2 +0,3,0,2 +1,2,0,2 +1,2,0,2 +0,2,1,2 +0,3,0,2 +0,3,1,0 +0,3,1,1 +0,3,1,2 +0,3,0,2 +0,3,1,2 +0,3,1,2 +1,2,0,2 +1,2,0,2 +0,3,1,1 +1,3,1,2 +1,1,1,2 +1,3,0,2 +1,2,0,2 +0,3,1,2 +0,1,1,2 +1,1,0,2 +0,3,0,2 +1,2,0,2 +0,1,1,2 +0,2,1,2 
+1,2,0,2 +0,3,1,2 +0,3,1,2 +1,2,0,2 +1,3,1,2 +1,1,1,2 +1,2,0,2 +1,1,1,2 +1,3,0,0 +1,1,1,2 +0,2,1,2 +0,3,1,2 +0,1,1,0 +1,1,1,0 +0,3,1,2 +1,3,1,0 +0,1,1,2 +1,1,0,2 +1,2,0,2 +0,3,1,1 +1,1,1,2 +0,3,1,2 +0,1,1,2 +0,2,1,2 +0,3,1,2 +0,3,1,2 +0,2,1,2 +0,1,1,2 +0,3,1,1 +1,3,0,0 +0,3,1,2 +0,3,1,2 +1,2,0,2 +1,2,0,0 +0,3,0,2 +0,1,1,2 +0,2,1,2 +0,3,1,2 +0,3,1,2 +1,3,0,2 +0,3,1,2 +0,2,1,2 +0,3,1,2 +1,3,0,2 +1,1,1,0 +0,3,0,2 +1,1,0,2 +0,1,1,0 +0,3,1,2 +1,3,1,2 +0,3,1,2 +0,3,1,2 +0,1,1,2 +0,1,1,0 +0,3,1,2 +0,3,1,0 +1,1,0,0 +0,3,1,2 +0,1,0,2 +0,3,1,2 +0,3,1,2 +0,3,0,1 +0,3,0,1 +0,3,0,2 +1,1,0,2 +0,1,1,0 +1,2,0,2 +1,1,1,2 +0,3,1,2 +1,3,1,2 +1,3,1,1 +0,3,1,2 +1,1,1,2 +1,1,0,0 +0,3,1,2 +0,1,1,2 +1,2,0,2 +0,3,1,1 +1,2,0,2 +0,3,1,2 +1,1,0,2 +0,3,1,2 +0,3,1,0 +1,1,0,0 +0,3,1,0 +0,3,1,1 +1,2,0,2 +0,1,1,2 +0,3,1,2 +0,2,1,2 +1,2,0,2 +0,3,1,0 +0,3,1,0 +1,3,0,0 +0,3,0,2 +1,2,0,2 +0,1,1,2 +1,1,0,0 +0,3,1,2 +1,1,0,0 +1,1,0,2 +0,3,0,2 +0,3,0,2 +1,2,1,2 +0,1,1,0 +0,1,1,2 +1,2,0,2 +1,2,1,0 +0,3,1,2 +1,2,1,2 +1,1,1,0 +0,2,1,2 +0,3,1,1 +1,3,1,0 +1,3,0,2 +0,1,1,2 +1,1,0,0 +0,1,1,0 +1,1,0,2 +1,3,0,2 +0,3,1,1 +0,3,1,2 +0,2,1,2 +0,3,1,2 +0,3,0,2 +0,3,1,2 +0,3,1,2 +0,3,0,2 +0,3,1,0 +1,3,1,2 +1,2,1,2 +1,1,0,2 +1,1,1,2 +1,3,0,1 +0,3,1,2 +0,3,1,2 +1,2,0,2 +1,1,0,2 +0,3,0,0 +1,3,1,2 +1,2,0,2 +1,1,0,0 +0,2,1,2 +0,1,1,0 +0,3,1,0 +1,1,0,2 +0,2,1,2 +1,1,1,0 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,1,0,0 +0,3,1,2 +0,3,0,1 +0,2,1,2 +0,3,1,2 +1,2,0,2 +0,3,1,2 +0,3,1,0 +1,1,1,0 +1,2,0,2 +0,3,1,2 +0,1,1,2 +0,3,1,2 +1,1,1,0 +0,3,1,2 +0,3,1,2 +1,1,1,2 +1,2,0,0 +1,1,0,2 +0,3,0,2 +0,3,1,2 +1,3,0,1 +0,3,1,1 +0,3,1,2 +1,2,0,2 +0,3,1,2 +0,3,0,2 +1,2,0,2 +0,2,1,2 +0,3,1,0 +1,1,1,2 +1,3,1,0 +0,3,1,2 +0,3,1,2 +0,1,1,2 +0,2,1,1 +1,1,0,2 +0,3,1,2 +0,3,1,1 +1,1,1,2 +0,3,1,2 +1,1,1,0 +0,1,1,2 +0,3,0,2 +1,2,0,2 +0,3,1,2 +0,2,1,2 +0,3,0,2 +0,3,1,2 +0,3,1,2 +1,1,0,0 +0,3,0,2 +1,3,1,2 +1,3,0,0 +1,1,1,0 +0,3,1,2 +1,1,1,0 +0,3,1,2 +1,3,0,2 +0,3,1,2 +1,2,0,2 +0,3,1,2 +1,3,0,1 +0,3,0,1 +0,2,1,2 +0,3,1,2 +0,3,0,1 +0,2,1,2 +0,1,1,0 +1,1,1,2 +0,3,1,0 +0,1,1,2 +0,3,1,2 +1,3,1,2 +0,2,1,2 +0,2,1,2 +0,3,1,2 +0,3,1,2 +1,1,0,2 +1,2,0,2 +0,1,1,2 +0,2,1,2 +1,2,1,2 +0,2,1,2 +0,3,1,2 +0,3,1,2 +1,3,0,2 +0,3,0,2 +1,1,1,0 +0,3,0,1 +1,1,1,0 +0,3,1,2 +0,3,1,2 +0,2,1,2 +0,2,1,0 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,1,0,2 +1,1,1,2 +1,3,0,0 +1,3,1,2 +0,3,1,0 +0,1,1,2 +0,2,1,2 +0,3,1,2 +1,3,0,1 +0,1,1,0 +0,3,1,2 +1,1,0,0 +1,1,1,2 +0,3,0,0 +0,3,1,1 +0,3,1,2 +0,2,1,2 +1,2,0,2 +1,1,1,2 +1,1,0,2 +1,3,1,0 +1,1,0,0 +0,1,1,2 +1,1,1,2 +0,3,1,2 +0,2,1,2 +0,3,1,2 +1,1,0,0 +1,2,0,2 +0,3,1,1 +0,3,1,2 +1,2,0,2 +0,3,1,2 +0,2,1,2 +0,2,1,2 +1,1,1,2 +0,3,1,2 +1,2,0,2 +1,3,0,1 +0,2,1,2 +0,3,0,2 +1,1,0,2 +0,3,1,0 +0,2,1,2 +0,2,1,2 +0,2,1,2 +0,3,1,2 +0,3,0,2 +1,1,1,0 +0,3,1,2 +0,3,1,2 +1,1,1,2 +0,1,1,2 +1,1,0,0 +0,3,1,2 +1,3,1,2 +0,1,1,2 +0,3,1,2 +1,2,0,2 +0,1,1,2 +0,3,1,1 +1,2,0,2 +1,3,1,2 +0,3,1,2 +0,3,1,2 +1,2,0,2 +1,2,1,2 +0,3,1,2 +0,2,1,2 +0,3,1,2 +1,1,0,2 +0,3,1,2 +0,3,1,2 +1,3,1,0 +1,1,0,2 +0,3,1,2 +1,1,0,2 +0,1,1,0 +0,3,0,1 +0,3,1,1 +0,3,1,2 +0,3,1,2 +0,3,1,2 +0,2,0,2 +0,3,1,0 +1,2,0,2 +0,3,1,2 +0,3,1,1 +1,3,0,2 +0,3,1,1 +1,1,0,2 +1,3,0,0 +1,1,0,2 +0,1,1,2 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,3,0,2 +0,3,1,1 +1,3,1,2 +0,1,1,0 +0,3,1,1 +0,2,1,2 +0,3,0,2 +0,1,1,0 +0,3,1,2 +0,2,1,2 +1,1,0,2 +1,3,0,2 +0,3,1,0 +0,3,0,2 +0,2,1,2 +1,2,0,2 +1,1,1,2 +1,3,1,0 +1,3,1,2 +0,3,1,2 +0,1,1,2 +0,3,0,2 +0,2,1,2 +1,1,0,2 +0,3,1,2 +0,3,1,2 +0,2,1,2 +0,3,0,2 +0,3,1,2 +0,1,1,2 +0,3,0,2 +0,2,1,0 +0,3,1,2 +0,3,1,2 +1,1,0,2 +1,3,1,2 +0,1,1,2 +1,3,0,2 +0,3,1,2 +0,3,1,1 +0,3,1,2 +1,2,1,0 +1,3,1,1 +1,3,0,0 +1,2,1,2 +0,3,1,0 +0,3,1,2 +0,3,1,2 +1,1,0,0 
+0,3,1,2 +0,3,1,2 +1,3,1,2 +1,1,1,0 +0,3,1,2 +0,2,1,2 +1,1,0,0 +0,3,1,0 +0,3,1,2 +0,3,1,2 +0,3,1,2 +0,3,1,0 +0,2,1,2 +1,1,0,0 +0,3,1,2 +0,3,1,2 +0,3,0,0 +1,1,0,2 +0,2,0,2 +1,3,0,2 +1,1,0,2 +1,1,1,2 +1,3,0,0 +0,3,1,0 +0,3,1,2 +0,2,1,2 +1,1,0,2 +0,3,0,2 +0,2,1,2 +1,2,0,2 +1,2,0,0 +0,1,1,2 +0,3,1,2 +1,3,1,2 +0,3,1,2 +1,1,0,2 +0,1,1,2 +0,3,1,2 +1,2,0,0 +1,3,0,0 +0,3,1,2 +0,3,1,2 +0,3,1,2 +1,1,0,0 +1,2,0,2 +0,3,1,2 +0,3,0,2 +0,2,1,2 +0,3,1,2 +0,3,0,1 +0,2,1,2 +1,1,0,2 +0,3,0,2 +1,1,1,0 +0,3,1,1 \ No newline at end of file diff --git a/examples/trees/cart.go b/examples/trees/cart.go new file mode 100644 index 0000000..931b7e5 --- /dev/null +++ b/examples/trees/cart.go @@ -0,0 +1,61 @@ +// Example of how to use CART trees for both Classification and Regression + +package main + +import ( + "fmt" + + "github.com/sjwhitworth/golearn/base" +) + +func main() { + + // Load Titanic Data For classification + classificationData, err := base.ParseCSVToInstances("../datasets/titanic.csv", false) + if err != nil { + panic(err) + } + trainData, testData := base.InstancesTrainTestSplit(classificationData, 0.5) + + // Create New Classification Tree + // Hyperparameters - loss function, max Depth (-1 will split until pure), list of unique labels + decTree = NewDecisionTreeClassifier("entropy", -1, []int64{0, 1}) + + // Train Tree + decTree.Fit(trainData) + // Print out tree for visualization - shows splits and feature and predictions + fmt.Println(decTree.String()) + + // Access Predictions + classificationPreds := decTree.Predict(testData) + + fmt.Println("Titanic Predictions") + fmt.Println(classificationPreds) + + // Evaluate Accuracy on Test Data + fmt.Println(decTree.Evaluate(testData)) + + // Load House Price Data For Regression + + regressionData, err := base.ParseCSVToInstances("../datasets/boston_house_prices.csv", false) + if err != nil { + panic(err) + } + trainRegData, testRegData := base.InstancesTrainTestSplit(regressionData, 0.5) + + // Hyperparameters - Loss function, max Depth (-1 will split until pure) + regTree := NewDecisionTreeRegressor("mse", -1) + + // Train Tree + regTree.Fit(trainRegData) + + // Print out tree for visualization + fmt.Println(regTree.String()) + + // Access Predictions + regressionPreds := regTree.Predict(testRegData) + + fmt.Println("Boston House Price Predictions") + fmt.Println(regressionPreds) + +} diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index 29646fb..39b7165 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -432,7 +432,7 @@ func cpredictFromNode(tree CNode, test [][]float64) []int64 { return preds } -// Given Test data and label, return the accuracy of the classifier. Data has to be in float slice format before feeding. +// Given Test data and label, return the accuracy of the classifier. 
diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go
index 29646fb..39b7165 100644
--- a/trees/cart_classifier.go
+++ b/trees/cart_classifier.go
@@ -432,7 +432,7 @@ func cpredictFromNode(tree CNode, test [][]float64) []int64 {
 	return preds
 }
 
-// Given Test data and label, return the accuracy of the classifier. Data has to be in float slice format before feeding.
+// Given Test data and label, return the accuracy of the classifier.
 func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float64 {
 	rootNode := *tree.RootNode
 	xTest := classifierConvertInstancesToProblemVec(test)

From c0c3b2e1bf54a57272bf86ddaa5df36bc087b05a Mon Sep 17 00:00:00 2001
From: Ayush
Date: Sat, 25 Jul 2020 13:22:15 +0530
Subject: [PATCH 07/24] Fixing Sorting

cSlice and rSlice named their swap methods cSwap and rSwap, so
sort.Sort never called them: it fell back to the promoted
sort.Float64Slice.Swap, and the Idx permutation was never updated.
The shared Slice type names the method Swap, which overrides the
promoted method, satisfies sort.Interface, and keeps Idx in step
with the sorted values.
---
 trees/cart_classifier.go | 24 +-----------------------
 trees/cart_regressor.go  | 24 +-----------------------
 trees/sorter.go          | 27 +++++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 46 deletions(-)
 create mode 100644 trees/sorter.go

diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go
index 39b7165..7f7575d 100644
--- a/trees/cart_classifier.go
+++ b/trees/cart_classifier.go
@@ -157,31 +157,9 @@ func cvalidate(triedSplits [][]float64, feature int64, threshold float64) bool {
 	return true
 }
 
-// Helper struct for re-rdering data
-type cSlice struct {
-	sort.Float64Slice
-	Idx []int
-}
-
-// Helper function for re-ordering data
-func (s cSlice) cSwap(i, j int) {
-	s.Float64Slice.Swap(i, j)
-	s.Idx[i], s.Idx[j] = s.Idx[j], s.Idx[i]
-}
-
-// Final Helper Function for re-ordering data
-func cNewSlice(n []float64) *cSlice {
-	s := &cSlice{Float64Slice: sort.Float64Slice(n), Idx: make([]int, len(n))}
-
-	for i := range s.Idx {
-		s.Idx[i] = i
-	}
-	return s
-}
-
 // Reorder the data by feature being considered. Optimizes code by reducing the number of times we have to loop over data for splitting
 func creOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) {
-	s := cNewSlice(featureVal)
+	s := NewSlice(featureVal)
 	sort.Sort(s)
 
 	indexes := s.Idx
diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go
index 48e61d0..6841a71 100644
--- a/trees/cart_regressor.go
+++ b/trees/cart_regressor.go
@@ -145,31 +145,9 @@ func rvalidate(triedSplits [][]float64, feature int64, threshold float64) bool {
 	return true
 }
 
-// Helper struct for re-rdering data
-type rSlice struct {
-	sort.Float64Slice
-	Idx []int
-}
-
-// Helper function for re-ordering data
-func (s rSlice) rSwap(i, j int) {
-	s.Float64Slice.Swap(i, j)
-	s.Idx[i], s.Idx[j] = s.Idx[j], s.Idx[i]
-}
-
-// Final Helper Function for re-ordering data
-func rNewSlice(n []float64) *rSlice {
-	s := &rSlice{Float64Slice: sort.Float64Slice(n), Idx: make([]int, len(n))}
-
-	for i := range s.Idx {
-		s.Idx[i] = i
-	}
-	return s
-}
-
 // Re order data based on a feature for optimizing code
 func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) {
-	s := rNewSlice(featureVal)
+	s := NewSlice(featureVal)
 	sort.Sort(s)
 
 	indexes := s.Idx
diff --git a/trees/sorter.go b/trees/sorter.go
new file mode 100644
index 0000000..157b25d
--- /dev/null
+++ b/trees/sorter.go
@@ -0,0 +1,27 @@
+package trees
+
+import (
+	"sort"
+)
+
+// Slice is a sort.Float64Slice that keeps track of each value's original index.
+type Slice struct {
+	sort.Float64Slice
+	Idx []int
+}
+
+// Swap swaps both the values and their indices, so sort.Sort keeps Idx in step with the data.
+func (s Slice) Swap(i, j int) {
+	s.Float64Slice.Swap(i, j)
+	s.Idx[i], s.Idx[j] = s.Idx[j], s.Idx[i]
+}
+
+// NewSlice wraps a []float64 in a Slice whose Idx is initialised to 0..n-1.
+func NewSlice(n []float64) *Slice {
+	s := &Slice{Float64Slice: sort.Float64Slice(n), Idx: make([]int, len(n))}
+
+	for i := range s.Idx {
+		s.Idx[i] = i
+	}
+	return s
+}
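Since every split search in both trees now funnels through this one sorter, the index bookkeeping deserves a quick illustration. A standalone sketch, assuming the patch above is applied so that the trees package exports Slice and NewSlice:

package main

import (
	"fmt"
	"sort"

	"github.com/sjwhitworth/golearn/trees"
)

func main() {
	// Wrap one feature column; Idx starts as the identity permutation.
	featureVal := []float64{0.9, 0.1, 0.5}
	s := trees.NewSlice(featureVal)
	sort.Sort(s)
	// Values come back ascending and Idx records where each value
	// started, so the rows of X and y can be reordered to match.
	fmt.Println(s.Float64Slice, s.Idx) // [0.1 0.5 0.9] [1 2 0]
}

From abed408f9bcd41fc9757fbbed1b783f572a11845 Mon Sep 17 00:00:00 2001
From: Ayush
Date: Sun, 26 Jul 2020 11:21:20 +0530
Subject: [PATCH 08/24] Updating Dataset + Naming

---
 examples/datasets/titanic.csv | 1732 ++++++++++++++++-----------------
 trees/cart_classifier.go      |   76 +-
 trees/cart_regressor.go       |   90 +-
 3 files changed, 949 insertions(+), 949 deletions(-)

diff --git a/examples/datasets/titanic.csv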
b/examples/datasets/titanic.csv index bb66baf..fe6999f 100644 --- a/examples/datasets/titanic.csv +++ b/examples/datasets/titanic.csv @@ -1,889 +1,889 @@ -0,3,1,2 +3,1,2,0 +1,0,0,1 +3,0,2,1 +1,0,2,1 +3,1,2,0 +3,1,1,0 +1,1,2,0 +3,1,2,0 +3,0,2,1 +2,0,0,1 +3,0,2,1 +1,0,2,1 +3,1,2,0 +3,1,2,0 +3,0,2,0 +2,0,2,1 +3,1,1,0 +2,1,2,1 +3,0,2,0 +3,0,0,1 +2,1,2,0 +2,1,2,1 +3,0,1,1 +1,1,2,1 +3,0,2,0 +3,0,2,1 +3,1,0,0 +1,1,2,0 +3,0,1,1 +3,1,2,0 1,1,0,0 -1,3,0,2 -1,1,0,2 -0,3,1,2 -0,3,1,1 -0,1,1,2 -0,3,1,2 -1,3,0,2 -1,2,0,0 -1,3,0,2 -1,1,0,2 -0,3,1,2 -0,3,1,2 -0,3,0,2 -1,2,0,2 -0,3,1,1 -1,2,1,2 -0,3,0,2 -1,3,0,0 -0,2,1,2 -1,2,1,2 -1,3,0,1 -1,1,1,2 -0,3,0,2 -1,3,0,2 -0,3,1,0 -0,1,1,2 -1,3,0,1 -0,3,1,2 -0,1,1,0 +1,0,0,1 +3,0,1,1 +2,1,2,0 1,1,0,0 -1,3,0,1 -0,2,1,2 -0,1,1,0 -0,1,1,2 -1,3,1,0 -0,3,1,2 -0,3,0,2 -1,3,0,0 -0,3,0,2 -0,2,0,2 -0,3,1,0 -1,2,0,0 -1,3,0,1 -0,3,1,2 -0,3,1,1 -1,3,0,1 -0,3,1,0 -0,3,0,2 -0,3,1,2 -0,3,1,2 +1,1,2,0 +3,1,0,1 +3,1,2,0 +3,0,2,0 +3,0,0,1 +3,0,2,0 +2,0,2,0 +3,1,0,0 +2,0,0,1 +3,0,1,1 +3,1,2,0 +3,1,1,0 +3,0,1,1 +3,1,0,0 +3,0,2,0 +3,1,2,0 +3,1,2,0 +1,0,0,1 +2,0,2,1 1,1,0,0 -1,2,0,2 -0,1,1,0 -1,1,1,2 -1,2,0,2 -0,3,1,0 -1,2,0,2 -0,3,1,2 -0,3,1,0 -0,1,1,2 -0,3,1,2 -0,1,1,0 -1,3,1,0 -1,2,0,2 -0,3,1,2 -1,3,0,2 -0,3,1,2 -0,2,1,2 -0,3,0,2 -0,2,1,2 -0,3,1,0 -1,3,1,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -1,2,1,2 -1,3,0,2 -0,3,1,2 -1,3,1,2 -1,3,0,1 -0,1,1,2 -1,2,0,2 -1,3,0,2 -0,3,1,2 -0,3,1,2 -1,1,0,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -0,1,1,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -0,1,1,0 -1,1,1,0 -1,2,0,2 -0,2,1,2 -0,3,0,2 -0,3,1,2 -0,1,1,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -1,3,0,2 -1,3,1,2 -0,3,1,2 -1,3,0,1 -0,1,1,2 -0,3,0,0 -0,3,1,2 -0,3,0,2 -0,3,0,0 -0,3,1,2 -0,3,1,1 -0,2,1,2 -0,1,1,0 -0,3,0,2 -0,2,1,2 -0,3,1,2 -0,2,1,0 -1,2,0,2 -0,1,1,2 -1,3,1,0 -0,3,1,1 -1,3,1,2 -1,3,0,0 -0,3,1,2 -0,3,1,0 -0,3,1,2 -0,3,0,2 -1,2,0,2 -0,2,1,2 -0,2,1,0 -1,1,0,2 -0,1,1,2 -0,3,1,2 -0,1,1,0 -0,3,0,0 -1,3,0,2 -1,3,0,2 -0,3,1,1 -0,2,1,2 -0,2,1,2 -1,3,1,2 -0,3,0,2 -0,2,1,2 -0,2,1,2 -0,2,1,2 -1,1,0,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -0,1,1,0 -1,3,0,1 -0,3,1,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -1,2,0,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -1,3,1,2 -1,1,0,2 -0,3,0,2 -0,1,1,2 -0,3,1,2 -0,1,1,2 -0,3,1,1 -1,3,0,2 -0,3,1,2 -0,1,1,0 -0,3,1,2 -0,3,1,2 -0,1,0,0 -0,2,1,2 -0,3,1,2 -0,3,0,2 -0,2,1,0 -0,3,1,2 -1,2,1,2 -1,3,0,2 -0,1,1,2 -1,3,0,1 -1,1,1,2 -0,3,1,1 -0,3,1,2 -1,2,0,2 -0,2,1,2 -1,3,0,2 -1,2,1,2 +1,1,2,1 +2,0,2,1 +3,1,0,0 +2,0,2,1 +3,1,2,0 +3,1,0,0 +1,1,2,0 +3,1,2,0 1,1,0,0 +3,1,0,1 +2,0,2,1 +3,1,2,0 +3,0,2,1 +3,1,2,0 +2,1,2,0 +3,0,2,0 +2,1,2,0 +3,1,0,0 +3,1,2,1 +3,1,2,0 +3,1,2,0 +3,1,2,0 +2,1,2,1 +3,0,2,1 +3,1,2,0 +3,1,2,1 +3,0,1,1 +1,1,2,0 +2,0,2,1 +3,0,2,1 +3,1,2,0 +3,1,2,0 +1,0,2,1 +3,1,2,0 +3,1,2,0 +3,1,2,0 +1,1,2,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 1,1,0,0 -0,3,1,1 -0,3,1,2 -1,3,0,1 -0,2,0,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -0,3,1,0 -1,3,1,2 -0,3,0,2 -0,3,1,2 -1,3,1,0 -1,3,0,1 -1,1,1,0 -0,3,1,2 -1,2,0,2 -0,3,1,2 -0,2,1,2 -0,3,1,1 -1,1,0,0 -1,3,0,2 -0,2,1,2 -1,1,0,0 -0,2,1,2 -1,3,1,2 -0,2,1,2 -0,3,1,2 -0,3,1,2 -1,1,1,2 -0,3,1,2 -1,2,1,2 -0,3,1,2 -0,2,1,2 -0,3,0,2 -1,1,0,2 -0,3,1,2 -0,2,1,2 -1,3,0,2 -0,2,1,2 -0,3,0,2 -0,2,1,2 -1,2,0,2 -0,2,1,2 -0,2,1,2 -0,3,0,0 -1,3,0,1 -0,2,1,2 -0,3,1,2 -0,3,1,0 -0,1,1,1 -0,3,0,2 -1,2,0,2 -1,1,1,2 -0,2,1,2 -0,3,1,2 -0,3,0,2 -0,1,1,2 -0,3,1,2 -0,3,0,2 -1,3,0,0 -1,1,0,0 -1,1,0,2 -1,1,0,0 -1,2,0,2 -0,3,1,1 -1,3,1,2 -0,1,1,2 -0,1,1,2 -0,3,0,1 -0,2,1,2 -0,3,1,2 -1,3,1,2 -1,1,0,2 -1,1,0,2 -0,1,1,2 -1,3,1,2 -1,2,0,2 -0,1,1,0 -1,3,0,1 -1,1,0,2 -0,3,0,2 -0,2,1,2 -0,3,1,1 -1,3,0,2 -0,3,1,1 -0,3,1,2 -0,3,1,2 -1,3,1,2 -0,1,1,2 -0,3,1,0 -1,3,1,2 -0,3,1,2 
-1,2,1,2 -1,3,0,1 -1,1,0,2 -1,1,0,0 -0,2,1,0 -0,3,0,2 -0,3,1,2 -0,1,1,0 -0,3,1,0 -0,1,0,2 -1,1,1,2 -1,1,0,0 -1,3,0,1 -1,3,1,1 -0,3,1,2 -1,2,0,1 -0,3,1,2 -1,1,1,2 -1,1,0,0 -1,1,0,0 -0,2,1,0 -1,1,0,0 -1,1,0,0 -1,1,0,0 -0,2,0,2 -0,3,1,2 -0,2,1,2 -1,3,0,2 -1,2,0,2 -0,2,1,2 -1,1,0,2 -1,1,0,0 -0,3,1,2 -0,3,1,2 -1,2,0,1 -1,2,0,2 -0,3,1,2 -1,1,0,0 -0,3,1,2 -1,2,0,2 -1,3,0,2 -1,1,0,0 -1,3,0,1 -0,1,1,2 -0,1,1,2 -0,3,1,2 -1,1,0,2 -0,3,1,2 -0,1,1,2 -1,1,0,0 -1,3,1,2 -0,1,1,2 -1,2,1,2 -1,1,0,2 -0,2,1,2 -0,2,1,2 -0,2,1,2 -1,2,0,2 -1,2,0,2 -1,3,0,2 -1,3,1,2 -0,3,1,2 -0,3,1,2 -0,1,1,2 -0,3,1,0 -0,3,1,2 -0,3,1,0 -0,3,1,2 -1,1,0,2 -0,2,0,2 -1,3,0,1 -1,3,0,1 -0,3,1,2 -0,2,1,0 -0,3,0,0 -0,3,1,2 -0,3,1,1 -0,3,1,2 -1,1,0,0 -1,3,0,0 -1,3,0,1 -1,1,0,0 -1,1,1,0 -0,3,1,2 -0,3,1,2 -0,1,1,0 -0,3,0,2 -1,1,0,0 -1,3,0,2 -0,1,1,0 -0,3,1,0 -0,3,1,2 -1,1,0,0 -1,3,0,0 -0,3,1,2 -1,1,0,2 -0,3,1,2 -0,2,1,2 -0,3,1,2 -1,2,0,2 -0,3,1,1 -1,2,0,0 -1,1,1,2 -1,3,1,2 -0,3,1,2 -1,1,0,0 -1,3,0,2 -0,3,1,2 -0,3,0,2 -0,2,1,2 -0,2,1,2 -1,2,0,2 -1,3,1,2 -0,3,1,2 -0,3,0,2 -0,3,1,2 -0,3,0,2 -0,2,1,2 -0,3,1,2 -1,2,1,2 -0,3,1,2 -0,3,0,2 -0,3,1,2 -0,3,1,1 1,1,0,1 -0,2,1,2 -1,3,1,2 -0,3,0,2 -1,2,0,2 -1,2,0,2 -0,2,1,2 -0,3,0,2 -0,3,1,0 -0,3,1,1 -0,3,1,2 -0,3,0,2 -0,3,1,2 -0,3,1,2 -1,2,0,2 -1,2,0,2 -0,3,1,1 -1,3,1,2 -1,1,1,2 -1,3,0,2 -1,2,0,2 -0,3,1,2 -0,1,1,2 -1,1,0,2 -0,3,0,2 -1,2,0,2 -0,1,1,2 -0,2,1,2 -1,2,0,2 -0,3,1,2 -0,3,1,2 -1,2,0,2 -1,3,1,2 -1,1,1,2 -1,2,0,2 -1,1,1,2 -1,3,0,0 -1,1,1,2 -0,2,1,2 -0,3,1,2 -0,1,1,0 +2,0,2,1 +2,1,2,0 +3,0,2,0 +3,1,2,0 +1,1,2,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 +3,0,2,1 +3,1,2,1 +3,1,2,0 +3,0,1,1 +1,1,2,0 +3,0,0,0 +3,1,2,0 +3,0,2,0 +3,0,0,0 +3,1,2,0 +3,1,1,0 +2,1,2,0 +1,1,0,0 +3,0,2,0 +2,1,2,0 +3,1,2,0 +2,1,0,0 +2,0,2,1 +1,1,2,0 +3,1,0,1 +3,1,1,0 +3,1,2,1 +3,0,0,1 +3,1,2,0 +3,1,0,0 +3,1,2,0 +3,0,2,0 +2,0,2,1 +2,1,2,0 +2,1,0,0 +1,0,2,1 +1,1,2,0 +3,1,2,0 +1,1,0,0 +3,0,0,0 +3,0,2,1 +3,0,2,1 +3,1,1,0 +2,1,2,0 +2,1,2,0 +3,1,2,1 +3,0,2,0 +2,1,2,0 +2,1,2,0 +2,1,2,0 +1,0,2,1 +3,1,2,0 +3,1,2,0 +3,1,2,0 +1,1,0,0 +3,0,1,1 +3,1,2,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 +2,0,2,1 +3,1,2,0 +3,1,2,0 +3,1,2,0 +3,1,2,1 +1,0,2,1 +3,0,2,0 +1,1,2,0 +3,1,2,0 +1,1,2,0 +3,1,1,0 +3,0,2,1 +3,1,2,0 +1,1,0,0 +3,1,2,0 +3,1,2,0 +1,0,0,0 +2,1,2,0 +3,1,2,0 +3,0,2,0 +2,1,0,0 +3,1,2,0 +2,1,2,1 +3,0,2,1 +1,1,2,0 +3,0,1,1 +1,1,2,1 +3,1,1,0 +3,1,2,0 +2,0,2,1 +2,1,2,0 +3,0,2,1 +2,1,2,1 +1,0,0,1 +1,0,0,1 +3,1,1,0 +3,1,2,0 +3,0,1,1 +2,0,2,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 +3,1,0,0 +3,1,2,1 +3,0,2,0 +3,1,2,0 +3,1,0,1 +3,0,1,1 +1,1,0,1 +3,1,2,0 +2,0,2,1 +3,1,2,0 +2,1,2,0 +3,1,1,0 +1,0,0,1 +3,0,2,1 +2,1,2,0 +1,0,0,1 +2,1,2,0 +3,1,2,1 +2,1,2,0 +3,1,2,0 +3,1,2,0 +1,1,2,1 +3,1,2,0 +2,1,2,1 +3,1,2,0 +2,1,2,0 +3,0,2,0 +1,0,2,1 +3,1,2,0 +2,1,2,0 +3,0,2,1 +2,1,2,0 +3,0,2,0 +2,1,2,0 +2,0,2,1 +2,1,2,0 +2,1,2,0 +3,0,0,0 +3,0,1,1 +2,1,2,0 +3,1,2,0 +3,1,0,0 1,1,1,0 -0,3,1,2 -1,3,1,0 -0,1,1,2 -1,1,0,2 -1,2,0,2 -0,3,1,1 -1,1,1,2 -0,3,1,2 -0,1,1,2 -0,2,1,2 -0,3,1,2 -0,3,1,2 -0,2,1,2 -0,1,1,2 -0,3,1,1 -1,3,0,0 -0,3,1,2 -0,3,1,2 -1,2,0,2 -1,2,0,0 -0,3,0,2 -0,1,1,2 -0,2,1,2 -0,3,1,2 -0,3,1,2 -1,3,0,2 -0,3,1,2 -0,2,1,2 -0,3,1,2 -1,3,0,2 -1,1,1,0 -0,3,0,2 -1,1,0,2 -0,1,1,0 -0,3,1,2 -1,3,1,2 -0,3,1,2 -0,3,1,2 -0,1,1,2 -0,1,1,0 -0,3,1,2 -0,3,1,0 +3,0,2,0 +2,0,2,1 +1,1,2,1 +2,1,2,0 +3,1,2,0 +3,0,2,0 +1,1,2,0 +3,1,2,0 +3,0,2,0 +3,0,0,1 +1,0,0,1 +1,0,2,1 +1,0,0,1 +2,0,2,1 +3,1,1,0 +3,1,2,1 +1,1,2,0 +1,1,2,0 +3,0,1,0 +2,1,2,0 +3,1,2,0 +3,1,2,1 +1,0,2,1 +1,0,2,1 +1,1,2,0 +3,1,2,1 +2,0,2,1 1,1,0,0 -0,3,1,2 -0,1,0,2 -0,3,1,2 -0,3,1,2 -0,3,0,1 -0,3,0,1 -0,3,0,2 -1,1,0,2 -0,1,1,0 -1,2,0,2 -1,1,1,2 -0,3,1,2 
-1,3,1,2 -1,3,1,1 -0,3,1,2 -1,1,1,2 +3,0,1,1 +1,0,2,1 +3,0,2,0 +2,1,2,0 +3,1,1,0 +3,0,2,1 +3,1,1,0 +3,1,2,0 +3,1,2,0 +3,1,2,1 +1,1,2,0 +3,1,0,0 +3,1,2,1 +3,1,2,0 +2,1,2,1 +3,0,1,1 +1,0,2,1 +1,0,0,1 +2,1,0,0 +3,0,2,0 +3,1,2,0 1,1,0,0 -0,3,1,2 -0,1,1,2 -1,2,0,2 -0,3,1,1 -1,2,0,2 -0,3,1,2 -1,1,0,2 -0,3,1,2 -0,3,1,0 +3,1,0,0 +1,0,2,0 +1,1,2,1 +1,0,0,1 +3,0,1,1 +3,1,1,1 +3,1,2,0 +2,0,1,1 +3,1,2,0 +1,1,2,1 +1,0,0,1 +1,0,0,1 +2,1,0,0 +1,0,0,1 +1,0,0,1 +1,0,0,1 +2,0,2,0 +3,1,2,0 +2,1,2,0 +3,0,2,1 +2,0,2,1 +2,1,2,0 +1,0,2,1 +1,0,0,1 +3,1,2,0 +3,1,2,0 +2,0,1,1 +2,0,2,1 +3,1,2,0 +1,0,0,1 +3,1,2,0 +2,0,2,1 +3,0,2,1 +1,0,0,1 +3,0,1,1 +1,1,2,0 +1,1,2,0 +3,1,2,0 +1,0,2,1 +3,1,2,0 +1,1,2,0 +1,0,0,1 +3,1,2,1 +1,1,2,0 +2,1,2,1 +1,0,2,1 +2,1,2,0 +2,1,2,0 +2,1,2,0 +2,0,2,1 +2,0,2,1 +3,0,2,1 +3,1,2,1 +3,1,2,0 +3,1,2,0 +1,1,2,0 +3,1,0,0 +3,1,2,0 +3,1,0,0 +3,1,2,0 +1,0,2,1 +2,0,2,0 +3,0,1,1 +3,0,1,1 +3,1,2,0 +2,1,0,0 +3,0,0,0 +3,1,2,0 +3,1,1,0 +3,1,2,0 +1,0,0,1 +3,0,0,1 +3,0,1,1 +1,0,0,1 +1,1,0,1 +3,1,2,0 +3,1,2,0 1,1,0,0 -0,3,1,0 -0,3,1,1 -1,2,0,2 -0,1,1,2 -0,3,1,2 -0,2,1,2 -1,2,0,2 -0,3,1,0 -0,3,1,0 -1,3,0,0 -0,3,0,2 -1,2,0,2 -0,1,1,2 +3,0,2,0 +1,0,0,1 +3,0,2,1 1,1,0,0 -0,3,1,2 +3,1,0,0 +3,1,2,0 +1,0,0,1 +3,0,0,1 +3,1,2,0 +1,0,2,1 +3,1,2,0 +2,1,2,0 +3,1,2,0 +2,0,2,1 +3,1,1,0 +2,0,0,1 +1,1,2,1 +3,1,2,1 +3,1,2,0 +1,0,0,1 +3,0,2,1 +3,1,2,0 +3,0,2,0 +2,1,2,0 +2,1,2,0 +2,0,2,1 +3,1,2,1 +3,1,2,0 +3,0,2,0 +3,1,2,0 +3,0,2,0 +2,1,2,0 +3,1,2,0 +2,1,2,1 +3,1,2,0 +3,0,2,0 +3,1,2,0 +3,1,1,0 +1,0,1,1 +2,1,2,0 +3,1,2,1 +3,0,2,0 +2,0,2,1 +2,0,2,1 +2,1,2,0 +3,0,2,0 +3,1,0,0 +3,1,1,0 +3,1,2,0 +3,0,2,0 +3,1,2,0 +3,1,2,0 +2,0,2,1 +2,0,2,1 +3,1,1,0 +3,1,2,1 +1,1,2,1 +3,0,2,1 +2,0,2,1 +3,1,2,0 +1,1,2,0 +1,0,2,1 +3,0,2,0 +2,0,2,1 +1,1,2,0 +2,1,2,0 +2,0,2,1 +3,1,2,0 +3,1,2,0 +2,0,2,1 +3,1,2,1 +1,1,2,1 +2,0,2,1 +1,1,2,1 +3,0,0,1 +1,1,2,1 +2,1,2,0 +3,1,2,0 1,1,0,0 -1,1,0,2 -0,3,0,2 -0,3,0,2 -1,2,1,2 -0,1,1,0 -0,1,1,2 -1,2,0,2 -1,2,1,0 -0,3,1,2 -1,2,1,2 -1,1,1,0 -0,2,1,2 -0,3,1,1 -1,3,1,0 -1,3,0,2 -0,1,1,2 +1,1,0,1 +3,1,2,0 +3,1,0,1 +1,1,2,0 +1,0,2,1 +2,0,2,1 +3,1,1,0 +1,1,2,1 +3,1,2,0 +1,1,2,0 +2,1,2,0 +3,1,2,0 +3,1,2,0 +2,1,2,0 +1,1,2,0 +3,1,1,0 +3,0,0,1 +3,1,2,0 +3,1,2,0 +2,0,2,1 +2,0,0,1 +3,0,2,0 +1,1,2,0 +2,1,2,0 +3,1,2,0 +3,1,2,0 +3,0,2,1 +3,1,2,0 +2,1,2,0 +3,1,2,0 +3,0,2,1 +1,1,0,1 +3,0,2,0 +1,0,2,1 1,1,0,0 -0,1,1,0 -1,1,0,2 -1,3,0,2 -0,3,1,1 -0,3,1,2 -0,2,1,2 -0,3,1,2 -0,3,0,2 -0,3,1,2 -0,3,1,2 -0,3,0,2 -0,3,1,0 -1,3,1,2 -1,2,1,2 -1,1,0,2 -1,1,1,2 -1,3,0,1 -0,3,1,2 -0,3,1,2 -1,2,0,2 -1,1,0,2 -0,3,0,0 -1,3,1,2 -1,2,0,2 +3,1,2,0 +3,1,2,1 +3,1,2,0 +3,1,2,0 +1,1,2,0 1,1,0,0 -0,2,1,2 -0,1,1,0 -0,3,1,0 -1,1,0,2 -0,2,1,2 -1,1,1,0 -0,3,1,2 -0,3,1,2 -0,3,1,2 +3,1,2,0 +3,1,0,0 +1,0,0,1 +3,1,2,0 +1,0,2,0 +3,1,2,0 +3,1,2,0 +3,0,1,0 +3,0,1,0 +3,0,2,0 +1,0,2,1 1,1,0,0 -0,3,1,2 -0,3,0,1 -0,2,1,2 -0,3,1,2 -1,2,0,2 -0,3,1,2 -0,3,1,0 -1,1,1,0 -1,2,0,2 -0,3,1,2 -0,1,1,2 -0,3,1,2 -1,1,1,0 -0,3,1,2 -0,3,1,2 -1,1,1,2 -1,2,0,0 -1,1,0,2 -0,3,0,2 -0,3,1,2 -1,3,0,1 -0,3,1,1 -0,3,1,2 -1,2,0,2 -0,3,1,2 -0,3,0,2 -1,2,0,2 -0,2,1,2 -0,3,1,0 -1,1,1,2 -1,3,1,0 -0,3,1,2 -0,3,1,2 -0,1,1,2 -0,2,1,1 -1,1,0,2 -0,3,1,2 -0,3,1,1 -1,1,1,2 -0,3,1,2 -1,1,1,0 -0,1,1,2 -0,3,0,2 -1,2,0,2 -0,3,1,2 -0,2,1,2 -0,3,0,2 -0,3,1,2 -0,3,1,2 +2,0,2,1 +1,1,2,1 +3,1,2,0 +3,1,2,1 +3,1,1,1 +3,1,2,0 +1,1,2,1 +1,0,0,1 +3,1,2,0 +1,1,2,0 +2,0,2,1 +3,1,1,0 +2,0,2,1 +3,1,2,0 +1,0,2,1 +3,1,2,0 +3,1,0,0 +1,0,0,1 +3,1,0,0 +3,1,1,0 +2,0,2,1 +1,1,2,0 +3,1,2,0 +2,1,2,0 +2,0,2,1 +3,1,0,0 +3,1,0,0 +3,0,0,1 +3,0,2,0 +2,0,2,1 +1,1,2,0 +1,0,0,1 +3,1,2,0 +1,0,0,1 +1,0,2,1 +3,0,2,0 +3,0,2,0 +2,1,2,1 
1,1,0,0 -0,3,0,2 -1,3,1,2 -1,3,0,0 -1,1,1,0 -0,3,1,2 -1,1,1,0 -0,3,1,2 -1,3,0,2 -0,3,1,2 -1,2,0,2 -0,3,1,2 -1,3,0,1 -0,3,0,1 -0,2,1,2 -0,3,1,2 -0,3,0,1 -0,2,1,2 -0,1,1,0 -1,1,1,2 -0,3,1,0 -0,1,1,2 -0,3,1,2 -1,3,1,2 -0,2,1,2 -0,2,1,2 -0,3,1,2 -0,3,1,2 -1,1,0,2 -1,2,0,2 -0,1,1,2 -0,2,1,2 -1,2,1,2 -0,2,1,2 -0,3,1,2 -0,3,1,2 -1,3,0,2 -0,3,0,2 -1,1,1,0 -0,3,0,1 -1,1,1,0 -0,3,1,2 -0,3,1,2 -0,2,1,2 -0,2,1,0 -0,3,1,2 -0,3,1,2 -0,3,1,2 -1,1,0,2 -1,1,1,2 -1,3,0,0 -1,3,1,2 -0,3,1,0 -0,1,1,2 -0,2,1,2 -0,3,1,2 -1,3,0,1 -0,1,1,0 -0,3,1,2 +1,1,2,0 +2,0,2,1 +2,1,0,1 +3,1,2,0 +2,1,2,1 +1,1,0,1 +2,1,2,0 +3,1,1,0 +3,1,0,1 +3,0,2,1 +1,1,2,0 +1,0,0,1 1,1,0,0 -1,1,1,2 -0,3,0,0 -0,3,1,1 -0,3,1,2 -0,2,1,2 -1,2,0,2 -1,1,1,2 -1,1,0,2 -1,3,1,0 +1,0,2,1 +3,0,2,1 +3,1,1,0 +3,1,2,0 +2,1,2,0 +3,1,2,0 +3,0,2,0 +3,1,2,0 +3,1,2,0 +3,0,2,0 +3,1,0,0 +3,1,2,1 +2,1,2,1 +1,0,2,1 +1,1,2,1 +3,0,1,1 +3,1,2,0 +3,1,2,0 +2,0,2,1 +1,0,2,1 +3,0,0,0 +3,1,2,1 +2,0,2,1 +1,0,0,1 +2,1,2,0 1,1,0,0 -0,1,1,2 -1,1,1,2 -0,3,1,2 -0,2,1,2 -0,3,1,2 +3,1,0,0 +1,0,2,1 +2,1,2,0 +1,1,0,1 +3,1,2,0 +3,1,2,0 +3,1,2,0 +1,0,0,1 +3,1,2,0 +3,0,1,0 +2,1,2,0 +3,1,2,0 +2,0,2,1 +3,1,2,0 +3,1,0,0 +1,1,0,1 +2,0,2,1 +3,1,2,0 +1,1,2,0 +3,1,2,0 +1,1,0,1 +3,1,2,0 +3,1,2,0 +1,1,2,1 +2,0,0,1 +1,0,2,1 +3,0,2,0 +3,1,2,0 +3,0,1,1 +3,1,1,0 +3,1,2,0 +2,0,2,1 +3,1,2,0 +3,0,2,0 +2,0,2,1 +2,1,2,0 +3,1,0,0 +1,1,2,1 +3,1,0,1 +3,1,2,0 +3,1,2,0 +1,1,2,0 +2,1,1,0 +1,0,2,1 +3,1,2,0 +3,1,1,0 +1,1,2,1 +3,1,2,0 +1,1,0,1 +1,1,2,0 +3,0,2,0 +2,0,2,1 +3,1,2,0 +2,1,2,0 +3,0,2,0 +3,1,2,0 +3,1,2,0 +1,0,0,1 +3,0,2,0 +3,1,2,1 +3,0,0,1 +1,1,0,1 +3,1,2,0 +1,1,0,1 +3,1,2,0 +3,0,2,1 +3,1,2,0 +2,0,2,1 +3,1,2,0 +3,0,1,1 +3,0,1,0 +2,1,2,0 +3,1,2,0 +3,0,1,0 +2,1,2,0 1,1,0,0 -1,2,0,2 -0,3,1,1 -0,3,1,2 -1,2,0,2 -0,3,1,2 -0,2,1,2 -0,2,1,2 -1,1,1,2 -0,3,1,2 -1,2,0,2 -1,3,0,1 -0,2,1,2 -0,3,0,2 -1,1,0,2 -0,3,1,0 -0,2,1,2 -0,2,1,2 -0,2,1,2 -0,3,1,2 -0,3,0,2 -1,1,1,0 -0,3,1,2 -0,3,1,2 -1,1,1,2 -0,1,1,2 +1,1,2,1 +3,1,0,0 +1,1,2,0 +3,1,2,0 +3,1,2,1 +2,1,2,0 +2,1,2,0 +3,1,2,0 +3,1,2,0 +1,0,2,1 +2,0,2,1 +1,1,2,0 +2,1,2,0 +2,1,2,1 +2,1,2,0 +3,1,2,0 +3,1,2,0 +3,0,2,1 +3,0,2,0 +1,1,0,1 +3,0,1,0 +1,1,0,1 +3,1,2,0 +3,1,2,0 +2,1,2,0 +2,1,0,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 +1,0,2,1 +1,1,2,1 +3,0,0,1 +3,1,2,1 +3,1,0,0 +1,1,2,0 +2,1,2,0 +3,1,2,0 +3,0,1,1 1,1,0,0 -0,3,1,2 -1,3,1,2 -0,1,1,2 -0,3,1,2 -1,2,0,2 -0,1,1,2 -0,3,1,1 -1,2,0,2 -1,3,1,2 -0,3,1,2 -0,3,1,2 -1,2,0,2 -1,2,1,2 -0,3,1,2 -0,2,1,2 -0,3,1,2 -1,1,0,2 -0,3,1,2 -0,3,1,2 -1,3,1,0 -1,1,0,2 -0,3,1,2 -1,1,0,2 -0,1,1,0 -0,3,0,1 -0,3,1,1 -0,3,1,2 -0,3,1,2 -0,3,1,2 -0,2,0,2 -0,3,1,0 -1,2,0,2 -0,3,1,2 -0,3,1,1 -1,3,0,2 -0,3,1,1 -1,1,0,2 -1,3,0,0 -1,1,0,2 -0,1,1,2 -0,3,1,2 -0,3,1,2 -0,3,1,2 -1,3,0,2 -0,3,1,1 -1,3,1,2 -0,1,1,0 -0,3,1,1 -0,2,1,2 -0,3,0,2 -0,1,1,0 -0,3,1,2 -0,2,1,2 -1,1,0,2 -1,3,0,2 -0,3,1,0 -0,3,0,2 -0,2,1,2 -1,2,0,2 -1,1,1,2 -1,3,1,0 -1,3,1,2 -0,3,1,2 -0,1,1,2 -0,3,0,2 -0,2,1,2 -1,1,0,2 -0,3,1,2 -0,3,1,2 -0,2,1,2 -0,3,0,2 -0,3,1,2 -0,1,1,2 -0,3,0,2 -0,2,1,0 -0,3,1,2 -0,3,1,2 -1,1,0,2 -1,3,1,2 -0,1,1,2 -1,3,0,2 -0,3,1,2 -0,3,1,1 -0,3,1,2 -1,2,1,0 -1,3,1,1 -1,3,0,0 -1,2,1,2 -0,3,1,0 -0,3,1,2 -0,3,1,2 +3,1,2,0 +1,0,0,1 +1,1,2,1 +3,0,0,0 +3,1,1,0 +3,1,2,0 +2,1,2,0 +2,0,2,1 +1,1,2,1 +1,0,2,1 +3,1,0,1 +1,0,0,1 +1,1,2,0 +1,1,2,1 +3,1,2,0 +2,1,2,0 +3,1,2,0 +1,0,0,1 +2,0,2,1 +3,1,1,0 +3,1,2,0 +2,0,2,1 +3,1,2,0 +2,1,2,0 +2,1,2,0 +1,1,2,1 +3,1,2,0 +2,0,2,1 +3,0,1,1 +2,1,2,0 +3,0,2,0 +1,0,2,1 +3,1,0,0 +2,1,2,0 +2,1,2,0 +2,1,2,0 +3,1,2,0 +3,0,2,0 +1,1,0,1 +3,1,2,0 +3,1,2,0 +1,1,2,1 +1,1,2,0 +1,0,0,1 +3,1,2,0 +3,1,2,1 +1,1,2,0 +3,1,2,0 +2,0,2,1 +1,1,2,0 +3,1,1,0 +2,0,2,1 
+3,1,2,1 +3,1,2,0 +3,1,2,0 +2,0,2,1 +2,1,2,1 +3,1,2,0 +2,1,2,0 +3,1,2,0 +1,0,2,1 +3,1,2,0 +3,1,2,0 +3,1,0,1 +1,0,2,1 +3,1,2,0 +1,0,2,1 1,1,0,0 -0,3,1,2 -0,3,1,2 -1,3,1,2 -1,1,1,0 -0,3,1,2 -0,2,1,2 +3,0,1,0 +3,1,1,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 +2,0,2,0 +3,1,0,0 +2,0,2,1 +3,1,2,0 +3,1,1,0 +3,0,2,1 +3,1,1,0 +1,0,2,1 +3,0,0,1 +1,0,2,1 +1,1,2,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 +3,0,2,1 +3,1,1,0 +3,1,2,1 1,1,0,0 -0,3,1,0 -0,3,1,2 -0,3,1,2 -0,3,1,2 -0,3,1,0 -0,2,1,2 +3,1,1,0 +2,1,2,0 +3,0,2,0 1,1,0,0 -0,3,1,2 -0,3,1,2 -0,3,0,0 -1,1,0,2 -0,2,0,2 -1,3,0,2 -1,1,0,2 -1,1,1,2 -1,3,0,0 -0,3,1,0 -0,3,1,2 -0,2,1,2 -1,1,0,2 -0,3,0,2 -0,2,1,2 -1,2,0,2 -1,2,0,0 -0,1,1,2 -0,3,1,2 -1,3,1,2 -0,3,1,2 -1,1,0,2 -0,1,1,2 -0,3,1,2 -1,2,0,0 -1,3,0,0 -0,3,1,2 -0,3,1,2 -0,3,1,2 -1,1,0,0 -1,2,0,2 -0,3,1,2 -0,3,0,2 -0,2,1,2 -0,3,1,2 -0,3,0,1 -0,2,1,2 -1,1,0,2 -0,3,0,2 -1,1,1,0 -0,3,1,1 \ No newline at end of file +3,1,2,0 +2,1,2,0 +1,0,2,1 +3,0,2,1 +3,1,0,0 +3,0,2,0 +2,1,2,0 +2,0,2,1 +1,1,2,1 +3,1,0,1 +3,1,2,1 +3,1,2,0 +1,1,2,0 +3,0,2,0 +2,1,2,0 +1,0,2,1 +3,1,2,0 +3,1,2,0 +2,1,2,0 +3,0,2,0 +3,1,2,0 +1,1,2,0 +3,0,2,0 +2,1,0,0 +3,1,2,0 +3,1,2,0 +1,0,2,1 +3,1,2,1 +1,1,2,0 +3,0,2,1 +3,1,2,0 +3,1,1,0 +3,1,2,0 +2,1,0,1 +3,1,1,1 +3,0,0,1 +2,1,2,1 +3,1,0,0 +3,1,2,0 +3,1,2,0 +1,0,0,1 +3,1,2,0 +3,1,2,0 +3,1,2,1 +1,1,0,1 +3,1,2,0 +2,1,2,0 +1,0,0,1 +3,1,0,0 +3,1,2,0 +3,1,2,0 +3,1,2,0 +3,1,0,0 +2,1,2,0 +1,0,0,1 +3,1,2,0 +3,1,2,0 +3,0,0,0 +1,0,2,1 +2,0,2,0 +3,0,2,1 +1,0,2,1 +1,1,2,1 +3,0,0,1 +3,1,0,0 +3,1,2,0 +2,1,2,0 +1,0,2,1 +3,0,2,0 +2,1,2,0 +2,0,2,1 +2,0,0,1 +1,1,2,0 +3,1,2,0 +3,1,2,1 +3,1,2,0 +1,0,2,1 +1,1,2,0 +3,1,2,0 +2,0,0,1 +3,0,0,1 +3,1,2,0 +3,1,2,0 +3,1,2,0 +1,0,0,1 +2,0,2,1 +3,1,2,0 +3,0,2,0 +2,1,2,0 +3,1,2,0 +3,0,1,0 +2,1,2,0 +1,0,2,1 +3,0,2,0 +1,1,0,1 +3,1,1,0 \ No newline at end of file diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index 7f7575d..dc54d19 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -13,9 +13,9 @@ import ( // The "c" prefix to function names indicates that they were tailored for classification // CNode is Node struct for Decision Tree Classifier -type CNode struct { - Left *CNode - Right *CNode +type classifierNode struct { + Left *classifierNode + Right *classifierNode Threshold float64 Feature int64 LeftLabel int64 @@ -26,7 +26,7 @@ type CNode struct { // CARTDecisionTreeClassifier: Tree struct for Decision Tree Classifier type CARTDecisionTreeClassifier struct { - RootNode *CNode + RootNode *classifierNode criterion string maxDepth int64 labels []int64 @@ -85,7 +85,7 @@ func entropy(y []int64, labels []int64) (float64, int64) { } // Split the data into left node and right node based on feature and threshold - only needed for fresh nodes -func ctestSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) { +func classifierCreateSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) { var left [][]float64 var right [][]float64 var lefty []int64 @@ -106,7 +106,7 @@ func ctestSplit(data [][]float64, feature int64, y []int64, threshold float64) ( } // Helper Function to check if data point is unique or not -func cstringInSlice(a float64, list []float64) bool { +func classifierStringInSlice(a float64, list []float64) bool { for _, b := range list { if b == a { return true @@ -116,10 +116,10 @@ func cstringInSlice(a float64, list []float64) bool { } // Isolate only unique values. Needed for splitting data. 
-func cfindUnique(data []float64) []float64 { +func classifierFindUnique(data []float64) []float64 { var unique []float64 for i := range data { - if !cstringInSlice(data[i], unique) { + if !classifierStringInSlice(data[i], unique) { unique = append(unique, data[i]) } } @@ -127,7 +127,7 @@ func cfindUnique(data []float64) []float64 { } // Isolate only the feature being considered for splitting -func cgetFeature(data [][]float64, feature int64) []float64 { +func classifierGetFeature(data [][]float64, feature int64) []float64 { var featureVals []float64 for i := range data { featureVals = append(featureVals, data[i][feature]) @@ -146,7 +146,7 @@ func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) } // Make sure that split being considered has not been done before -func cvalidate(triedSplits [][]float64, feature int64, threshold float64) bool { +func classifierValidate(triedSplits [][]float64, feature int64, threshold float64) bool { for i := range triedSplits { split := triedSplits[i] featureTried, thresholdTried := split[0], split[1] @@ -158,7 +158,7 @@ func cvalidate(triedSplits [][]float64, feature int64, threshold float64) bool { } // Reorder the data by feature being considered. Optimizes code by reducing the number of times we have to loop over data for splitting -func creOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) { +func classifierReOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) { s := NewSlice(featureVal) sort.Sort(s) @@ -176,7 +176,7 @@ func creOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float6 } // Change data in Left Node and Right Node based on change in threshold -func cupdateSplit(left [][]float64, lefty []int64, right [][]float64, righty []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) { +func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, righty []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) { for right[0][feature] < threshold { left = append(left, right[0]) @@ -190,17 +190,17 @@ func cupdateSplit(left [][]float64, lefty []int64, right [][]float64, righty []i // Fit - Method visible to user to train tree func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) { - var emptyNode CNode + var emptyNode classifierNode data := classifierConvertInstancesToProblemVec(X) y := classifierConvertInstancesToLabelVec(X) - emptyNode = cbestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) + emptyNode = classifierBestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) tree.RootNode = &emptyNode } // Iterativly find and record the best split - recursive function -func cbestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, labels []int64, upperNode CNode, criterion string, maxDepth int64, depth int64) CNode { +func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, labels []int64, upperNode classifierNode, criterion string, maxDepth int64, depth int64) classifierNode { // Ensure that we have not reached maxDepth. 
maxDepth =-1 means split until nodes are pure depth++ @@ -236,16 +236,16 @@ func cbestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, la upperNode.Use_not = true - var leftN CNode - var rightN CNode + var leftN classifierNode + var rightN classifierNode // Iterate over all features for i := 0; i < numFeatures; i++ { - featureVal := cgetFeature(data, int64(i)) - unique := cfindUnique(featureVal) + featureVal := classifierGetFeature(data, int64(i)) + unique := classifierFindUnique(featureVal) sort.Float64s(unique) numUnique := len(unique) - sortData, sortY := creOrderData(featureVal, data, y) + sortData, sortY := classifierReOrderData(featureVal, data, y) firstTime := true @@ -256,14 +256,14 @@ func cbestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, la if j != (numUnique - 1) { threshold := (unique[j] + unique[j+1]) / 2 // Ensure that same split has not been made before - if cvalidate(tree.triedSplits, int64(i), threshold) { + if classifierValidate(tree.triedSplits, int64(i), threshold) { // We need to split data from fresh when considering new feature for the first time. // Otherwise, we need to update the split by moving data points from left to right. if firstTime { - left, right, lefty, righty = ctestSplit(sortData, int64(i), sortY, threshold) + left, right, lefty, righty = classifierCreateSplit(sortData, int64(i), sortY, threshold) firstTime = false } else { - left, lefty, right, righty = cupdateSplit(left, lefty, right, righty, int64(i), threshold) + left, lefty, right, righty = classifierUpdateSplit(left, lefty, right, righty, int64(i), threshold) } var leftGini float64 @@ -314,7 +314,7 @@ func cbestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, la if bestLeftGini > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) // Recursive splitting logic - leftN = cbestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth) + leftN = classifierBestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth) if leftN.Use_not == true { upperNode.Left = &leftN } @@ -324,7 +324,7 @@ func cbestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, la if bestRightGini > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) // Recursive splitting logic - rightN = cbestSplit(tree, bestRight, bestRighty, labels, rightN, criterion, maxDepth, depth) + rightN = classifierBestSplit(tree, bestRight, bestRighty, labels, rightN, criterion, maxDepth, depth) if rightN.Use_not == true { upperNode.Right = &rightN } @@ -339,10 +339,10 @@ func cbestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, la // PrintTree : this function prints out entire tree for visualization - visible to user func (tree *CARTDecisionTreeClassifier) String() string { rootNode := *tree.RootNode - return cprintTreeFromNode(rootNode, "") + return classifierPrintTreeFromNode(rootNode, "") } -func cprintTreeFromNode(tree CNode, spacing string) string { +func classifierPrintTreeFromNode(tree classifierNode, spacing string) string { returnString := "" returnString += spacing + "Feature " returnString += strconv.FormatInt(tree.Feature, 10) @@ -365,30 +365,30 @@ func cprintTreeFromNode(tree CNode, spacing string) string { if tree.Left != nil { returnString += spacing + "---> True" + "\n" - returnString += cprintTreeFromNode(*tree.Left, spacing+" ") + returnString += classifierPrintTreeFromNode(*tree.Left, 
spacing+" ") } if tree.Right != nil { returnString += spacing + "---> False" + "\n" - returnString += cprintTreeFromNode(*tree.Right, spacing+" ") + returnString += classifierPrintTreeFromNode(*tree.Right, spacing+" ") } return returnString } // Predict a single data point by traversing the entire tree -func cpredictSingle(tree CNode, instance []float64) int64 { +func classifierPredictSingle(tree classifierNode, instance []float64) int64 { if instance[tree.Feature] < tree.Threshold { if tree.Left == nil { return tree.LeftLabel } else { - return cpredictSingle(*tree.Left, instance) + return classifierPredictSingle(*tree.Left, instance) } } else { if tree.Right == nil { return tree.RightLabel } else { - return cpredictSingle(*tree.Right, instance) + return classifierPredictSingle(*tree.Right, instance) } } } @@ -397,14 +397,14 @@ func cpredictSingle(tree CNode, instance []float64) int64 { func (tree *CARTDecisionTreeClassifier) Predict(X_test base.FixedDataGrid) []int64 { root := *tree.RootNode test := classifierConvertInstancesToProblemVec(X_test) - return cpredictFromNode(root, test) + return classifierPredictFromNode(root, test) } // This function uses the rootnode from Predict. It is invisible to user, but called from predict method. -func cpredictFromNode(tree CNode, test [][]float64) []int64 { +func classifierPredictFromNode(tree classifierNode, test [][]float64) []int64 { var preds []int64 for i := range test { - iPred := cpredictSingle(tree, test[i]) + iPred := classifierPredictSingle(tree, test[i]) preds = append(preds, iPred) } return preds @@ -415,11 +415,11 @@ func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float6 rootNode := *tree.RootNode xTest := classifierConvertInstancesToProblemVec(test) yTest := classifierConvertInstancesToLabelVec(test) - return cevaluateFromNode(rootNode, xTest, yTest) + return classifierEvaluateFromNode(rootNode, xTest, yTest) } -func cevaluateFromNode(tree CNode, xTest [][]float64, yTest []int64) float64 { - preds := cpredictFromNode(tree, xTest) +func classifierEvaluateFromNode(tree classifierNode, xTest [][]float64, yTest []int64) float64 { + preds := classifierPredictFromNode(tree, xTest) accuracy := 0.0 for i := range preds { if preds[i] == yTest[i] { diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go index 6841a71..d894db5 100644 --- a/trees/cart_regressor.go +++ b/trees/cart_regressor.go @@ -13,9 +13,9 @@ import ( // The "r" prefix to all function names indicates that they were tailored to support regression. 
// RNode - Node struct for Decision Tree Regressor -type RNode struct { - Left *RNode - Right *RNode +type regressorNode struct { + Left *regressorNode + Right *regressorNode Threshold float64 Feature int64 LeftPred float64 @@ -25,22 +25,12 @@ type RNode struct { // CARTDecisionTreeRegressor - Tree struct for Decision Tree Regressor type CARTDecisionTreeRegressor struct { - RootNode *RNode + RootNode *regressorNode criterion string maxDepth int64 triedSplits [][]float64 } -// Calculate Mean Absolute Error for a constant prediction -func meanAbsoluteError(y []float64, yBar float64) float64 { - error := 0.0 - for _, target := range y { - error += math.Abs(target - yBar) - } - error /= float64(len(y)) - return error -} - // Find average func average(y []float64) float64 { mean := 0.0 @@ -51,6 +41,16 @@ func average(y []float64) float64 { return mean } +// Calculate Mean Absolute Error for a constant prediction +func meanAbsoluteError(y []float64, yBar float64) float64 { + error := 0.0 + for _, target := range y { + error += math.Abs(target - yBar) + } + error /= float64(len(y)) + return error +} + // Turn Mean Absolute Error into impurity function for decision trees. func maeImpurity(y []float64) (float64, float64) { yHat := average(y) @@ -75,7 +75,7 @@ func mseImpurity(y []float64) (float64, float64) { } // Split the data based on threshold and feature for testing information gain -func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) { +func regressorCreateSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) { var left [][]float64 var lefty []float64 var right [][]float64 @@ -96,7 +96,7 @@ func rtestSplit(data [][]float64, feature int64, y []float64, threshold float64) } // Helper function for finding unique values -func rstringInSlice(a float64, list []float64) bool { +func regressorStringInSlice(a float64, list []float64) bool { for _, b := range list { if b == a { return true @@ -106,10 +106,10 @@ func rstringInSlice(a float64, list []float64) bool { } // Return only unique values of a feature -func rfindUnique(data []float64) []float64 { +func regressorFindUnique(data []float64) []float64 { var unique []float64 for i := range data { - if !rstringInSlice(data[i], unique) { + if !regressorStringInSlice(data[i], unique) { unique = append(unique, data[i]) } } @@ -117,7 +117,7 @@ func rfindUnique(data []float64) []float64 { } // Extract out a single feature from data -func rgetFeature(data [][]float64, feature int64) []float64 { +func regressorGetFeature(data [][]float64, feature int64) []float64 { var featureVals []float64 for i := range data { featureVals = append(featureVals, data[i][feature]) @@ -134,7 +134,7 @@ func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTre } // Validate that the split being tested has not been done before. 
-func rvalidate(triedSplits [][]float64, feature int64, threshold float64) bool { +func regressorValidate(triedSplits [][]float64, feature int64, threshold float64) bool { for i := range triedSplits { split := triedSplits[i] featureTried, thresholdTried := split[0], split[1] @@ -146,7 +146,7 @@ func rvalidate(triedSplits [][]float64, feature int64, threshold float64) bool { } // Re order data based on a feature for optimizing code -func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) { +func regressorReOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) { s := NewSlice(featureVal) sort.Sort(s) @@ -164,7 +164,7 @@ func rreOrderData(featureVal []float64, data [][]float64, y []float64) ([][]floa } // Update the left and right data based on change in threshold -func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty []float64, feature int64, threshold float64) ([][]float64, []float64, [][]float64, []float64) { +func regressorUpdateSplit(left [][]float64, lefty []float64, right [][]float64, righty []float64, feature int64, threshold float64) ([][]float64, []float64, [][]float64, []float64) { for right[0][feature] < threshold { left = append(left, right[0]) @@ -178,17 +178,17 @@ func rupdateSplit(left [][]float64, lefty []float64, right [][]float64, righty [ // Extra Method for creating simple to use interface. Many params are either redundant for user but are needed only for recursive logic. func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) { - var emptyNode RNode + var emptyNode regressorNode data := regressorConvertInstancesToProblemVec(X) y := regressorConvertInstancesToLabelVec(X) - emptyNode = rbestSplit(*tree, data, y, emptyNode, tree.criterion, tree.maxDepth, 0) + emptyNode = regressorBestSplit(*tree, data, y, emptyNode, tree.criterion, tree.maxDepth, 0) tree.RootNode = &emptyNode } // Essentially the Fit Method - Impelements recursive logic -func rbestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, upperNode RNode, criterion string, maxDepth int64, depth int64) RNode { +func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, upperNode regressorNode, criterion string, maxDepth int64, depth int64) regressorNode { depth++ @@ -220,16 +220,16 @@ func rbestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, u upperNode.Use_not = true - var leftN RNode - var rightN RNode + var leftN regressorNode + var rightN regressorNode // Iterate over all features for i := 0; i < numFeatures; i++ { - featureVal := rgetFeature(data, int64(i)) - unique := rfindUnique(featureVal) + featureVal := regressorGetFeature(data, int64(i)) + unique := regressorFindUnique(featureVal) sort.Float64s(unique) numUnique := len(unique) - sortData, sortY := rreOrderData(featureVal, data, y) + sortData, sortY := regressorReOrderData(featureVal, data, y) firstTime := true @@ -239,12 +239,12 @@ func rbestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, u for j := range unique { if j != (numUnique - 1) { threshold := (unique[j] + unique[j+1]) / 2 - if rvalidate(tree.triedSplits, int64(i), threshold) { + if regressorValidate(tree.triedSplits, int64(i), threshold) { if firstTime { - left, right, lefty, righty = rtestSplit(sortData, int64(i), sortY, threshold) + left, right, lefty, righty = regressorCreateSplit(sortData, int64(i), sortY, threshold) firstTime = false } else { - left, lefty, right, righty = rupdateSplit(left, lefty, 
right, righty, int64(i), threshold) + left, lefty, right, righty = regressorUpdateSplit(left, lefty, right, righty, int64(i), threshold) } var leftLoss float64 @@ -292,7 +292,7 @@ func rbestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, u if bestLeftLoss > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) - leftN = rbestSplit(tree, bestLeft, bestLefty, leftN, criterion, maxDepth, depth) + leftN = regressorBestSplit(tree, bestLeft, bestLefty, leftN, criterion, maxDepth, depth) if leftN.Use_not == true { upperNode.Left = &leftN } @@ -300,7 +300,7 @@ func rbestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, u } if bestRightLoss > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) - rightN = rbestSplit(tree, bestRight, bestRighty, rightN, criterion, maxDepth, depth) + rightN = regressorBestSplit(tree, bestRight, bestRighty, rightN, criterion, maxDepth, depth) if rightN.Use_not == true { upperNode.Right = &rightN } @@ -315,10 +315,10 @@ func rbestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, u // Print Tree for Visualtion - calls printTreeFromNode() func (tree *CARTDecisionTreeRegressor) String() string { rootNode := *tree.RootNode - return rprintTreeFromNode(rootNode, "") + return regressorPrintTreeFromNode(rootNode, "") } -func rprintTreeFromNode(tree RNode, spacing string) string { +func regressorPrintTreeFromNode(tree regressorNode, spacing string) string { returnString := "" returnString += spacing + "Feature " returnString += strconv.FormatInt(tree.Feature, 10) @@ -341,31 +341,31 @@ func rprintTreeFromNode(tree RNode, spacing string) string { if tree.Left != nil { // fmt.Println(spacing + "---> True") returnString += spacing + "---> True" + "\n" - returnString += rprintTreeFromNode(*tree.Left, spacing+" ") + returnString += regressorPrintTreeFromNode(*tree.Left, spacing+" ") } if tree.Right != nil { // fmt.Println(spacing + "---> False") returnString += spacing + "---> False" + "\n" - returnString += rprintTreeFromNode(*tree.Right, spacing+" ") + returnString += regressorPrintTreeFromNode(*tree.Right, spacing+" ") } return returnString } // Predict a single data point -func rpredictSingle(tree RNode, instance []float64) float64 { +func regressorPredictSingle(tree regressorNode, instance []float64) float64 { if instance[tree.Feature] < tree.Threshold { if tree.Left == nil { return tree.LeftPred } else { - return rpredictSingle(*tree.Left, instance) + return regressorPredictSingle(*tree.Left, instance) } } else { if tree.Right == nil { return tree.RightPred } else { - return rpredictSingle(*tree.Right, instance) + return regressorPredictSingle(*tree.Right, instance) } } } @@ -374,14 +374,14 @@ func rpredictSingle(tree RNode, instance []float64) float64 { func (tree *CARTDecisionTreeRegressor) Predict(X_test base.FixedDataGrid) []float64 { root := *tree.RootNode test := regressorConvertInstancesToProblemVec(X_test) - return rpredictFromNode(root, test) + return regressorPredictFromNode(root, test) } // Use tree's root node to print out entire tree -func rpredictFromNode(tree RNode, test [][]float64) []float64 { +func regressorPredictFromNode(tree regressorNode, test [][]float64) []float64 { var preds []float64 for i := range test { - i_pred := rpredictSingle(tree, test[i]) + i_pred := regressorPredictSingle(tree, test[i]) preds = append(preds, i_pred) } return preds From 91a27e3ca0f4d93f6331b647c1b5aeaef50a59e1 
Mon Sep 17 00:00:00 2001 From: Ayush Date: Mon, 27 Jul 2020 15:03:12 +0530 Subject: [PATCH 09/24] Fixing Comments --- examples/trees/cart.go | 1 - trees/cart_classifier.go | 53 ++++++++++++++++++++++++++-------------- trees/cart_regressor.go | 52 +++++++++++++++++++++++++++------------ 3 files changed, 71 insertions(+), 35 deletions(-) diff --git a/examples/trees/cart.go b/examples/trees/cart.go index 931b7e5..833e287 100644 --- a/examples/trees/cart.go +++ b/examples/trees/cart.go @@ -36,7 +36,6 @@ func main() { fmt.Println(decTree.Evaluate(testData)) // Load House Price Data For Regression - regressionData, err := base.ParseCSVToInstances("../datasets/boston_house_prices.csv", false) if err != nil { panic(err) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index dc54d19..9ae3b8e 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -10,9 +10,13 @@ import ( "github.com/sjwhitworth/golearn/base" ) -// The "c" prefix to function names indicates that they were tailored for classification +const ( + GINI string = "gini" + ENTROPY string = "entropy" +) -// CNode is Node struct for Decision Tree Classifier +// CNode is Node struct for Decision Tree Classifier. +// It holds the information for each split (which feature to use, what threshold, and which label to assign for each side of the split) type classifierNode struct { Left *classifierNode Right *classifierNode @@ -25,6 +29,8 @@ type classifierNode struct { } // CARTDecisionTreeClassifier: Tree struct for Decision Tree Classifier +// It contains the rootNode, as well as all of the hyperparameters chosen by the user. +// It also keeps track of all splits done at the tree level. type CARTDecisionTreeClassifier struct { RootNode *classifierNode criterion string @@ -84,7 +90,7 @@ func entropy(y []int64, labels []int64) (float64, int64) { return entropy, maxLabel } -// Split the data into left node and right node based on feature and threshold - only needed for fresh nodes +// Split the data into left node and right node based on feature and threshold func classifierCreateSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) { var left [][]float64 var right [][]float64 @@ -105,7 +111,8 @@ func classifierCreateSplit(data [][]float64, feature int64, y []int64, threshold return left, right, lefty, righty } -// Helper Function to check if data point is unique or not +// Helper Function to check if data point is unique or not. +// We will use this to isolate unique values of a feature func classifierStringInSlice(a float64, list []float64) bool { for _, b := range list { if b == a { @@ -115,7 +122,7 @@ func classifierStringInSlice(a float64, list []float64) bool { return false } -// Isolate only unique values. Needed for splitting data. +// Isolate only unique values. This way, we can try only unique splits and not redundant ones. func classifierFindUnique(data []float64) []float64 { var unique []float64 for i := range data { @@ -126,7 +133,7 @@ func classifierFindUnique(data []float64) []float64 { return unique } -// Isolate only the feature being considered for splitting +// Isolate only the feature being considered for splitting. Reduces the complexity in managing splits. 
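+// For example, extracting feature 1 from the rows {1, 3, 6} and {1, 2, 3}
+// yields the column {3, 2}.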
 func classifierGetFeature(data [][]float64, feature int64) []float64 {
 	var featureVals []float64
 	for i := range data {
@@ -135,7 +142,8 @@ func classifierGetFeature(data [][]float64, feature int64) []float64 {
 	return featureVals
 }
 
-// Function to Create New Decision Tree Classifier
+// Function to Create New Decision Tree Classifier.
+// It assigns all of the hyperparameters chosen by the user into the tree attributes.
 func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) *CARTDecisionTreeClassifier {
 	var tree CARTDecisionTreeClassifier
 	tree.criterion = strings.ToLower(criterion)
@@ -145,7 +153,8 @@ func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64)
 	return &tree
 }
 
-// Make sure that split being considered has not been done before
+// Make sure that split being considered has not been done before.
+// Else we will unnecessarily try splits that won't improve Impurity.
 func classifierValidate(triedSplits [][]float64, feature int64, threshold float64) bool {
 	for i := range triedSplits {
 		split := triedSplits[i]
@@ -175,7 +184,7 @@ func classifierReOrderData(featureVal []float64, data [][]float64, y []int64) ([
 	return dataSorted, ySorted
 }
 
-// Change data in Left Node and Right Node based on change in threshold
+// Update the left and right side of the split based on the threshold.
 func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, righty []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) {
 
 	for right[0][feature] < threshold {
@@ -188,7 +197,8 @@ func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, r
 	return left, lefty, right, righty
 }
 
-// Fit - Method visible to user to train tree
+// Fit - Creates an Empty Root Node
+// Trains the tree by calling recursive function classifierBestSplit
 func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) {
 	var emptyNode classifierNode
 
@@ -199,7 +209,8 @@ func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) {
 	tree.RootNode = &emptyNode
 }
 
-// Iterativly find and record the best split - recursive function
+// Iteratively find and record the best split
+// Stop if depth reaches maxDepth or nodes are pure
 func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, labels []int64, upperNode classifierNode, criterion string, maxDepth int64, depth int64) classifierNode {
 	// Ensure that we have not reached maxDepth. maxDepth =-1 means split until nodes are pure
@@ -214,9 +225,9 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
 	var origGini float64
 	// Calculate loss based on Criterion Specified by user
-	if criterion == "gini" {
+	if criterion == GINI {
 		origGini, upperNode.LeftLabel = giniImpurity(y, labels)
-	} else if criterion == "entropy" {
+	} else if criterion == ENTROPY {
 		origGini, upperNode.LeftLabel = entropy(y, labels)
 	} else {
 		panic("Invalid impurity function, choose from GINI or ENTROPY")
@@ -271,10 +282,10 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
 	var leftLabels int64
 	var rightLabels int64
 
-	if criterion == "gini" {
+	if criterion == GINI {
 		leftGini, leftLabels = giniImpurity(lefty, labels)
 		rightGini, rightLabels = giniImpurity(righty, labels)
-	} else if criterion == "entropy" {
+	} else if criterion == ENTROPY {
 		leftGini, leftLabels = entropy(lefty, labels)
 		rightGini, rightLabels = entropy(righty, labels)
 	}
@@ -336,7 +347,8 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
 	return upperNode
 }
 
-// PrintTree : this function prints out entire tree for visualization - visible to user
+// String : this function prints out entire tree for visualization.
+// Calls a recursive function to print the tree - classifierPrintTreeFromNode
 func (tree *CARTDecisionTreeClassifier) String() string {
 	rootNode := *tree.RootNode
 	return classifierPrintTreeFromNode(rootNode, "")
@@ -377,6 +389,7 @@ func classifierPrintTreeFromNode(tree classifierNode, spacing string) string {
 }
 
 // Predict a single data point by traversing the entire tree
+// Uses recursive logic to navigate the tree.
 func classifierPredictSingle(tree classifierNode, instance []float64) int64 {
 	if instance[tree.Feature] < tree.Threshold {
 		if tree.Left == nil {
@@ -393,14 +406,15 @@ func classifierPredictSingle(tree classifierNode, instance []float64) int64 {
 	}
 }
 
-// Predict is visible to user. Given test data, they receive predictions for every datapoint.
+// Given test data, return predictions for every datapoint. calls classifierPredictFromNode
 func (tree *CARTDecisionTreeClassifier) Predict(X_test base.FixedDataGrid) []int64 {
 	root := *tree.RootNode
 	test := classifierConvertInstancesToProblemVec(X_test)
 	return classifierPredictFromNode(root, test)
 }
 
-// This function uses the rootnode from Predict. It is invisible to user, but called from predict method.
+// This function uses the rootnode from Predict.
+// It iterates through every data point and calls the recursive function to give predictions and then summarizes them.
 func classifierPredictFromNode(tree classifierNode, test [][]float64) []int64 {
 	var preds []int64
 	for i := range test {
@@ -411,6 +425,8 @@ func classifierPredictFromNode(tree classifierNode, test [][]float64) []int64 {
 }
 
 // Given Test data and label, return the accuracy of the classifier.
+// First it retrieves predictions from the data, then compares for accuracy.
+// Calls classifierEvaluateFromNode
 func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float64 {
 	rootNode := *tree.RootNode
 	xTest := classifierConvertInstancesToProblemVec(test)
 	yTest := classifierConvertInstancesToLabelVec(test)
 	return classifierEvaluateFromNode(rootNode, xTest, yTest)
 }
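At this point the classifier's public surface is Fit, Predict, Evaluate and String. A usage sketch in the spirit of examples/trees/cart.go; the CSV path and base.InstancesTrainTestSplit are taken from golearn's stock examples and are assumptions here, not something this patch adds:

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/trees"
)

func main() {
	// Titanic data with binary labels, as in examples/trees/cart.go.
	data, err := base.ParseCSVToInstances("../datasets/titanic.csv", false)
	if err != nil {
		panic(err)
	}
	train, test := base.InstancesTrainTestSplit(data, 0.8)

	tree := trees.NewDecisionTreeClassifier("gini", -1, []int64{0, 1})
	tree.Fit(train)

	fmt.Println(tree)                // String() renders the learned splits
	fmt.Println(tree.Predict(test))  // one int64 label per test row
	fmt.Println(tree.Evaluate(test)) // accuracy on the held-out rows
}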
@@ -418,6 +434,7 @@ func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float6
+// Retrieve predictions and then calculate accuracy.
 func classifierEvaluateFromNode(tree classifierNode, xTest [][]float64, yTest []int64) float64 {
 	preds := classifierPredictFromNode(tree, xTest)
 	accuracy := 0.0
diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go
index 6841a71..d894db5 100644
--- a/trees/cart_regressor.go
+++ b/trees/cart_regressor.go
@@ -10,9 +10,14 @@ import (
 	"github.com/sjwhitworth/golearn/base"
 )
 
-// The "r" prefix to all function names indicates that they were tailored to support regression.
+const (
+	MAE string = "mae"
+	MSE string = "mse"
+)
 
 // RNode - Node struct for Decision Tree Regressor
+// It holds the information for each split
+// Which feature to use, threshold, left prediction and right prediction
 type regressorNode struct {
 	Left      *regressorNode
 	Right     *regressorNode
 	Threshold float64
 	Feature   int64
 	LeftPred  float64
@@ -24,6 +29,8 @@ type regressorNode struct {
 
 // CARTDecisionTreeRegressor - Tree struct for Decision Tree Regressor
+// It contains the rootNode, as well as the hyperparameters chosen by user.
+// Also keeps track of splits used at tree level.
 type CARTDecisionTreeRegressor struct {
 	RootNode    *regressorNode
 	criterion   string
@@ -74,7 +81,7 @@ func mseImpurity(y []float64) (float64, float64) {
 	return meanSquaredError(y, yHat), yHat
 }
 
-// Split the data based on threshold and feature for testing information gain
+// Split the data into left and right based on threshold and feature.
 func regressorCreateSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) {
 	var left [][]float64
 	var lefty []float64
 	var right [][]float64
@@ -105,7 +113,8 @@ func regressorCreateSplit(data [][]float64, feature int64, y []float64, threshol
 	return left, right, lefty, righty
 }
 
-// Helper function for finding unique values
+// Helper function for finding unique values.
+// Used for isolating unique values in a feature.
 func regressorStringInSlice(a float64, list []float64) bool {
 	for _, b := range list {
 		if b == a {
 			return true
@@ -116,7 +125,8 @@ func regressorStringInSlice(a float64, list []float64) bool {
 	return false
 }
 
-// Return only unique values of a feature
+// Isolate only unique values.
+// This way we can only try unique splits.
 func regressorFindUnique(data []float64) []float64 {
 	var unique []float64
 	for i := range data {
@@ -125,7 +135,8 @@ func regressorFindUnique(data []float64) []float64 {
 	return unique
 }
 
-// Extract out a single feature from data
+// Extract out a single feature from data.
+// Reduces complexity in managing splits and sorting
 func regressorGetFeature(data [][]float64, feature int64) []float64 {
 	var featureVals []float64
 	for i := range data {
@@ -134,7 +144,7 @@ func regressorGetFeature(data [][]float64, feature int64) []float64 {
 	return featureVals
 }
 
-// Interface for creating new Decision Tree Regressor - cals rbestSplit()
+// Interface for creating new Decision Tree Regressor
 func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTreeRegressor {
 	var tree CARTDecisionTreeRegressor
 	tree.maxDepth = maxDepth
@@ -143,6 +153,7 @@ func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTre
 }
 
 // Validate that the split being tested has not been done before.
+// This prevents redundant splits from happening.
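+// Each entry of triedSplits is a (feature index, threshold) pair, and a
+// candidate split is rejected only when both components match exactly.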
 func regressorValidate(triedSplits [][]float64, feature int64, threshold float64) bool {
 	for i := range triedSplits {
 		split := triedSplits[i]
@@ -146,6 +157,7 @@ func regressorValidate(triedSplits [][]float64, feature int64, threshold float64
 }
 
 // Re order data based on a feature for optimizing code
+// Helps in updating splits without reiterating entire dataset
 func regressorReOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) {
 	s := NewSlice(featureVal)
 	sort.Sort(s)
@@ -176,7 +188,8 @@ func regressorUpdateSplit(left [][]float64, lefty []float64, right [][]float64,
 	return left, lefty, right, righty
 }
 
-// Extra Method for creating simple to use interface. Many params are either redundant for user but are needed only for recursive logic.
+// Fit - Build the tree using the data
+// Creates empty root node and builds tree by calling regressorBestSplit
 func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) {
 	var emptyNode regressorNode
 	data := regressorConvertInstancesToProblemVec(X)
@@ -187,7 +200,8 @@ func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) {
 	tree.RootNode = &emptyNode
 }
 
-// Essentially the Fit Method - Impelements recursive logic
+// Builds the tree by iteratively finding the best split.
+// Recursive function - stops if maxDepth is reached or nodes are pure
 func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, upperNode regressorNode, criterion string, maxDepth int64, depth int64) regressorNode {
 
 	depth++
@@ -200,10 +214,12 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
 	var bestLoss float64
 	var origLoss float64
 
-	if criterion == "mae" {
+	if criterion == MAE {
 		origLoss, upperNode.LeftPred = maeImpurity(y)
-	} else {
+	} else if criterion == MSE {
 		origLoss, upperNode.LeftPred = mseImpurity(y)
+	} else {
+		panic("Invalid impurity function, choose from MAE or MSE")
 	}
 
 	bestLoss = origLoss
@@ -252,10 +268,10 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
 	var leftPred float64
 	var rightPred float64
 
-	if criterion == "mae" {
+	if criterion == MAE {
 		leftLoss, leftPred = maeImpurity(lefty)
 		rightLoss, rightPred = maeImpurity(righty)
-	} else {
+	} else if criterion == MSE {
 		leftLoss, leftPred = mseImpurity(lefty)
 		rightLoss, rightPred = mseImpurity(righty)
 	}
@@ -312,12 +328,13 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
 	return upperNode
 }
 
-// Print Tree for Visualtion - calls printTreeFromNode()
+// Print Tree for Visualization - calls regressorPrintTreeFromNode()
 func (tree *CARTDecisionTreeRegressor) String() string {
 	rootNode := *tree.RootNode
 	return regressorPrintTreeFromNode(rootNode, "")
 }
 
+// Recursively explore the entire tree and print out all details such as threshold, feature, prediction
 func regressorPrintTreeFromNode(tree regressorNode, spacing string) string {
 	returnString := ""
 	returnString += spacing + "Feature "
@@ -353,7 +370,8 @@ func regressorPrintTreeFromNode(tree regressorNode, spacing string) string {
 	return returnString
 }
 
-// Predict a single data point
+// Predict a single data point by navigating from the root node.
+// Uses recursive logic
 func regressorPredictSingle(tree regressorNode, instance []float64) float64 {
 	if instance[tree.Feature] < tree.Threshold {
 		if tree.Left == nil {
@@ -370,14 +388,16 @@ func regressorPredictSingle(tree regressorNode, instance []float64) float64 {
 	}
 }
-// Predict method for multiple data points. Calls predictFromNode()
+// Predict method for multiple data points.
+// First converts input data into usable format, and then calls regressorPredictFromNode
 func (tree *CARTDecisionTreeRegressor) Predict(X_test base.FixedDataGrid) []float64 {
 	root := *tree.RootNode
 	test := regressorConvertInstancesToProblemVec(X_test)
 	return regressorPredictFromNode(root, test)
 }
 
-// Use tree's root node to print out entire tree
+// Use the tree's root node to predict for the entire test set.
+// Iterates over all data points and calls regressorPredictSingle to predict individual datapoints.
 func regressorPredictFromNode(tree regressorNode, test [][]float64) []float64 {
 	var preds []float64
 	for i := range test {

From ef751e62c484badf66a142053e7c7b55eb5e38f2 Mon Sep 17 00:00:00 2001
From: Ayush
Date: Mon, 27 Jul 2020 17:08:44 +0530
Subject: [PATCH 10/24] Adding cart_test.go

---
 trees/cart_test.go | 109 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 trees/cart_test.go

diff --git a/trees/cart_test.go b/trees/cart_test.go
new file mode 100644
index 0000000..047392a
--- /dev/null
+++ b/trees/cart_test.go
@@ -0,0 +1,109 @@
+package trees
+
+import (
+	"fmt"
+	"testing"
+
+	. "github.com/smartystreets/goconvey/convey"
+)
+
+func TestRegressor(t *testing.T) {
+
+	Convey("Doing a CART Test", t, func() {
+		// For Classification Trees:
+
+		// Is Gini being calculated correctly
+		gini, giniMaxLabel := giniImpurity([]int64{1, 0, 0, 1}, []int64{0, 1})
+		So(gini, ShouldEqual, 0.5)
+		So(giniMaxLabel, ShouldNotBeNil)
+
+		// Is Entropy being calculated correctly
+		entropy, entropyMaxLabel := entropy([]int64{1, 0, 0, 1}, []int64{0, 1})
+		So(entropy, ShouldEqual, 1.0)
+		So(entropyMaxLabel, ShouldNotBeNil)
+
+		// Is Data being split into left and right properly
+		classifierData := [][]float64{[]float64{1, 3, 6},
+			[]float64{1, 2, 3},
+			[]float64{1, 9, 6},
+			[]float64{1, 11, 1}}
+
+		classifiery := []int64{0, 1, 0, 0}
+
+		leftdata, rightdata, lefty, righty := classifierCreateSplit(classifierData, 1, classifiery, 5.0)
+
+		So(len(leftdata), ShouldEqual, 2)
+		So(len(lefty), ShouldEqual, 2)
+		So(len(rightdata), ShouldEqual, 2)
+		So(len(righty), ShouldEqual, 2)
+
+		// Is isolating unique values working properly
+		So(len(classifierFindUnique([]float64{10, 1, 1})), ShouldEqual, 2)
+
+		// is data reordered correctly
+		orderedData, orderedY := classifierReOrderData(classifierGetFeature(classifierData, 1), classifierData, classifiery)
+		fmt.Println(orderedData)
+		fmt.Println(orderedY)
+		So(orderedData[1][1], ShouldEqual, 3.0)
+		So(orderedY[0], ShouldEqual, 1)
+
+		// Is split being updated properly based on threshold
+		leftdata, lefty, rightdata, righty = classifierUpdateSplit(leftdata, lefty, rightdata, righty, 1, 9.5)
+		So(len(leftdata), ShouldEqual, 3)
+		So(len(rightdata), ShouldEqual, 1)
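+		// (With the rows sorted on feature 1 as {2, 3, 9, 11}, raising the
+		// threshold from 5.0 to 9.5 moves only the row with value 9 across,
+		// which is why the sizes become 3 and 1.)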
+
+		// Is the root Node null when tree is not trained?
+		tree := NewDecisionTreeClassifier("gini", -1, []int64{0, 1})
+		So(tree.RootNode, ShouldBeNil)
+		So(tree.triedSplits, ShouldBeEmpty)
+
+		// ------------------------------------------
+		// For Regression Trees
+
+		// Is MAE being calculated correctly
+		mae, maeMaxLabel := maeImpurity([]float64{1, 3, 5})
+		So(mae, ShouldEqual, (4.0 / 3.0))
+		So(maeMaxLabel, ShouldNotBeNil)
+
+		// Is MSE being calculated correctly
+		mse, mseMaxLabel := mseImpurity([]float64{1, 3, 5})
+		So(mse, ShouldEqual, (8.0 / 3.0))
+		So(mseMaxLabel, ShouldNotBeNil)
+
+		// Is Data being split into left and right properly
+		data := [][]float64{[]float64{1, 3, 6},
+			[]float64{1, 2, 3},
+			[]float64{1, 9, 6},
+			[]float64{1, 11, 1}}
+
+		y := []float64{1, 2, 3, 4}
+
+		leftData, rightData, leftY, rightY := regressorCreateSplit(data, 1, y, 5.0)
+
+		So(len(leftData), ShouldEqual, 2)
+		So(len(lefty), ShouldEqual, 2)
+		So(len(rightData), ShouldEqual, 2)
+		So(len(righty), ShouldEqual, 2)
+
+		// Is isolating unique values working properly
+		So(len(regressorFindUnique([]float64{10, 1, 1})), ShouldEqual, 2)
+
+		// is data reordered correctly
+		regressorOrderedData, regressorOrderedY := regressorReOrderData(regressorGetFeature(data, 1), data, y)
+
+		So(regressorOrderedData[1][1], ShouldEqual, 3.0)
+		So(regressorOrderedY[0], ShouldEqual, 2)
+
+		// Is split being updated properly based on threshold
+		leftData, leftY, rightData, rightY = regressorUpdateSplit(leftData, leftY, rightData, rightY, 1, 9.5)
+		So(len(leftData), ShouldEqual, 3)
+		So(len(rightData), ShouldEqual, 1)
+
+		// Is the root Node null when tree is not trained?
+		regressorTree := NewDecisionTreeRegressor("mae", -1)
+		So(regressorTree.RootNode, ShouldBeNil)
+		So(regressorTree.triedSplits, ShouldBeEmpty)
+
+	})
+
+}

From 2d2af0a58f54044bdbe10238e4aba88f2a8d45cf Mon Sep 17 00:00:00 2001
From: Ayush
Date: Tue, 28 Jul 2020 14:17:18 +0530
Subject: [PATCH 11/24] Removing Clutter

Partial modularization of the best split method. Shortened the method by
declaring variables on the same line as well. Also removing redundant
functions, and adding them into cart_utils.

---
 trees/cart_classifier.go | 194 +++++++++++----------------------------
 trees/cart_regressor.go  | 162 ++++++++++----------------------
 trees/cart_test.go       |   9 +-
 trees/cart_utils.go      |  74 +++++++++++++++
 4 files changed, 181 insertions(+), 258 deletions(-)
 create mode 100644 trees/cart_utils.go

diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go
index 9ae3b8e..c2ba59c 100644
--- a/trees/cart_classifier.go
+++ b/trees/cart_classifier.go
@@ -90,6 +90,16 @@ func entropy(y []int64, labels []int64) (float64, int64) {
 	return entropy, maxLabel
 }
 
+func calculateClassificationLoss(y []int64, labels []int64, criterion string) (float64, int64) {
+	if criterion == GINI {
+		return giniImpurity(y, labels)
+	} else if criterion == ENTROPY {
+		return entropy(y, labels)
+	} else {
+		panic("Invalid impurity function, choose from GINI or ENTROPY")
+	}
+}
+
 // Split the data into left node and right node based on feature and threshold
 func classifierCreateSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) {
 	var left [][]float64
 	var right [][]float64
@@ -111,37 +121,6 @@ func classifierCreateSplit(data [][]float64, feature int64, y []int64, threshold
 	return left, right, lefty, righty
 }
 
-// Helper Function to check if data point is unique or not.
-// We will use this to isolate unique values of a feature -func classifierStringInSlice(a float64, list []float64) bool { - for _, b := range list { - if b == a { - return true - } - } - return false -} - -// Isolate only unique values. This way, we can try only unique splits and not redundant ones. -func classifierFindUnique(data []float64) []float64 { - var unique []float64 - for i := range data { - if !classifierStringInSlice(data[i], unique) { - unique = append(unique, data[i]) - } - } - return unique -} - -// Isolate only the feature being considered for splitting. Reduces the complexity in managing splits. -func classifierGetFeature(data [][]float64, feature int64) []float64 { - var featureVals []float64 - for i := range data { - featureVals = append(featureVals, data[i][feature]) - } - return featureVals -} - // Function to Create New Decision Tree Classifier. // It assigns all of the hyperparameters by user into the tree attributes. func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) *CARTDecisionTreeClassifier { @@ -153,19 +132,6 @@ func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) return &tree } -// Make sure that split being considered has not been done before. -// Else we will unnecessarily try splits that won't improve Impurity. -func classifierValidate(triedSplits [][]float64, feature int64, threshold float64) bool { - for i := range triedSplits { - split := triedSplits[i] - featureTried, thresholdTried := split[0], split[1] - if int64(featureTried) == feature && thresholdTried == threshold { - return false - } - } - return true -} - // Reorder the data by feature being considered. Optimizes code by reducing the number of times we have to loop over data for splitting func classifierReOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) { s := NewSlice(featureVal) @@ -202,7 +168,7 @@ func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, r func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) { var emptyNode classifierNode - data := classifierConvertInstancesToProblemVec(X) + data := convertInstancesToProblemVec(X) y := classifierConvertInstancesToLabelVec(X) emptyNode = classifierBestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) @@ -221,40 +187,29 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] } numFeatures := len(data[0]) - var bestGini float64 - var origGini float64 + var bestGini, origGini float64 // Calculate loss based on Criterion Specified by user - if criterion == GINI { - origGini, upperNode.LeftLabel = giniImpurity(y, labels) - } else if criterion == ENTROPY { - origGini, upperNode.LeftLabel = entropy(y, labels) - } else { - panic("Invalid impurity function, choose from GINI or ENTROPY") - } + origGini, upperNode.LeftLabel = calculateClassificationLoss(y, labels, criterion) bestGini = origGini - bestLeft := data - bestRight := data - bestLefty := y - bestRighty := y + bestLeft, bestRight, bestLefty, bestRighty := data, data, y, y numData := len(data) - bestLeftGini := bestGini - bestRightGini := bestGini + bestLeftGini, bestRightGini := bestGini, bestGini upperNode.Use_not = true - var leftN classifierNode - var rightN classifierNode + var leftN, rightN classifierNode + // Iterate over all features for i := 0; i < numFeatures; i++ { - featureVal := classifierGetFeature(data, int64(i)) - unique := classifierFindUnique(featureVal) + + featureVal := getFeature(data, 
int64(i)) + unique := findUnique(featureVal) sort.Float64s(unique) - numUnique := len(unique) sortData, sortY := classifierReOrderData(featureVal, data, y) @@ -263,53 +218,43 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] var left, right [][]float64 var lefty, righty []int64 // Iterate over all possible thresholds for that feature - for j := range unique { - if j != (numUnique - 1) { - threshold := (unique[j] + unique[j+1]) / 2 - // Ensure that same split has not been made before - if classifierValidate(tree.triedSplits, int64(i), threshold) { - // We need to split data from fresh when considering new feature for the first time. - // Otherwise, we need to update the split by moving data points from left to right. - if firstTime { - left, right, lefty, righty = classifierCreateSplit(sortData, int64(i), sortY, threshold) - firstTime = false - } else { - left, lefty, right, righty = classifierUpdateSplit(left, lefty, right, righty, int64(i), threshold) - } + for j := 0; j < len(unique)-1; j++ { - var leftGini float64 - var rightGini float64 - var leftLabels int64 - var rightLabels int64 - - if criterion == GINI { - leftGini, leftLabels = giniImpurity(lefty, labels) - rightGini, rightLabels = giniImpurity(righty, labels) - } else if criterion == ENTROPY { - leftGini, leftLabels = entropy(lefty, labels) - rightGini, rightLabels = entropy(righty, labels) - } - // Calculate weighted gini impurity of child nodes - subGini := (leftGini * float64(len(left)) / float64(numData)) + (rightGini * float64(len(right)) / float64(numData)) - - // If we find a split that reduces impurity - if subGini < bestGini { - bestGini = subGini - bestLeft = left - bestRight = right - bestLefty = lefty - bestRighty = righty - upperNode.Threshold = threshold - upperNode.Feature = int64(i) - - upperNode.LeftLabel = leftLabels - upperNode.RightLabel = rightLabels - - bestLeftGini = leftGini - bestRightGini = rightGini - } + threshold := (unique[j] + unique[j+1]) / 2 + // Ensure that same split has not been made before + if validate(tree.triedSplits, int64(i), threshold) { + // We need to split data from fresh when considering new feature for the first time. + // Otherwise, we need to update the split by moving data points from left to right. 
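+					// Because the rows are sorted on this feature, each new threshold
+					// only moves the rows whose value falls between the old and new
+					// thresholds, instead of re-partitioning the whole dataset.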
+ if firstTime { + left, right, lefty, righty = classifierCreateSplit(sortData, int64(i), sortY, threshold) + firstTime = false + } else { + left, lefty, right, righty = classifierUpdateSplit(left, lefty, right, righty, int64(i), threshold) } + var leftGini, rightGini float64 + var leftLabels, rightLabels int64 + + leftGini, leftLabels = calculateClassificationLoss(lefty, labels, criterion) + rightGini, rightLabels = calculateClassificationLoss(righty, labels, criterion) + + // Calculate weighted gini impurity of child nodes + subGini := (leftGini * float64(len(left)) / float64(numData)) + (rightGini * float64(len(right)) / float64(numData)) + + // If we find a split that reduces impurity + if subGini < bestGini { + bestGini = subGini + + bestLeft, bestRight = left, right + + bestLefty, bestRighty = lefty, righty + + upperNode.Threshold, upperNode.Feature = threshold, int64(i) + + upperNode.LeftLabel, upperNode.RightLabel = leftLabels, rightLabels + + bestLeftGini, bestRightGini = leftGini, rightGini + } } } } @@ -366,10 +311,8 @@ func classifierPrintTreeFromNode(tree classifierNode, spacing string) string { returnString += spacing + "---> True" + "\n" returnString += " " + spacing + "PREDICT " returnString += strconv.FormatInt(tree.LeftLabel, 10) + "\n" - } if tree.Right == nil { - returnString += spacing + "---> False" + "\n" returnString += " " + spacing + "PREDICT " returnString += strconv.FormatInt(tree.RightLabel, 10) + "\n" @@ -409,7 +352,7 @@ func classifierPredictSingle(tree classifierNode, instance []float64) int64 { // Given test data, return predictions for every datapoint. calls classifierPredictFromNode func (tree *CARTDecisionTreeClassifier) Predict(X_test base.FixedDataGrid) []int64 { root := *tree.RootNode - test := classifierConvertInstancesToProblemVec(X_test) + test := convertInstancesToProblemVec(X_test) return classifierPredictFromNode(root, test) } @@ -429,7 +372,7 @@ func classifierPredictFromNode(tree classifierNode, test [][]float64) []int64 { // Calls classifierEvaluateFromNode func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float64 { rootNode := *tree.RootNode - xTest := classifierConvertInstancesToProblemVec(test) + xTest := convertInstancesToProblemVec(test) yTest := classifierConvertInstancesToLabelVec(test) return classifierEvaluateFromNode(rootNode, xTest, yTest) } @@ -447,31 +390,6 @@ func classifierEvaluateFromNode(tree classifierNode, xTest [][]float64, yTest [] return accuracy } -// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict -func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 { - // Allocate problem array - _, rows := X.Size() - problemVec := make([][]float64, rows) - - // Retrieve numeric non-class Attributes - numericAttrs := base.NonClassFloatAttributes(X) - numericAttrSpecs := base.ResolveAttributes(X, numericAttrs) - - // Convert each row - X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) { - // Allocate a new row - probRow := make([]float64, len(numericAttrSpecs)) - // Read out the row - for i, _ := range numericAttrSpecs { - probRow[i] = base.UnpackBytesToFloat(row[i]) - } - // Add the row - problemVec[rowNo] = probRow - return true, nil - }) - return problemVec -} - // Helper function to convert base.FixedDataGrid into required format. 
Called in Fit, Predict
 func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 {
 	// Get the class Attributes
diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go
index 34b7880..1d2d326 100644
--- a/trees/cart_regressor.go
+++ b/trees/cart_regressor.go
@@ -81,6 +81,16 @@ func mseImpurity(y []float64) (float64, float64) {
 	return meanSquaredError(y, yHat), yHat
 }
 
+func calculateRegressionLoss(y []float64, criterion string) (float64, float64) {
+	if criterion == MAE {
+		return maeImpurity(y)
+	} else if criterion == MSE {
+		return mseImpurity(y)
+	} else {
+		panic("Invalid impurity function, choose from MAE or MSE")
+	}
+}
+
 // Split the data into left and right based on threshold and feature.
 func regressorCreateSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) {
 	var left [][]float64
 	var lefty []float64
 	var right [][]float64
@@ -102,39 +112,6 @@ func regressorCreateSplit(data [][]float64, feature int64, y []float64, threshol
 	return left, right, lefty, righty
 }
 
-// Helper function for finding unique values.
-// Used for isolating unique values in a feature.
-func regressorStringInSlice(a float64, list []float64) bool {
-	for _, b := range list {
-		if b == a {
-			return true
-		}
-	}
-	return false
-}
-
-// Isolate only unique values.
-// This way we can only try unique splits.
-func regressorFindUnique(data []float64) []float64 {
-	var unique []float64
-	for i := range data {
-		if !regressorStringInSlice(data[i], unique) {
-			unique = append(unique, data[i])
-		}
-	}
-	return unique
-}
-
-// Extract out a single feature from data.
-// Reduces complexity in managing splits and sorting
-func regressorGetFeature(data [][]float64, feature int64) []float64 {
-	var featureVals []float64
-	for i := range data {
-		featureVals = append(featureVals, data[i][feature])
-	}
-	return featureVals
-}
-
 // Interface for creating new Decision Tree Regressor
 func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTreeRegressor {
 	var tree CARTDecisionTreeRegressor
 	tree.maxDepth = maxDepth
@@ -143,19 +120,6 @@ func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTre
 	return &tree
 }
 
-// Validate that the split being tested has not been done before.
-// This prevents redundant splits from happening.
-func regressorValidate(triedSplits [][]float64, feature int64, threshold float64) bool {
-	for i := range triedSplits {
-		split := triedSplits[i]
-		featureTried, thresholdTried := split[0], split[1]
-		if int64(featureTried) == feature && thresholdTried == threshold {
-			return false
-		}
-	}
-	return true
-}
-
 // Re order data based on a feature for optimizing code
 // Helps in updating splits without reiterating entire dataset
 func regressorReOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) {
 	s := NewSlice(featureVal)
 	sort.Sort(s)
@@ -204,6 +168,7 @@ func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) {
 // Recursive function - stops if maxDepth is reached or nodes are pure
 func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
+	// Ensure that we have not reached maxDepth.
maxDepth =-1 means split until nodes are pure depth++ if depth > maxDepth && maxDepth != -1 { @@ -211,39 +176,27 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl } numFeatures := len(data[0]) - var bestLoss float64 - var origLoss float64 + var bestLoss, origLoss float64 - if criterion == MAE { - origLoss, upperNode.LeftPred = maeImpurity(y) - } else if criterion == MSE { - origLoss, upperNode.LeftPred = mseImpurity(y) - } else { - panic("Invalid impurity function, choose from MAE or MSE") - } + origLoss, upperNode.LeftPred = calculateRegressionLoss(y, criterion) bestLoss = origLoss - bestLeft := data - bestRight := data - bestLefty := y - bestRighty := y + bestLeft, bestRight, bestLefty, bestRighty := data, data, y, y numData := len(data) - bestLeftLoss := bestLoss - bestRightLoss := bestLoss + bestLeftLoss, bestRightLoss := bestLoss, bestLoss upperNode.Use_not = true - var leftN regressorNode - var rightN regressorNode + var leftN, rightN regressorNode // Iterate over all features for i := 0; i < numFeatures; i++ { - featureVal := regressorGetFeature(data, int64(i)) - unique := regressorFindUnique(featureVal) + + featureVal := getFeature(data, int64(i)) + unique := findUnique(featureVal) sort.Float64s(unique) - numUnique := len(unique) sortData, sortY := regressorReOrderData(featureVal, data, y) @@ -252,49 +205,36 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl var left, right [][]float64 var lefty, righty []float64 - for j := range unique { - if j != (numUnique - 1) { - threshold := (unique[j] + unique[j+1]) / 2 - if regressorValidate(tree.triedSplits, int64(i), threshold) { - if firstTime { - left, right, lefty, righty = regressorCreateSplit(sortData, int64(i), sortY, threshold) - firstTime = false - } else { - left, lefty, right, righty = regressorUpdateSplit(left, lefty, right, righty, int64(i), threshold) - } - - var leftLoss float64 - var rightLoss float64 - var leftPred float64 - var rightPred float64 - - if criterion == MAE { - leftLoss, leftPred = maeImpurity(lefty) - rightLoss, rightPred = maeImpurity(righty) - } else if criterion == MSE { - leftLoss, leftPred = mseImpurity(lefty) - rightLoss, rightPred = mseImpurity(righty) - } - - subLoss := (leftLoss * float64(len(left)) / float64(numData)) + (rightLoss * float64(len(right)) / float64(numData)) - - if subLoss < bestLoss { - bestLoss = subLoss - bestLeft = left - bestRight = right - bestLefty = lefty - bestRighty = righty - upperNode.Threshold = threshold - upperNode.Feature = int64(i) - - upperNode.LeftPred = leftPred - upperNode.RightPred = rightPred - - bestLeftLoss = leftLoss - bestRightLoss = rightLoss - } + for j := 0; j < len(unique)-1; j++ { + threshold := (unique[j] + unique[j+1]) / 2 + if validate(tree.triedSplits, int64(i), threshold) { + if firstTime { + left, right, lefty, righty = regressorCreateSplit(sortData, int64(i), sortY, threshold) + firstTime = false + } else { + left, lefty, right, righty = regressorUpdateSplit(left, lefty, right, righty, int64(i), threshold) } + var leftLoss, rightLoss float64 + var leftPred, rightPred float64 + + leftLoss, leftPred = calculateRegressionLoss(lefty, criterion) + rightLoss, rightPred = calculateRegressionLoss(righty, criterion) + + subLoss := (leftLoss * float64(len(left)) / float64(numData)) + (rightLoss * float64(len(right)) / float64(numData)) + + if subLoss < bestLoss { + bestLoss = subLoss + + bestLeft, bestRight = left, right + bestLefty, bestRighty = lefty, righty + + upperNode.Threshold, 
upperNode.Feature = threshold, int64(i) + + upperNode.LeftPred, upperNode.RightPred = leftPred, rightPred + + bestLeftLoss, bestRightLoss = leftLoss, rightLoss + } } } } @@ -312,19 +252,16 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl if leftN.Use_not == true { upperNode.Left = &leftN } - } + if bestRightLoss > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) rightN = regressorBestSplit(tree, bestRight, bestRighty, rightN, criterion, maxDepth, depth) if rightN.Use_not == true { upperNode.Right = &rightN } - } - } - return upperNode } @@ -349,20 +286,17 @@ func regressorPrintTreeFromNode(tree regressorNode, spacing string) string { returnString += fmt.Sprintf("%.3f", tree.LeftPred) + "\n" } if tree.Right == nil { - returnString += spacing + "---> False" + "\n" returnString += " " + spacing + "PREDICT " returnString += fmt.Sprintf("%.3f", tree.RightPred) + "\n" } if tree.Left != nil { - // fmt.Println(spacing + "---> True") returnString += spacing + "---> True" + "\n" returnString += regressorPrintTreeFromNode(*tree.Left, spacing+" ") } if tree.Right != nil { - // fmt.Println(spacing + "---> False") returnString += spacing + "---> False" + "\n" returnString += regressorPrintTreeFromNode(*tree.Right, spacing+" ") } diff --git a/trees/cart_test.go b/trees/cart_test.go index 047392a..50387b9 100644 --- a/trees/cart_test.go +++ b/trees/cart_test.go @@ -38,10 +38,10 @@ func TestRegressor(t *testing.T) { So(len(righty), ShouldEqual, 2) // Is isolating unique values working properly - So(len(classifierFindUnique([]float64{10, 1, 1})), ShouldEqual, 2) + So(len(findUnique([]float64{10, 1, 1})), ShouldEqual, 2) // is data reordered correctly - orderedData, orderedY := classifierReOrderData(classifierGetFeature(classifierData, 1), classifierData, classifiery) + orderedData, orderedY := classifierReOrderData(getFeature(classifierData, 1), classifierData, classifiery) fmt.Println(orderedData) fmt.Println(orderedY) So(orderedData[1][1], ShouldEqual, 3.0) @@ -85,11 +85,8 @@ func TestRegressor(t *testing.T) { So(len(rightData), ShouldEqual, 2) So(len(righty), ShouldEqual, 2) - // Is isolating unique values working properly - So(len(regressorFindUnique([]float64{10, 1, 1})), ShouldEqual, 2) - // is data reordered correctly - regressorOrderedData, regressorOrderedY := regressorReOrderData(regressorGetFeature(data, 1), data, y) + regressorOrderedData, regressorOrderedY := regressorReOrderData(getFeature(data, 1), data, y) So(regressorOrderedData[1][1], ShouldEqual, 3.0) So(regressorOrderedY[0], ShouldEqual, 2) diff --git a/trees/cart_utils.go b/trees/cart_utils.go new file mode 100644 index 0000000..d3b9b4a --- /dev/null +++ b/trees/cart_utils.go @@ -0,0 +1,74 @@ +package trees + +import ( + "github.com/sjwhitworth/golearn/base" +) + +// Helper Function to check if data point is unique or not. +// We will use this to isolate unique values of a feature +func stringInSlice(a float64, list []float64) bool { + for _, b := range list { + if b == a { + return true + } + } + return false +} + +// Isolate only unique values. This way, we can try only unique splits and not redundant ones. +func findUnique(data []float64) []float64 { + var unique []float64 + for i := range data { + if !stringInSlice(data[i], unique) { + unique = append(unique, data[i]) + } + } + return unique +} + +// Isolate only the feature being considered for splitting. Reduces the complexity in managing splits. 
+func getFeature(data [][]float64, feature int64) []float64 { + var featureVals []float64 + for i := range data { + featureVals = append(featureVals, data[i][feature]) + } + return featureVals +} + +// Make sure that split being considered has not been done before. +// Else we will unnecessarily try splits that won't improve Impurity. +func validate(triedSplits [][]float64, feature int64, threshold float64) bool { + for i := range triedSplits { + split := triedSplits[i] + featureTried, thresholdTried := split[0], split[1] + if int64(featureTried) == feature && thresholdTried == threshold { + return false + } + } + return true +} + +// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict +func convertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 { + // Allocate problem array + _, rows := X.Size() + problemVec := make([][]float64, rows) + + // Retrieve numeric non-class Attributes + numericAttrs := base.NonClassFloatAttributes(X) + numericAttrSpecs := base.ResolveAttributes(X, numericAttrs) + + // Convert each row + X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) { + // Allocate a new row + probRow := make([]float64, len(numericAttrSpecs)) + // Read out the row + for i, _ := range numericAttrSpecs { + probRow[i] = base.UnpackBytesToFloat(row[i]) + } + // Add the row + problemVec[rowNo] = probRow + return true, nil + }) + return problemVec +} From 1954aae7a685bdcef60080aa096cc6033e9012aa Mon Sep 17 00:00:00 2001 From: Ayush Date: Thu, 30 Jul 2020 10:27:16 +0530 Subject: [PATCH 12/24] Changing name of Use_not --- trees/cart_classifier.go | 25 ++++++++++++------------- trees/cart_regressor.go | 22 +++++++++++----------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index c2ba59c..17a3ee7 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -18,14 +18,13 @@ const ( // CNode is Node struct for Decision Tree Classifier. 
// It holds the information for each split (which feature to use, what threshold, and which label to assign for each side of the split)
 type classifierNode struct {
-	Left       *classifierNode
-	Right      *classifierNode
-	Threshold  float64
-	Feature    int64
-	LeftLabel  int64
-	RightLabel int64
-	Use_not    bool
-	maxDepth   int64
+	Left         *classifierNode
+	Right        *classifierNode
+	Threshold    float64
+	Feature      int64
+	LeftLabel    int64
+	RightLabel   int64
+	isNodeNeeded bool
 }
 
 // CARTDecisionTreeClassifier: Tree struct for Decision Tree Classifier
@@ -163,7 +162,7 @@ func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, r
 	return left, lefty, right, righty
 }
 
 // Fit - Creates an Empty Root Node
 // Trains the tree by calling recursive function classifierBestSplit
 func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) {
 	var emptyNode classifierNode
@@ -200,7 +199,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
 	bestLeftGini, bestRightGini := bestGini, bestGini
 
-	upperNode.Use_not = true
+	upperNode.isNodeNeeded = true
 
 	var leftN, rightN classifierNode
 
@@ -260,7 +259,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
 	}
 	// If no split was found, we don't want to use this node, so we will flag it
 	if bestGini == origGini {
-		upperNode.Use_not = false
+		upperNode.isNodeNeeded = false
 		return upperNode
 	}
 	// Until nodes are not pure
@@ -271,7 +270,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
 			tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold})
 			// Recursive splitting logic
 			leftN = classifierBestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth)
-			if leftN.Use_not == true {
+			if leftN.isNodeNeeded == true {
 				upperNode.Left = &leftN
 			}
 
@@ -281,7 +280,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
 			tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold})
 			// Recursive splitting logic
 			rightN = classifierBestSplit(tree, bestRight, bestRighty, labels, rightN, criterion, maxDepth, depth)
-			if rightN.Use_not == true {
+			if rightN.isNodeNeeded == true {
 				upperNode.Right = &rightN
 			}
 
diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go
index 1d2d326..b94da1d 100644
--- a/trees/cart_regressor.go
+++ b/trees/cart_regressor.go
@@ -19,13 +19,13 @@ const (
 // It holds the information for each split
 // Which feature to use, threshold, left prediction and right prediction
 type regressorNode struct {
-	Left      *regressorNode
-	Right     *regressorNode
-	Threshold float64
-	Feature   int64
-	LeftPred  float64
-	RightPred float64
-	Use_not   bool
+	Left         *regressorNode
+	Right        *regressorNode
+	Threshold    float64
+	Feature      int64
+	LeftPred     float64
+	RightPred    float64
+	isNodeNeeded bool
 }
 
 // CARTDecisionTreeRegressor - Tree struct for Decision Tree Regressor
@@ -188,7 +188,7 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
 	bestLeftLoss, bestRightLoss := bestLoss, bestLoss
 
-	upperNode.Use_not = true
+	upperNode.isNodeNeeded = true
 
 	var leftN, rightN regressorNode
 	// Iterate over all features
@@ -240,7 +240,7 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
 	}
 
 	if bestLoss == origLoss {
-		upperNode.Use_not = false
+		upperNode.isNodeNeeded = false
 		return upperNode
 	}
 
@@ -249,7 +249,7 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data
[][]float64, y []fl if bestLeftLoss > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) leftN = regressorBestSplit(tree, bestLeft, bestLefty, leftN, criterion, maxDepth, depth) - if leftN.Use_not == true { + if leftN.isNodeNeeded == true { upperNode.Left = &leftN } } @@ -257,7 +257,7 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl if bestRightLoss > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) rightN = regressorBestSplit(tree, bestRight, bestRighty, rightN, criterion, maxDepth, depth) - if rightN.Use_not == true { + if rightN.isNodeNeeded == true { upperNode.Right = &rightN } } From d587340e4a995f17aa70dc52507e7d8c91341480 Mon Sep 17 00:00:00 2001 From: Ayush Date: Thu, 30 Jul 2020 11:21:06 +0530 Subject: [PATCH 13/24] Renaming Impurity Functions --- trees/cart_classifier.go | 8 ++++---- trees/cart_regressor.go | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index 17a3ee7..fee9043 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -39,7 +39,7 @@ type CARTDecisionTreeClassifier struct { } // Calculate Gini Impurity of Target Labels -func giniImpurity(y []int64, labels []int64) (float64, int64) { +func computeGiniImpurityAndModeLabel(y []int64, labels []int64) (float64, int64) { nInstances := len(y) gini := 0.0 maxLabelCount := 0 @@ -62,7 +62,7 @@ func giniImpurity(y []int64, labels []int64) (float64, int64) { } // Calculate Entropy loss of Target Labels -func entropy(y []int64, labels []int64) (float64, int64) { +func computeEntropyAndModeLabel(y []int64, labels []int64) (float64, int64) { nInstances := len(y) entropy := 0.0 maxLabelCount := 0 @@ -91,9 +91,9 @@ func entropy(y []int64, labels []int64) (float64, int64) { func calculateClassificationLoss(y []int64, labels []int64, criterion string) (float64, int64) { if criterion == GINI { - return giniImpurity(y, labels) + return computeGiniImpurityAndModeLabel(y, labels) } else if criterion == ENTROPY { - return entropy(y, labels) + return computeEntropyAndModeLabel(y, labels) } else { panic("Invalid impurity function, choose from GINI or ENTROPY") } diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go index b94da1d..3509a15 100644 --- a/trees/cart_regressor.go +++ b/trees/cart_regressor.go @@ -59,7 +59,7 @@ func meanAbsoluteError(y []float64, yBar float64) float64 { } // Turn Mean Absolute Error into impurity function for decision trees. 
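The new names spell out the contract: each impurity helper returns both the loss and the value a leaf would predict, the majority label for classification and the mean for regression. A standalone hand check of the Gini pair; the 0.5 matches the test added earlier in the series:

package main

import "fmt"

func main() {
	y := []int64{1, 0, 0, 1}
	labels := []int64{0, 1}

	gini, bestLabel, bestCount := 0.0, int64(0), 0
	for _, label := range labels {
		count := 0
		for _, t := range y {
			if t == label {
				count++
			}
		}
		p := float64(count) / float64(len(y))
		gini += p * (1 - p) // 0.25 + 0.25 = 0.5 for a 2/2 split
		if count > bestCount {
			bestLabel, bestCount = label, count
		}
	}
	fmt.Println(gini, bestLabel) // 0.5 and label 0 (first of the tied labels)
}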
-func maeImpurity(y []float64) (float64, float64) {
+func computeMaeImpurityAndAverage(y []float64) (float64, float64) {
 	yHat := average(y)
 	return meanAbsoluteError(y, yHat), yHat
 }
@@ -76,16 +76,16 @@ func meanSquaredError(y []float64, yBar float64) float64 {
 }
 
 // Convert mean squared error into impurity function for decision trees
-func mseImpurity(y []float64) (float64, float64) {
+func computeMseImpurityAndAverage(y []float64) (float64, float64) {
 	yHat := average(y)
 	return meanSquaredError(y, yHat), yHat
 }
 
 func calculateRegressionLoss(y []float64, criterion string) (float64, float64) {
 	if criterion == MAE {
-		return maeImpurity(y)
+		return computeMaeImpurityAndAverage(y)
 	} else if criterion == MSE {
-		return mseImpurity(y)
+		return computeMseImpurityAndAverage(y)
 	} else {
 		panic("Invalid impurity function, choose from MAE or MSE")
 	}

From 7276108661eec19f3da12131389eeb2013314075 Mon Sep 17 00:00:00 2001
From: Ayush
Date: Thu, 30 Jul 2020 11:48:50 +0530
Subject: [PATCH 14/24] Adding Documentation

Comparison in performance and implementation with sklearn.

---
 examples/trees/cart.go | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/examples/trees/cart.go b/examples/trees/cart.go
index 833e287..f465d54 100644
--- a/examples/trees/cart.go
+++ b/examples/trees/cart.go
@@ -9,6 +9,22 @@ import (
 )
 
 func main() {
+	/* Performance of CART Algorithm:
+
+	Training Time for Titanic Dataset ≈ 713 µs
+	Prediction Time for Titanic Dataset ≈ 133 µs
+
+	Sklearn:
+	Training Time for Titanic Dataset ≈ 8.8 µs
+	Prediction Time for Titanic Dataset ≈ 7.87 µs
+
+	This implementation and scikit-learn produce the exact same tree for the exact same dataset.
+	Predictions on the same test set also yield the exact same accuracy.
+
+	This implementation is optimized to prevent redundant iterations over the dataset, but it is not completely optimized. Also, sklearn makes use of numpy to access columns easily, whereas here a complete iteration is required.
+	In terms of Hyperparameters, this implementation gives you the ability to choose the impurity function and the maxDepth.
+	Many of the other hyperparameters available in sklearn are not here, but depth-based pruning and the choice of impurity function are included.
+	*/
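Numbers like the ones quoted above come from a small timing harness; a sketch, assuming the same example dataset and golearn's stock train/test splitter (absolute timings will vary by machine):

package main

import (
	"fmt"
	"time"

	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/trees"
)

func main() {
	data, err := base.ParseCSVToInstances("../datasets/titanic.csv", false)
	if err != nil {
		panic(err)
	}
	train, test := base.InstancesTrainTestSplit(data, 0.8)

	tree := trees.NewDecisionTreeClassifier("gini", -1, []int64{0, 1})

	start := time.Now()
	tree.Fit(train)
	fmt.Println("train:", time.Since(start))

	start = time.Now()
	tree.Predict(test)
	fmt.Println("predict:", time.Since(start))
}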
+ */ // Load Titanic Data For classification classificationData, err := base.ParseCSVToInstances("../datasets/titanic.csv", false) From 7f8ce6d1138c7d045073cdd9025034710ab60802 Mon Sep 17 00:00:00 2001 From: Ayush Date: Fri, 31 Jul 2020 11:01:20 +0530 Subject: [PATCH 15/24] Removing Panics --- trees/cart_classifier.go | 75 ++++++++++++++++++++++++++-------------- trees/cart_regressor.go | 64 ++++++++++++++++++++++------------ trees/cart_test.go | 8 ++--- 3 files changed, 96 insertions(+), 51 deletions(-) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index fee9043..828f2dc 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -1,6 +1,7 @@ package trees import ( + "errors" "fmt" "math" "sort" @@ -89,13 +90,15 @@ func computeEntropyAndModeLabel(y []int64, labels []int64) (float64, int64) { return entropy, maxLabel } -func calculateClassificationLoss(y []int64, labels []int64, criterion string) (float64, int64) { +func calculateClassificationLoss(y []int64, labels []int64, criterion string) (float64, int64, error) { if criterion == GINI { - return computeGiniImpurityAndModeLabel(y, labels) + loss, modeLabel := computeGiniImpurityAndModeLabel(y, labels) + return loss, modeLabel, nil } else if criterion == ENTROPY { - return computeEntropyAndModeLabel(y, labels) + loss, modeLabel := computeEntropyAndModeLabel(y, labels) + return loss, modeLabel, nil } else { - panic("Invalid impurity function, choose from GINI or ENTROPY") + return 0, 0, errors.New("Invalid impurity function, choose from GINI or ENTROPY") } } @@ -164,32 +167,44 @@ func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, r // Fit - Creates an Emppty Root Node2 // Trains the tree by calling recursive function classifierBestSplit -func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) { +func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) error { var emptyNode classifierNode + var err error data := convertInstancesToProblemVec(X) - y := classifierConvertInstancesToLabelVec(X) - emptyNode = classifierBestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) + y, err := classifierConvertInstancesToLabelVec(X) + if err != nil { + return err + } + emptyNode, err = classifierBestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0) + + if err != nil { + return err + } tree.RootNode = &emptyNode + return nil } // Iteratively find and record the best split // Stop if depth reaches maxDepth or nodes are pure -func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, labels []int64, upperNode classifierNode, criterion string, maxDepth int64, depth int64) classifierNode { +func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []int64, labels []int64, upperNode classifierNode, criterion string, maxDepth int64, depth int64) (classifierNode, error) { // Ensure that we have not reached maxDepth.
maxDepth =-1 means split until nodes are pure depth++ if maxDepth != -1 && depth > maxDepth { - return upperNode + return upperNode, nil } numFeatures := len(data[0]) var bestGini, origGini float64 - + var err error // Calculate loss based on Criterion Specified by user - origGini, upperNode.LeftLabel = calculateClassificationLoss(y, labels, criterion) + origGini, upperNode.LeftLabel, err = calculateClassificationLoss(y, labels, criterion) + if err != nil { + return upperNode, err + } bestGini = origGini @@ -234,8 +249,8 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] var leftGini, rightGini float64 var leftLabels, rightLabels int64 - leftGini, leftLabels = calculateClassificationLoss(lefty, labels, criterion) - rightGini, rightLabels = calculateClassificationLoss(righty, labels, criterion) + leftGini, leftLabels, _ = calculateClassificationLoss(lefty, labels, criterion) + rightGini, rightLabels, _ = calculateClassificationLoss(righty, labels, criterion) // Calculate weighted gini impurity of child nodes subGini := (leftGini * float64(len(left)) / float64(numData)) + (rightGini * float64(len(right)) / float64(numData)) @@ -260,7 +275,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] // If no split was found, we don't want to use this node, so we will flag it if bestGini == origGini { upperNode.isNodeNeeded = false - return upperNode + return upperNode, nil } // Until nodes are not pure if bestGini > 0 { @@ -269,7 +284,10 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] if bestLeftGini > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) // Recursive splitting logic - leftN = classifierBestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth) + leftN, err = classifierBestSplit(tree, bestLeft, bestLefty, labels, leftN, criterion, maxDepth, depth) + if err != nil { + return upperNode, err + } if leftN.isNodeNeeded == true { upperNode.Left = &leftN } @@ -279,7 +297,10 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] if bestRightGini > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) // Recursive splitting logic - rightN = classifierBestSplit(tree, bestRight, bestRighty, labels, rightN, criterion, maxDepth, depth) + rightN, err = classifierBestSplit(tree, bestRight, bestRighty, labels, rightN, criterion, maxDepth, depth) + if err != nil { + return upperNode, err + } if rightN.isNodeNeeded == true { upperNode.Right = &rightN } @@ -288,7 +309,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] } // Return the node - contains all information regarding feature and threshold. - return upperNode + return upperNode, nil } // String : this function prints out entire tree for visualization. @@ -369,11 +390,14 @@ func classifierPredictFromNode(tree classifierNode, test [][]float64) []int64 { // Given Test data and label, return the accuracy of the classifier. // First it retrieves predictions from the data, then compares for accuracy.
// Calls classifierEvaluateFromNode -func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float64 { +func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) (float64, error) { rootNode := *tree.RootNode xTest := convertInstancesToProblemVec(test) - yTest := classifierConvertInstancesToLabelVec(test) - return classifierEvaluateFromNode(rootNode, xTest, yTest) + yTest, err := classifierConvertInstancesToLabelVec(test) + if err != nil { + return 0, err + } + return classifierEvaluateFromNode(rootNode, xTest, yTest), nil } // Retrieve predictions and then calculate accuracy. @@ -390,20 +414,21 @@ func classifierEvaluateFromNode(tree classifierNode, xTest [][]float64, yTest [] } // Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict -func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 { +func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) ([]int64, error) { // Get the class Attributes classAttrs := X.AllClassAttributes() // Only support 1 class Attribute if len(classAttrs) != 1 { - panic(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs))) + return []int64{0}, errors.New(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs))) + } // ClassAttribute must be numeric if _, ok := classAttrs[0].(*base.FloatAttribute); !ok { - panic(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0])) + return []int64{0}, errors.New(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0])) } // Allocate return structure _, rows := X.Size() - // labelVec := make([]float64, rows) + labelVec := make([]int64, rows) // Resolve class Attribute specification classAttrSpecs := base.ResolveAttributes(X, classAttrs) @@ -411,5 +436,5 @@ func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 { labelVec[rowNo] = int64(base.UnpackBytesToFloat(row[0])) return true, nil }) - return labelVec + return labelVec, nil } diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go index 3509a15..96d3405 100644 --- a/trees/cart_regressor.go +++ b/trees/cart_regressor.go @@ -1,6 +1,7 @@ package trees import ( + "errors" "fmt" "math" "sort" @@ -81,11 +82,13 @@ func computeMseImpurityAndAverage(y []float64) (float64, float64) { return meanSquaredError(y, yHat), yHat } -func calculateRegressionLoss(y []float64, criterion string) (float64, float64) { +func calculateRegressionLoss(y []float64, criterion string) (float64, float64, error) { if criterion == MAE { - return computeMaeImpurityAndAverage(y) + loss, avg := computeMaeImpurityAndAverage(y) + return loss, avg, nil } else if criterion == MSE { - return computeMseImpurityAndAverage(y) + loss, avg := computeMseImpurityAndAverage(y) + return loss, avg, nil } else { panic("Invalid impurity function, choose from MAE or MSE") } @@ -154,31 +157,42 @@ func regressorUpdateSplit(left [][]float64, lefty []float64, right [][]float64, // Fit - Build the tree using the data // Creates empty root node and builds tree by calling regressorBestSplit -func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) { +func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) error { var emptyNode regressorNode + var err error + data := regressorConvertInstancesToProblemVec(X) - y := regressorConvertInstancesToLabelVec(X) - - emptyNode = regressorBestSplit(*tree, data, y, emptyNode, tree.criterion, tree.maxDepth, 0) + y, err := regressorConvertInstancesToLabelVec(X) + if err != nil { + return err + } + emptyNode, err = 
regressorBestSplit(*tree, data, y, emptyNode, tree.criterion, tree.maxDepth, 0) + if err != nil { + return err + } tree.RootNode = &emptyNode + return nil } // Builds the tree by iteratively finding the best split. // Recursive function - stops if maxDepth is reached or nodes are pure -func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, upperNode regressorNode, criterion string, maxDepth int64, depth int64) regressorNode { +func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, upperNode regressorNode, criterion string, maxDepth int64, depth int64) (regressorNode, error) { // Ensure that we have not reached maxDepth. maxDepth =-1 means split until nodes are pure depth++ if depth > maxDepth && maxDepth != -1 { - return upperNode + return upperNode, nil } numFeatures := len(data[0]) var bestLoss, origLoss float64 - - origLoss, upperNode.LeftPred = calculateRegressionLoss(y, criterion) + var err error + origLoss, upperNode.LeftPred, err = calculateRegressionLoss(y, criterion) + if err != nil { + return upperNode, err + } bestLoss = origLoss @@ -218,8 +232,8 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl var leftLoss, rightLoss float64 var leftPred, rightPred float64 - leftLoss, leftPred = calculateRegressionLoss(lefty, criterion) - rightLoss, rightPred = calculateRegressionLoss(righty, criterion) + leftLoss, leftPred, _ = calculateRegressionLoss(lefty, criterion) + rightLoss, rightPred, _ = calculateRegressionLoss(righty, criterion) subLoss := (leftLoss * float64(len(left)) / float64(numData)) + (rightLoss * float64(len(right)) / float64(numData)) @@ -241,14 +255,17 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl if bestLoss == origLoss { upperNode.isNodeNeeded = false - return upperNode + return upperNode, nil } if bestLoss > 0 { if bestLeftLoss > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) - leftN = regressorBestSplit(tree, bestLeft, bestLefty, leftN, criterion, maxDepth, depth) + leftN, err = regressorBestSplit(tree, bestLeft, bestLefty, leftN, criterion, maxDepth, depth) + if err != nil { + return upperNode, err + } if leftN.isNodeNeeded == true { upperNode.Left = &leftN } @@ -256,13 +273,16 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl if bestRightLoss > 0 { tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold}) - rightN = regressorBestSplit(tree, bestRight, bestRighty, rightN, criterion, maxDepth, depth) + rightN, err = regressorBestSplit(tree, bestRight, bestRighty, rightN, criterion, maxDepth, depth) + if err != nil { + return upperNode, err + } if rightN.isNodeNeeded == true { upperNode.Right = &rightN } } } - return upperNode + return upperNode, nil } // Print Tree for Visualization - calls regressorPrintTreeFromNode() @@ -367,20 +387,20 @@ func regressorConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 { } // Helper function to convert base.FixedDataGrid into required format.
Called in Fit, Predict -func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) []float64 { +func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) ([]float64, error) { // Get the class Attributes classAttrs := X.AllClassAttributes() // Only support 1 class Attribute if len(classAttrs) != 1 { - panic(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs))) + return []float64{0}, errors.New(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs))) } // ClassAttribute must be numeric if _, ok := classAttrs[0].(*base.FloatAttribute); !ok { - panic(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0])) + return []float64{0}, errors.New(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0])) } // Allocate return structure _, rows := X.Size() - // labelVec := make([]float64, rows) + labelVec := make([]float64, rows) // Resolve class Attribute specification classAttrSpecs := base.ResolveAttributes(X, classAttrs) @@ -388,5 +408,5 @@ func regressorConvertInstancesToLabelVec(X base.FixedDataGrid) []float64 { labelVec[rowNo] = base.UnpackBytesToFloat(row[0]) return true, nil }) - return labelVec + return labelVec, nil } diff --git a/trees/cart_test.go b/trees/cart_test.go index 50387b9..99374c8 100644 --- a/trees/cart_test.go +++ b/trees/cart_test.go @@ -13,12 +13,12 @@ func TestRegressor(t *testing.T) { // For Classification Trees: // Is Gini being calculated correctly - gini, giniMaxLabel := giniImpurity([]int64{1, 0, 0, 1}, []int64{0, 1}) + gini, giniMaxLabel := computeGiniImpurityAndModeLabel([]int64{1, 0, 0, 1}, []int64{0, 1}) So(gini, ShouldEqual, 0.5) So(giniMaxLabel, ShouldNotBeNil) // Is Entropy being calculated correctly - entropy, entropyMaxLabel := entropy([]int64{1, 0, 0, 1}, []int64{0, 1}) + entropy, entropyMaxLabel := computeEntropyAndModeLabel([]int64{1, 0, 0, 1}, []int64{0, 1}) So(entropy, ShouldEqual, 1.0) So(entropyMaxLabel, ShouldNotBeNil) @@ -61,12 +61,12 @@ func TestRegressor(t *testing.T) { // For Regression Trees // Is MAE being calculated correctly - mae, maeMaxLabel := maeImpurity([]float64{1, 3, 5}) + mae, maeMaxLabel := computeMaeImpurityAndAverage([]float64{1, 3, 5}) So(mae, ShouldEqual, (4.0 / 3.0)) So(maeMaxLabel, ShouldNotBeNil) // Is MSE being calculated correctly - mse, mseMaxLabel := mseImpurity([]float64{1, 3, 5}) + mse, mseMaxLabel := computeMseImpurityAndAverage([]float64{1, 3, 5}) So(mse, ShouldEqual, (8.0 / 3.0)) So(mseMaxLabel, ShouldNotBeNil) From ae2338c2c1b98fecf56b193591a331fdf3eca76f Mon Sep 17 00:00:00 2001 From: Ayush Date: Fri, 31 Jul 2020 12:38:34 +0530 Subject: [PATCH 16/24] Updating package level details --- trees/trees.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/trees/trees.go b/trees/trees.go index ae8271e..d968b3e 100644 --- a/trees/trees.go +++ b/trees/trees.go @@ -11,6 +11,14 @@ present, so discretise beforehand (see filters) + CART (Classification and Regression Trees): + Builds a binary decision tree using the CART algorithm, + with a greedy approach to find the best split at each node. + + Can be used for regression and classification. + Attributes have to be FloatAttributes even for classification. + Hence, convert to integer labels beforehand for classification.
+ RandomTree: Builds a decision tree using the ID3 algorithm by picking the Attribute amongst those From 9d1ac82a40d6141e1bd0cdd6d1dd68bc430d981b Mon Sep 17 00:00:00 2001 From: Ayush Date: Sat, 1 Aug 2020 11:25:53 +0530 Subject: [PATCH 17/24] Optimizing Loss Calculation --- examples/trees/cart.go | 12 +++++++--- trees/cart_classifier.go | 51 ++++++++++++++++++++-------------------- trees/cart_test.go | 8 +++---- trees/cart_utils.go | 21 +++++------------ 4 files changed, 43 insertions(+), 49 deletions(-) diff --git a/examples/trees/cart.go b/examples/trees/cart.go index f465d54..a6fc909 100644 --- a/examples/trees/cart.go +++ b/examples/trees/cart.go @@ -35,10 +35,13 @@ func main() { // Create New Classification Tree // Hyperparameters - loss function, max Depth (-1 will split until pure), list of unique labels - decTree = NewDecisionTreeClassifier("entropy", -1, []int64{0, 1}) + decTree := NewDecisionTreeClassifier("entropy", -1, []int64{0, 1}) // Train Tree - decTree.Fit(trainData) + err = decTree.Fit(trainData) + if err != nil { + panic(err) + } // Print out tree for visualization - shows splits and feature and predictions fmt.Println(decTree.String()) @@ -62,7 +65,10 @@ func main() { regTree := NewDecisionTreeRegressor("mse", -1) // Train Tree - regTree.Fit(trainRegData) + err = regTree.Fit(trainRegData) + if err != nil { + panic(err) + } // Print out tree for visualization fmt.Println(regTree.String()) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index 828f2dc..bb9af51 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -39,25 +39,31 @@ type CARTDecisionTreeClassifier struct { triedSplits [][]float64 } +// Convert a series of labels to frequency map for efficient impurity calculation +func convertToMap(y []int64, labels []int64) map[int64]int { + labelCount := make(map[int64]int) + for _, label := range labels { + labelCount[label] = 0 + } + for _, value := range y { + labelCount[value]++ + } + return labelCount +} + // Calculate Gini Impurity of Target Labels func computeGiniImpurityAndModeLabel(y []int64, labels []int64) (float64, int64) { nInstances := len(y) gini := 0.0 - maxLabelCount := 0 var maxLabel int64 = 0 - for label := range labels { - numLabel := 0 - for target := range y { - if y[target] == labels[label] { - numLabel++ - } + + labelCount := convertToMap(y, labels) + for _, label := range labels { + if labelCount[label] > labelCount[maxLabel] { + maxLabel = label } - p := float64(numLabel) / float64(nInstances) + p := float64(labelCount[label]) / float64(nInstances) gini += p * (1 - p) - if numLabel > maxLabelCount { - maxLabel = labels[label] - maxLabelCount = numLabel - } } return gini, maxLabel } @@ -66,26 +72,19 @@ func computeGiniImpurityAndModeLabel(y []int64, labels []int64) (float64, int64) func computeEntropyAndModeLabel(y []int64, labels []int64) (float64, int64) { nInstances := len(y) entropy := 0.0 - maxLabelCount := 0 var maxLabel int64 = 0 - for label := range labels { - numLabel := 0 - for target := range y { - if y[target] == labels[label] { - numLabel++ - } - } - p := float64(numLabel) / float64(nInstances) + labelCount := convertToMap(y, labels) + for _, label := range labels { + if labelCount[label] > labelCount[maxLabel] { + maxLabel = label + } + p := float64(labelCount[label]) / float64(nInstances) logP := math.Log2(p) if p == 0 { logP = 0 } - entropy += -p * logP - if numLabel > maxLabelCount { - maxLabel = labels[label] - maxLabelCount = numLabel - } + entropy += (-p * logP) } return entropy, maxLabel } diff 
--git a/trees/cart_test.go b/trees/cart_test.go index 99374c8..3edee6d 100644 --- a/trees/cart_test.go +++ b/trees/cart_test.go @@ -1,7 +1,6 @@ package trees import ( - "fmt" "testing" . "github.com/smartystreets/goconvey/convey" ) @@ -42,8 +41,7 @@ func TestRegressor(t *testing.T) { // is data reordered correctly orderedData, orderedY := classifierReOrderData(getFeature(classifierData, 1), classifierData, classifiery) - fmt.Println(orderedData) - fmt.Println(orderedY) + So(orderedData[1][1], ShouldEqual, 3.0) So(orderedY[0], ShouldEqual, 1) @@ -81,9 +79,9 @@ func TestRegressor(t *testing.T) { leftData, rightData, leftY, rightY := regressorCreateSplit(data, 1, y, 5.0) So(len(leftData), ShouldEqual, 2) - So(len(lefty), ShouldEqual, 2) + So(len(leftY), ShouldEqual, 2) So(len(rightData), ShouldEqual, 2) - So(len(righty), ShouldEqual, 2) + So(len(rightY), ShouldEqual, 2) // is data reordered correctly regressorOrderedData, regressorOrderedY := regressorReOrderData(getFeature(data, 1), data, y) diff --git a/trees/cart_utils.go b/trees/cart_utils.go index d3b9b4a..251dee9 100644 --- a/trees/cart_utils.go +++ b/trees/cart_utils.go @@ -4,23 +4,14 @@ import ( "github.com/sjwhitworth/golearn/base" ) -// Helper Function to check if data point is unique or not. -// We will use this to isolate unique values of a feature -func stringInSlice(a float64, list []float64) bool { - for _, b := range list { - if b == a { - return true - } - } - return false -} - // Isolate only unique values. This way, we can try only unique splits and not redundant ones. func findUnique(data []float64) []float64 { - var unique []float64 - for i := range data { - if !stringInSlice(data[i], unique) { - unique = append(unique, data[i]) + keys := make(map[float64]bool) + unique := []float64{} + for _, entry := range data { + if _, value := keys[entry]; !value { + keys[entry] = true + unique = append(unique, entry) } } return unique From 6a42fcd4aede0d430800cfb1a668e04cf172d386 Mon Sep 17 00:00:00 2001 From: Ayush Date: Sat, 1 Aug 2020 11:36:53 +0530 Subject: [PATCH 18/24] catching nInstances == 0 --- trees/cart_classifier.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index bb9af51..cf9f98d 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -90,6 +90,9 @@ func computeEntropyAndModeLabel(y []int64, labels []int64) (float64, int64) { } func calculateClassificationLoss(y []int64, labels []int64, criterion string) (float64, int64, error) { + if len(y) == 0 { + return 0, 0, errors.New("Need at least 1 value to compute impurity") + } if criterion == GINI { From cd2b86aa2edbf1a609e2925d1bfa6abf29f1e6f5 Mon Sep 17 00:00:00 2001 From: Ayush Date: Sat, 1 Aug 2020 11:43:14 +0530 Subject: [PATCH 19/24] Changing var name --- trees/cart_classifier.go | 20 ++++++++++---------- trees/cart_regressor.go | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index cf9f98d..1ed92d1 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -155,16 +155,16 @@ func classifierReOrderData(featureVal []float64, data [][]float64, y []int64) ([ } // Update the left and right side of the split based on the threshold.
-func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, righty []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) { +func classifierUpdateSplit(left [][]float64, leftY []int64, right [][]float64, rightY []int64, feature int64, threshold float64) ([][]float64, []int64, [][]float64, []int64) { for right[0][feature] < threshold { left = append(left, right[0]) right = right[1:] - lefty = append(lefty, righty[0]) - righty = righty[1:] + leftY = append(leftY, rightY[0]) + rightY = rightY[1:] } - return left, lefty, right, righty + return left, leftY, right, rightY } // Fit - Creates an Emppty Root Node2 @@ -232,7 +232,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] firstTime := true var left, right [][]float64 - var lefty, righty []int64 + var leftY, rightY []int64 // Iterate over all possible thresholds for that feature for j := 0; j < len(unique)-1; j++ { @@ -242,17 +242,17 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] // We need to split the data from scratch when considering a new feature for the first time. // Otherwise, we need to update the split by moving data points from left to right. if firstTime { - left, right, lefty, righty = classifierCreateSplit(sortData, int64(i), sortY, threshold) + left, right, leftY, rightY = classifierCreateSplit(sortData, int64(i), sortY, threshold) firstTime = false } else { - left, lefty, right, righty = classifierUpdateSplit(left, lefty, right, righty, int64(i), threshold) + left, leftY, right, rightY = classifierUpdateSplit(left, leftY, right, rightY, int64(i), threshold) } var leftGini, rightGini float64 var leftLabels, rightLabels int64 - leftGini, leftLabels, _ = calculateClassificationLoss(lefty, labels, criterion) - rightGini, rightLabels, _ = calculateClassificationLoss(righty, labels, criterion) + leftGini, leftLabels, _ = calculateClassificationLoss(leftY, labels, criterion) + rightGini, rightLabels, _ = calculateClassificationLoss(rightY, labels, criterion) // Calculate weighted gini impurity of child nodes subGini := (leftGini * float64(len(left)) / float64(numData)) + (rightGini * float64(len(right)) / float64(numData)) @@ -263,7 +263,7 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y [] bestLeft, bestRight = left, right - bestLefty, bestRighty = lefty, righty + bestLefty, bestRighty = leftY, rightY upperNode.Threshold, upperNode.Feature = threshold, int64(i) diff --git a/trees/cart_regressor.go b/trees/cart_regressor.go index 96d3405..69ae9d6 100644 --- a/trees/cart_regressor.go +++ b/trees/cart_regressor.go @@ -143,16 +143,16 @@ func regressorReOrderData(featureVal []float64, data [][]float64, y []float64) ( } // Update the left and right data based on change in threshold -func regressorUpdateSplit(left [][]float64, lefty []float64, right [][]float64, righty []float64, feature int64, threshold float64) ([][]float64, []float64, [][]float64, []float64) { +func regressorUpdateSplit(left [][]float64, leftY []float64, right [][]float64, rightY []float64, feature int64, threshold float64) ([][]float64, []float64, [][]float64, []float64) { for right[0][feature] < threshold { left = append(left, right[0]) right = right[1:] - lefty = append(lefty, righty[0]) - righty = righty[1:] + leftY = append(leftY, rightY[0]) + rightY = rightY[1:] } - return left, lefty, right, righty + return left, leftY, right, rightY } @@ -217,23 +217,23 @@ func
regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl firstTime := true var left, right [][]float64 - var lefty, righty []float64 + var leftY, rightY []float64 for j := 0; j < len(unique)-1; j++ { threshold := (unique[j] + unique[j+1]) / 2 if validate(tree.triedSplits, int64(i), threshold) { if firstTime { - left, right, lefty, righty = regressorCreateSplit(sortData, int64(i), sortY, threshold) + left, right, leftY, rightY = regressorCreateSplit(sortData, int64(i), sortY, threshold) firstTime = false } else { - left, lefty, right, righty = regressorUpdateSplit(left, lefty, right, righty, int64(i), threshold) + left, leftY, right, rightY = regressorUpdateSplit(left, leftY, right, rightY, int64(i), threshold) } var leftLoss, rightLoss float64 var leftPred, rightPred float64 - leftLoss, leftPred, _ = calculateRegressionLoss(lefty, criterion) - rightLoss, rightPred, _ = calculateRegressionLoss(righty, criterion) + leftLoss, leftPred, _ = calculateRegressionLoss(leftY, criterion) + rightLoss, rightPred, _ = calculateRegressionLoss(rightY, criterion) subLoss := (leftLoss * float64(len(left)) / float64(numData)) + (rightLoss * float64(len(right)) / float64(numData)) @@ -241,7 +241,7 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl bestLoss = subLoss bestLeft, bestRight = left, right - bestLefty, bestRighty = lefty, righty + bestLefty, bestRighty = leftY, rightY upperNode.Threshold, upperNode.Feature = threshold, int64(i) From 8ae385ca25a807a0e268324c91a89d7088d75a01 Mon Sep 17 00:00:00 2001 From: Ayush Date: Sat, 1 Aug 2020 13:16:34 +0530 Subject: [PATCH 20/24] Complexity Analysis for Algorithm --- examples/trees/cart.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/examples/trees/cart.go b/examples/trees/cart.go index a6fc909..22d3763 100644 --- a/examples/trees/cart.go +++ b/examples/trees/cart.go @@ -11,13 +11,21 @@ import ( func main() { /* Performance of CART Algorithm: - Training Time for Titanic Dataset ≈ 713 µs - Prediction Time for Titanic Dataset ≈ 133 µs + Training Time for Titanic Dataset ≈ 611 µs + Prediction Time for Titanic Dataset ≈ 101 µs + + Complexity Analysis: + 1x Dataset -- x ms + 2x Dataset -- 1.7x ms + 128x Dataset -- 74x ms + + Complexity is sub-linear Sklearn: Training Time for Titanic Dataset ≈ 8.8 µs Prediction Time for Titanic Dataset ≈ 7.87 µs + This implementation and scikit-learn produce the exact same tree for the exact same dataset. Predictions on the same test set also yield the exact same accuracy.
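The timing and complexity figures quoted above come without a benchmark harness anywhere in the series. A minimal sketch of how the training-time number could be measured with Go's built-in testing package follows — the benchmark name and the CSV path are assumptions for illustration, not part of the patches:

package trees

import (
	"testing"

	"github.com/sjwhitworth/golearn/base"
)

// BenchmarkCARTClassifierFit is a hypothetical benchmark (not part of this
// series) for reproducing the training-time figure above. It assumes the
// Titanic CSV shipped under examples/datasets.
func BenchmarkCARTClassifierFit(b *testing.B) {
	data, err := base.ParseCSVToInstances("../examples/datasets/titanic.csv", false)
	if err != nil {
		b.Fatal(err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Same hyperparameters as examples/trees/cart.go: entropy criterion,
		// unlimited depth, binary labels.
		tree := NewDecisionTreeClassifier("entropy", -1, []int64{0, 1})
		if err := tree.Fit(data); err != nil {
			b.Fatal(err)
		}
	}
}

Running it with `go test -bench CARTClassifierFit ./trees` would report ns/op for a full Fit, which is the quantity the comparison with sklearn relies on.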
From cad05a087a1785511040d223d57bf47473f8d84c Mon Sep 17 00:00:00 2001 From: Ayush Date: Sat, 1 Aug 2020 15:11:38 +0530 Subject: [PATCH 21/24] Updating Logistic.go --- linear_models/logistic.go | 1 - 1 file changed, 1 deletion(-) diff --git a/linear_models/logistic.go b/linear_models/logistic.go index 14ff0d2..96c3206 100644 --- a/linear_models/logistic.go +++ b/linear_models/logistic.go @@ -3,7 +3,6 @@ package linear_models import ( "errors" "fmt" - "github.com/sjwhitworth/golearn/base" ) From e55a329d8aa8938e52e2bfe3622d175ad59d39ee Mon Sep 17 00:00:00 2001 From: Ayush Date: Sat, 1 Aug 2020 15:32:59 +0530 Subject: [PATCH 22/24] Fixing Bug --- examples/trees/cart.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/trees/cart.go b/examples/trees/cart.go index 22d3763..5a19cab 100644 --- a/examples/trees/cart.go +++ b/examples/trees/cart.go @@ -6,6 +6,8 @@ import ( "fmt" "github.com/sjwhitworth/golearn/base" + "github.com/sjwhitworth/golearn/trees" + ) func main() { From b689fe0c58d68d6d91ef77729f884ba5190a25aa Mon Sep 17 00:00:00 2001 From: Ayush Date: Mon, 3 Aug 2020 09:41:35 +0530 Subject: [PATCH 23/24] Fixing Typo + tmp file --- trees/cart_classifier.go | 3 +-- trees/tmp | Bin 409 -> 413 bytes 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/trees/cart_classifier.go b/trees/cart_classifier.go index 1ed92d1..daf6a29 100644 --- a/trees/cart_classifier.go +++ b/trees/cart_classifier.go @@ -167,7 +167,7 @@ func classifierUpdateSplit(left [][]float64, leftY []int64, right [][]float64, r return left, leftY, right, rightY } -// Fit - Creates an Emppty Root Node2 +// Fit - Creates an Empty Root Node // Trains the tree by calling recursive function classifierBestSplit func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) error { var emptyNode classifierNode @@ -422,7 +422,6 @@ func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) ([]int64, error) // Only support 1 class Attribute if len(classAttrs) != 1 { return []int64{0}, errors.New(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs))) - } // ClassAttribute must be numeric if _, ok := classAttrs[0].(*base.FloatAttribute); !ok { diff --git a/trees/tmp b/trees/tmp index af98d1a33b82338d7466955c2c6aafb41cd3496c..28c93c507c8869a97a9ff1d9eecbd160475a62e3 100644 GIT binary patch delta 383 zcmV-_0f7FQ1DykqCx0+EHfB(um3vS?X>mzn5rcsNrRJb}Zis;N{G8OpqC5pdy&>b| zQ9woD>l)(d;uzvcOFIbVe?wyv^!#sRXfT@p=>e*h((;RP6HDUDQj3Z+^YfIf40V)} za}tY-Gt)9ti0fhdY1DOMmCw~M9DHzboLCB!AxFoTN!GL1B&^oYGP5If}!4!aq=jj zBJgz$addGEaipakgz~?ku?brKH!w0c7|s9m0M$xq`9-;jCGlmcMa7xI!eho ziN(d4X_=`-N>)mqF2+haO3rYhFqlFmLp=i}WX18hsU?XiiGL-DN>+KLIXSfh?YB`t z7ytkO0RR8&l(7oJKn#Z8C3A(~+*y{GQ<_?=WEG9b$g#CLN_mO7si>J7QdTG> ZCzhn9=NDxrC*~-PX4s($00000|NlQXwFm$J From 27b86ce3ea4e3f9137c6acb0c1f8790fe2813806 Mon Sep 17 00:00:00 2001 From: Ayush Date: Thu, 6 Aug 2020 21:43:47 +0530 Subject: [PATCH 24/24] Delete tmp --- trees/tmp | Bin 413 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 trees/tmp diff --git a/trees/tmp b/trees/tmp deleted file mode 100644 index 28c93c507c8869a97a9ff1d9eecbd160475a62e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 413 zcmV;O0b>3iiwFP!00000|8n*Tj`wx+^K^3!4q>1XFn|DDw1I)4F@g;eH#9IeHfB(u zm3vS?X>mzn5rcsNrRJb}Zis;N{G8OpqC5pdy&>b|Q9woD>l)(d;uzvcOFIbVe?wyv z^!#sRXfT@p=>e*h((;RP6HDUDQj3Z+^YfIf40V)}a}tY-Gt)9ti12+Ha$PFaQ7m0RR8&l(DMAFbqZiMRSHE z!!!72OkS_prqrzLIs~%qyf?>Yc>n+a|NjC-5V&l2EGa3XksSo
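Taken together, the series leaves the classifier with error-returning Fit and Evaluate methods in place of the original panics. A minimal end-to-end sketch of the API as it stands after PATCH 24 — the dataset path and the 50/50 train/test split are assumptions modelled on examples/trees/cart.go:

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/trees"
)

func main() {
	// Assumed path, following examples/trees/cart.go.
	data, err := base.ParseCSVToInstances("../datasets/titanic.csv", false)
	if err != nil {
		panic(err)
	}
	trainData, testData := base.InstancesTrainTestSplit(data, 0.50)

	// Hyperparameters: impurity criterion, maxDepth (-1 splits until pure),
	// and the list of unique integer labels.
	tree := trees.NewDecisionTreeClassifier("gini", -1, []int64{0, 1})

	// Fit now returns an error instead of panicking (PATCH 15).
	if err := tree.Fit(trainData); err != nil {
		panic(err)
	}

	// Print the learned splits for visualization.
	fmt.Println(tree.String())

	// Evaluate also returns an error now.
	accuracy, err := tree.Evaluate(testData)
	if err != nil {
		panic(err)
	}
	fmt.Println("Accuracy:", accuracy)
}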