
Removing Clutter

Partial modularization of the best-split method. Also shortens the method by declaring multiple variables on the same line.

Also removes functions duplicated between the classifier and the regressor, moving them into cart_utils.go.
Ayush 2020-07-28 14:17:18 +05:30
parent ef751e62c4
commit 2d2af0a58f
4 changed files with 181 additions and 258 deletions
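To illustrate what the consolidation buys, here is a minimal, self-contained sketch of the pattern this commit introduces: one criterion-dispatch helper (calculateClassificationLoss for the classifier, calculateRegressionLoss for the regressor) replaces the if/else over the criterion that previously appeared at every call site of the best-split loop. The impurity functions below are toy stand-ins operating on class proportions, not the library's giniImpurity/entropy (which take label slices and also return a majority label), and the plain strings stand in for the GINI/ENTROPY constants.

package main

import (
	"errors"
	"fmt"
	"math"
)

// Toy Gini impurity over class proportions: 1 - sum(p_k^2).
func gini(p []float64) float64 {
	s := 1.0
	for _, pk := range p {
		s -= pk * pk
	}
	return s
}

// Toy entropy over class proportions: -sum(p_k * log2(p_k)).
func entropyLoss(p []float64) float64 {
	s := 0.0
	for _, pk := range p {
		if pk > 0 {
			s -= pk * math.Log2(pk)
		}
	}
	return s
}

// Single dispatch point, mirroring the shape of calculateClassificationLoss.
// (The library version panics on an unknown criterion instead of returning an error.)
func classificationLoss(p []float64, criterion string) (float64, error) {
	switch criterion {
	case "gini":
		return gini(p), nil
	case "entropy":
		return entropyLoss(p), nil
	default:
		return 0, errors.New("invalid impurity criterion: choose gini or entropy")
	}
}

func main() {
	p := []float64{0.5, 0.5}
	for _, c := range []string{"gini", "entropy"} {
		loss, _ := classificationLoss(p, c)
		fmt.Printf("%s loss for a 50/50 node: %.3f\n", c, loss)
	}
}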


@@ -90,6 +90,16 @@ func entropy(y []int64, labels []int64) (float64, int64) {
return entropy, maxLabel
}
func calculateClassificationLoss(y []int64, labels []int64, criterion string) (float64, int64) {
if criterion == GINI {
return giniImpurity(y, labels)
} else if criterion == ENTROPY {
return entropy(y, labels)
} else {
panic("Invalid impurity function, choose from GINI or ENTROPY")
}
}
// Split the data into left node and right node based on feature and threshold
func classifierCreateSplit(data [][]float64, feature int64, y []int64, threshold float64) ([][]float64, [][]float64, []int64, []int64) {
var left [][]float64
@@ -111,37 +121,6 @@ func classifierCreateSplit(data [][]float64, feature int64, y []int64, threshold
return left, right, lefty, righty
}
// Helper Function to check if data point is unique or not.
// We will use this to isolate unique values of a feature
func classifierStringInSlice(a float64, list []float64) bool {
for _, b := range list {
if b == a {
return true
}
}
return false
}
// Isolate only unique values. This way, we can try only unique splits and not redundant ones.
func classifierFindUnique(data []float64) []float64 {
var unique []float64
for i := range data {
if !classifierStringInSlice(data[i], unique) {
unique = append(unique, data[i])
}
}
return unique
}
// Isolate only the feature being considered for splitting. Reduces the complexity in managing splits.
func classifierGetFeature(data [][]float64, feature int64) []float64 {
var featureVals []float64
for i := range data {
featureVals = append(featureVals, data[i][feature])
}
return featureVals
}
// Function to Create New Decision Tree Classifier.
// It assigns all of the hyperparameters by user into the tree attributes.
func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64) *CARTDecisionTreeClassifier {
@@ -153,19 +132,6 @@ func NewDecisionTreeClassifier(criterion string, maxDepth int64, labels []int64)
return &tree
}
// Make sure that split being considered has not been done before.
// Else we will unnecessarily try splits that won't improve Impurity.
func classifierValidate(triedSplits [][]float64, feature int64, threshold float64) bool {
for i := range triedSplits {
split := triedSplits[i]
featureTried, thresholdTried := split[0], split[1]
if int64(featureTried) == feature && thresholdTried == threshold {
return false
}
}
return true
}
// Reorder the data by feature being considered. Optimizes code by reducing the number of times we have to loop over data for splitting
func classifierReOrderData(featureVal []float64, data [][]float64, y []int64) ([][]float64, []int64) {
s := NewSlice(featureVal)
@@ -202,7 +168,7 @@ func classifierUpdateSplit(left [][]float64, lefty []int64, right [][]float64, r
func (tree *CARTDecisionTreeClassifier) Fit(X base.FixedDataGrid) {
var emptyNode classifierNode
data := classifierConvertInstancesToProblemVec(X)
data := convertInstancesToProblemVec(X)
y := classifierConvertInstancesToLabelVec(X)
emptyNode = classifierBestSplit(*tree, data, y, tree.labels, emptyNode, tree.criterion, tree.maxDepth, 0)
@@ -221,40 +187,29 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
}
numFeatures := len(data[0])
var bestGini float64
var origGini float64
var bestGini, origGini float64
// Calculate loss based on Criterion Specified by user
if criterion == GINI {
origGini, upperNode.LeftLabel = giniImpurity(y, labels)
} else if criterion == ENTROPY {
origGini, upperNode.LeftLabel = entropy(y, labels)
} else {
panic("Invalid impurity function, choose from GINI or ENTROPY")
}
origGini, upperNode.LeftLabel = calculateClassificationLoss(y, labels, criterion)
bestGini = origGini
bestLeft := data
bestRight := data
bestLefty := y
bestRighty := y
bestLeft, bestRight, bestLefty, bestRighty := data, data, y, y
numData := len(data)
bestLeftGini := bestGini
bestRightGini := bestGini
bestLeftGini, bestRightGini := bestGini, bestGini
upperNode.Use_not = true
var leftN classifierNode
var rightN classifierNode
var leftN, rightN classifierNode
// Iterate over all features
for i := 0; i < numFeatures; i++ {
featureVal := classifierGetFeature(data, int64(i))
unique := classifierFindUnique(featureVal)
featureVal := getFeature(data, int64(i))
unique := findUnique(featureVal)
sort.Float64s(unique)
numUnique := len(unique)
sortData, sortY := classifierReOrderData(featureVal, data, y)
@@ -263,53 +218,43 @@ func classifierBestSplit(tree CARTDecisionTreeClassifier, data [][]float64, y []
var left, right [][]float64
var lefty, righty []int64
// Iterate over all possible thresholds for that feature
for j := range unique {
if j != (numUnique - 1) {
threshold := (unique[j] + unique[j+1]) / 2
// Ensure that same split has not been made before
if classifierValidate(tree.triedSplits, int64(i), threshold) {
// We need to split data from fresh when considering new feature for the first time.
// Otherwise, we need to update the split by moving data points from left to right.
if firstTime {
left, right, lefty, righty = classifierCreateSplit(sortData, int64(i), sortY, threshold)
firstTime = false
} else {
left, lefty, right, righty = classifierUpdateSplit(left, lefty, right, righty, int64(i), threshold)
}
for j := 0; j < len(unique)-1; j++ {
var leftGini float64
var rightGini float64
var leftLabels int64
var rightLabels int64
if criterion == GINI {
leftGini, leftLabels = giniImpurity(lefty, labels)
rightGini, rightLabels = giniImpurity(righty, labels)
} else if criterion == ENTROPY {
leftGini, leftLabels = entropy(lefty, labels)
rightGini, rightLabels = entropy(righty, labels)
}
// Calculate weighted gini impurity of child nodes
subGini := (leftGini * float64(len(left)) / float64(numData)) + (rightGini * float64(len(right)) / float64(numData))
// If we find a split that reduces impurity
if subGini < bestGini {
bestGini = subGini
bestLeft = left
bestRight = right
bestLefty = lefty
bestRighty = righty
upperNode.Threshold = threshold
upperNode.Feature = int64(i)
upperNode.LeftLabel = leftLabels
upperNode.RightLabel = rightLabels
bestLeftGini = leftGini
bestRightGini = rightGini
}
threshold := (unique[j] + unique[j+1]) / 2
// Ensure that same split has not been made before
if validate(tree.triedSplits, int64(i), threshold) {
// We need to split data from fresh when considering new feature for the first time.
// Otherwise, we need to update the split by moving data points from left to right.
if firstTime {
left, right, lefty, righty = classifierCreateSplit(sortData, int64(i), sortY, threshold)
firstTime = false
} else {
left, lefty, right, righty = classifierUpdateSplit(left, lefty, right, righty, int64(i), threshold)
}
var leftGini, rightGini float64
var leftLabels, rightLabels int64
leftGini, leftLabels = calculateClassificationLoss(lefty, labels, criterion)
rightGini, rightLabels = calculateClassificationLoss(righty, labels, criterion)
// Calculate weighted gini impurity of child nodes
subGini := (leftGini * float64(len(left)) / float64(numData)) + (rightGini * float64(len(right)) / float64(numData))
// If we find a split that reduces impurity
if subGini < bestGini {
bestGini = subGini
bestLeft, bestRight = left, right
bestLefty, bestRighty = lefty, righty
upperNode.Threshold, upperNode.Feature = threshold, int64(i)
upperNode.LeftLabel, upperNode.RightLabel = leftLabels, rightLabels
bestLeftGini, bestRightGini = leftGini, rightGini
}
}
}
}
@@ -366,10 +311,8 @@ func classifierPrintTreeFromNode(tree classifierNode, spacing string) string {
returnString += spacing + "---> True" + "\n"
returnString += " " + spacing + "PREDICT "
returnString += strconv.FormatInt(tree.LeftLabel, 10) + "\n"
}
if tree.Right == nil {
returnString += spacing + "---> False" + "\n"
returnString += " " + spacing + "PREDICT "
returnString += strconv.FormatInt(tree.RightLabel, 10) + "\n"
@@ -409,7 +352,7 @@ func classifierPredictSingle(tree classifierNode, instance []float64) int64 {
// Given test data, return predictions for every datapoint. calls classifierPredictFromNode
func (tree *CARTDecisionTreeClassifier) Predict(X_test base.FixedDataGrid) []int64 {
root := *tree.RootNode
test := classifierConvertInstancesToProblemVec(X_test)
test := convertInstancesToProblemVec(X_test)
return classifierPredictFromNode(root, test)
}
@@ -429,7 +372,7 @@ func classifierPredictFromNode(tree classifierNode, test [][]float64) []int64 {
// Calls classifierEvaluateFromNode
func (tree *CARTDecisionTreeClassifier) Evaluate(test base.FixedDataGrid) float64 {
rootNode := *tree.RootNode
xTest := classifierConvertInstancesToProblemVec(test)
xTest := convertInstancesToProblemVec(test)
yTest := classifierConvertInstancesToLabelVec(test)
return classifierEvaluateFromNode(rootNode, xTest, yTest)
}
@@ -447,31 +390,6 @@ func classifierEvaluateFromNode(tree classifierNode, xTest [][]float64, yTest []
return accuracy
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func classifierConvertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
// Allocate problem array
_, rows := X.Size()
problemVec := make([][]float64, rows)
// Retrieve numeric non-class Attributes
numericAttrs := base.NonClassFloatAttributes(X)
numericAttrSpecs := base.ResolveAttributes(X, numericAttrs)
// Convert each row
X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
// Allocate a new row
probRow := make([]float64, len(numericAttrSpecs))
// Read out the row
for i, _ := range numericAttrSpecs {
probRow[i] = base.UnpackBytesToFloat(row[i])
}
// Add the row
problemVec[rowNo] = probRow
return true, nil
})
return problemVec
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func classifierConvertInstancesToLabelVec(X base.FixedDataGrid) []int64 {
// Get the class Attributes

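For reference, the quantity subGini computed inside classifierBestSplit above is simply the size-weighted impurity of the two candidate children. Written out (a restatement of the expression already in the diff, not additional library code):

\mathrm{subGini} = \frac{n_{\mathrm{left}}}{n}\,G(\mathrm{left}) + \frac{n_{\mathrm{right}}}{n}\,G(\mathrm{right}),
\qquad
G_{\mathrm{gini}}(S) = 1 - \sum_k p_k^2,
\qquad
G_{\mathrm{entropy}}(S) = -\sum_k p_k \log_2 p_k

Here n_left and n_right are the row counts sent to each side and p_k is the fraction of rows in a node carrying label k. A split is adopted only when subGini falls below the current best, which is initialised to the parent's own impurity (origGini), so every accepted split strictly reduces impurity.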

@@ -81,6 +81,16 @@ func mseImpurity(y []float64) (float64, float64) {
return meanSquaredError(y, yHat), yHat
}
func calculateRegressionLoss(y []float64, criterion string) (float64, float64) {
if criterion == MAE {
return maeImpurity(y)
} else if criterion == MSE {
return mseImpurity(y)
} else {
panic("Invalid impurity function, choose from MAE or MSE")
}
}
// Split the data into left and right based on threshold and feature.
func regressorCreateSplit(data [][]float64, feature int64, y []float64, threshold float64) ([][]float64, [][]float64, []float64, []float64) {
var left [][]float64
@@ -102,39 +112,6 @@ func regressorCreateSplit(data [][]float64, feature int64, y []float64, threshol
return left, right, lefty, righty
}
// Helper function for finding unique values.
// Used for isolating unique values in a feature.
func regressorStringInSlice(a float64, list []float64) bool {
for _, b := range list {
if b == a {
return true
}
}
return false
}
// Isolate only unique values.
// This way we can only try unique splits.
func regressorFindUnique(data []float64) []float64 {
var unique []float64
for i := range data {
if !regressorStringInSlice(data[i], unique) {
unique = append(unique, data[i])
}
}
return unique
}
// Extract out a single feature from data.
// Reduces complexity in managing splits and sorting
func regressorGetFeature(data [][]float64, feature int64) []float64 {
var featureVals []float64
for i := range data {
featureVals = append(featureVals, data[i][feature])
}
return featureVals
}
// Interface for creating new Decision Tree Regressor
func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTreeRegressor {
var tree CARTDecisionTreeRegressor
@@ -143,19 +120,6 @@ func NewDecisionTreeRegressor(criterion string, maxDepth int64) *CARTDecisionTre
return &tree
}
// Validate that the split being tested has not been done before.
// This prevents redundant splits from happening.
func regressorValidate(triedSplits [][]float64, feature int64, threshold float64) bool {
for i := range triedSplits {
split := triedSplits[i]
featureTried, thresholdTried := split[0], split[1]
if int64(featureTried) == feature && thresholdTried == threshold {
return false
}
}
return true
}
// Re order data based on a feature for optimizing code
// Helps in updating splits without reiterating entire dataset
func regressorReOrderData(featureVal []float64, data [][]float64, y []float64) ([][]float64, []float64) {
@@ -204,6 +168,7 @@ func (tree *CARTDecisionTreeRegressor) Fit(X base.FixedDataGrid) {
// Recursive function - stops if maxDepth is reached or nodes are pure
func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []float64, upperNode regressorNode, criterion string, maxDepth int64, depth int64) regressorNode {
// Ensure that we have not reached maxDepth. maxDepth =-1 means split until nodes are pure
depth++
if depth > maxDepth && maxDepth != -1 {
@@ -211,39 +176,27 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
}
numFeatures := len(data[0])
var bestLoss float64
var origLoss float64
var bestLoss, origLoss float64
if criterion == MAE {
origLoss, upperNode.LeftPred = maeImpurity(y)
} else if criterion == MSE {
origLoss, upperNode.LeftPred = mseImpurity(y)
} else {
panic("Invalid impurity function, choose from MAE or MSE")
}
origLoss, upperNode.LeftPred = calculateRegressionLoss(y, criterion)
bestLoss = origLoss
bestLeft := data
bestRight := data
bestLefty := y
bestRighty := y
bestLeft, bestRight, bestLefty, bestRighty := data, data, y, y
numData := len(data)
bestLeftLoss := bestLoss
bestRightLoss := bestLoss
bestLeftLoss, bestRightLoss := bestLoss, bestLoss
upperNode.Use_not = true
var leftN regressorNode
var rightN regressorNode
var leftN, rightN regressorNode
// Iterate over all features
for i := 0; i < numFeatures; i++ {
featureVal := regressorGetFeature(data, int64(i))
unique := regressorFindUnique(featureVal)
featureVal := getFeature(data, int64(i))
unique := findUnique(featureVal)
sort.Float64s(unique)
numUnique := len(unique)
sortData, sortY := regressorReOrderData(featureVal, data, y)
@@ -252,49 +205,36 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
var left, right [][]float64
var lefty, righty []float64
for j := range unique {
if j != (numUnique - 1) {
threshold := (unique[j] + unique[j+1]) / 2
if regressorValidate(tree.triedSplits, int64(i), threshold) {
if firstTime {
left, right, lefty, righty = regressorCreateSplit(sortData, int64(i), sortY, threshold)
firstTime = false
} else {
left, lefty, right, righty = regressorUpdateSplit(left, lefty, right, righty, int64(i), threshold)
}
var leftLoss float64
var rightLoss float64
var leftPred float64
var rightPred float64
if criterion == MAE {
leftLoss, leftPred = maeImpurity(lefty)
rightLoss, rightPred = maeImpurity(righty)
} else if criterion == MSE {
leftLoss, leftPred = mseImpurity(lefty)
rightLoss, rightPred = mseImpurity(righty)
}
subLoss := (leftLoss * float64(len(left)) / float64(numData)) + (rightLoss * float64(len(right)) / float64(numData))
if subLoss < bestLoss {
bestLoss = subLoss
bestLeft = left
bestRight = right
bestLefty = lefty
bestRighty = righty
upperNode.Threshold = threshold
upperNode.Feature = int64(i)
upperNode.LeftPred = leftPred
upperNode.RightPred = rightPred
bestLeftLoss = leftLoss
bestRightLoss = rightLoss
}
for j := 0; j < len(unique)-1; j++ {
threshold := (unique[j] + unique[j+1]) / 2
if validate(tree.triedSplits, int64(i), threshold) {
if firstTime {
left, right, lefty, righty = regressorCreateSplit(sortData, int64(i), sortY, threshold)
firstTime = false
} else {
left, lefty, right, righty = regressorUpdateSplit(left, lefty, right, righty, int64(i), threshold)
}
var leftLoss, rightLoss float64
var leftPred, rightPred float64
leftLoss, leftPred = calculateRegressionLoss(lefty, criterion)
rightLoss, rightPred = calculateRegressionLoss(righty, criterion)
subLoss := (leftLoss * float64(len(left)) / float64(numData)) + (rightLoss * float64(len(right)) / float64(numData))
if subLoss < bestLoss {
bestLoss = subLoss
bestLeft, bestRight = left, right
bestLefty, bestRighty = lefty, righty
upperNode.Threshold, upperNode.Feature = threshold, int64(i)
upperNode.LeftPred, upperNode.RightPred = leftPred, rightPred
bestLeftLoss, bestRightLoss = leftLoss, rightLoss
}
}
}
}
@@ -312,19 +252,16 @@ func regressorBestSplit(tree CARTDecisionTreeRegressor, data [][]float64, y []fl
if leftN.Use_not == true {
upperNode.Left = &leftN
}
}
if bestRightLoss > 0 {
tree.triedSplits = append(tree.triedSplits, []float64{float64(upperNode.Feature), upperNode.Threshold})
rightN = regressorBestSplit(tree, bestRight, bestRighty, rightN, criterion, maxDepth, depth)
if rightN.Use_not == true {
upperNode.Right = &rightN
}
}
}
return upperNode
}
@@ -349,20 +286,17 @@ func regressorPrintTreeFromNode(tree regressorNode, spacing string) string {
returnString += fmt.Sprintf("%.3f", tree.LeftPred) + "\n"
}
if tree.Right == nil {
returnString += spacing + "---> False" + "\n"
returnString += " " + spacing + "PREDICT "
returnString += fmt.Sprintf("%.3f", tree.RightPred) + "\n"
}
if tree.Left != nil {
// fmt.Println(spacing + "---> True")
returnString += spacing + "---> True" + "\n"
returnString += regressorPrintTreeFromNode(*tree.Left, spacing+" ")
}
if tree.Right != nil {
// fmt.Println(spacing + "---> False")
returnString += spacing + "---> False" + "\n"
returnString += regressorPrintTreeFromNode(*tree.Right, spacing+" ")
}
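One more note on both best-split loops above: the candidate thresholds for a feature are the midpoints between consecutive unique, sorted feature values, and validate is consulted so a (feature, threshold) pair already recorded in triedSplits is never evaluated twice. Below is a small standalone sketch of the threshold generation only (toy data; it uses a map for uniqueness rather than the library's findUnique linear scan):

package main

import (
	"fmt"
	"sort"
)

// Candidate thresholds for one feature: midpoints of consecutive unique
// sorted values, as in classifierBestSplit/regressorBestSplit.
func candidateThresholds(featureVals []float64) []float64 {
	seen := map[float64]bool{}
	var unique []float64
	for _, v := range featureVals {
		if !seen[v] {
			seen[v] = true
			unique = append(unique, v)
		}
	}
	sort.Float64s(unique)

	var thresholds []float64
	for j := 0; j < len(unique)-1; j++ {
		thresholds = append(thresholds, (unique[j]+unique[j+1])/2)
	}
	return thresholds
}

func main() {
	fmt.Println(candidateThresholds([]float64{10, 1, 1, 4}))
	// Prints: [2.5 7]
}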


@@ -38,10 +38,10 @@ func TestRegressor(t *testing.T) {
So(len(righty), ShouldEqual, 2)
// Is isolating unique values working properly
So(len(classifierFindUnique([]float64{10, 1, 1})), ShouldEqual, 2)
So(len(findUnique([]float64{10, 1, 1})), ShouldEqual, 2)
// is data reordered correctly
orderedData, orderedY := classifierReOrderData(classifierGetFeature(classifierData, 1), classifierData, classifiery)
orderedData, orderedY := classifierReOrderData(getFeature(classifierData, 1), classifierData, classifiery)
fmt.Println(orderedData)
fmt.Println(orderedY)
So(orderedData[1][1], ShouldEqual, 3.0)
@@ -85,11 +85,8 @@ func TestRegressor(t *testing.T) {
So(len(rightData), ShouldEqual, 2)
So(len(righty), ShouldEqual, 2)
// Is isolating unique values working properly
So(len(regressorFindUnique([]float64{10, 1, 1})), ShouldEqual, 2)
// is data reordered correctly
regressorOrderedData, regressorOrderedY := regressorReOrderData(regressorGetFeature(data, 1), data, y)
regressorOrderedData, regressorOrderedY := regressorReOrderData(getFeature(data, 1), data, y)
So(regressorOrderedData[1][1], ShouldEqual, 3.0)
So(regressorOrderedY[0], ShouldEqual, 2)

trees/cart_utils.go (new file, 74 lines)

@@ -0,0 +1,74 @@
package trees
import (
"github.com/sjwhitworth/golearn/base"
)
// Helper Function to check if data point is unique or not.
// We will use this to isolate unique values of a feature
func stringInSlice(a float64, list []float64) bool {
for _, b := range list {
if b == a {
return true
}
}
return false
}
// Isolate only unique values. This way, we can try only unique splits and not redundant ones.
func findUnique(data []float64) []float64 {
var unique []float64
for i := range data {
if !stringInSlice(data[i], unique) {
unique = append(unique, data[i])
}
}
return unique
}
// Isolate only the feature being considered for splitting. Reduces the complexity in managing splits.
func getFeature(data [][]float64, feature int64) []float64 {
var featureVals []float64
for i := range data {
featureVals = append(featureVals, data[i][feature])
}
return featureVals
}
// Make sure that split being considered has not been done before.
// Else we will unnecessarily try splits that won't improve Impurity.
func validate(triedSplits [][]float64, feature int64, threshold float64) bool {
for i := range triedSplits {
split := triedSplits[i]
featureTried, thresholdTried := split[0], split[1]
if int64(featureTried) == feature && thresholdTried == threshold {
return false
}
}
return true
}
// Helper function to convert base.FixedDataGrid into required format. Called in Fit, Predict
func convertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
// Allocate problem array
_, rows := X.Size()
problemVec := make([][]float64, rows)
// Retrieve numeric non-class Attributes
numericAttrs := base.NonClassFloatAttributes(X)
numericAttrSpecs := base.ResolveAttributes(X, numericAttrs)
// Convert each row
X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
// Allocate a new row
probRow := make([]float64, len(numericAttrSpecs))
// Read out the row
for i, _ := range numericAttrSpecs {
probRow[i] = base.UnpackBytesToFloat(row[i])
}
// Add the row
problemVec[rowNo] = probRow
return true, nil
})
return problemVec
}
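Since the helpers above operate on plain float64 slices, both the classifier and the regressor can call them directly. Below is a hypothetical test-style sketch of their behaviour; it assumes compilation inside the trees package, mirrors the assertions in the existing test file, and is not part of this commit.

package trees

import "testing"

// Hypothetical usage of the shared cart_utils helpers.
func TestCartUtilsSketch(t *testing.T) {
	data := [][]float64{{10, 1}, {1, 3}, {1, 2}}

	// getFeature extracts a single column.
	if got := getFeature(data, 1); len(got) != 3 || got[1] != 3 {
		t.Fatalf("getFeature returned %v", got)
	}
	// findUnique drops duplicates, as the existing test asserts.
	if got := findUnique([]float64{10, 1, 1}); len(got) != 2 {
		t.Fatalf("expected 2 unique values, got %v", got)
	}

	tried := [][]float64{{0, 5.5}} // (feature 0, threshold 5.5) already attempted
	if validate(tried, 0, 5.5) {
		t.Fatal("validate should reject a split that was already tried")
	}
	if !validate(tried, 1, 5.5) {
		t.Fatal("validate should accept a split on a different feature")
	}
}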