mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-26 13:49:14 +08:00
Adding Isolation + Fixing previous import issue
This commit is contained in:
parent
6aa37aca00
commit
452acbabb3
@ -7,7 +7,6 @@ import (
|
||||
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
"github.com/sjwhitworth/golearn/trees"
|
||||
|
||||
)
|
||||
|
||||
func main() {
|
||||
@ -45,7 +44,7 @@ func main() {
|
||||
|
||||
// Create New Classification Tree
|
||||
// Hyperparameters - loss function, max Depth (-1 will split until pure), list of unique labels
|
||||
decTree := NewDecisionTreeClassifier("entropy", -1, []int64{0, 1})
|
||||
decTree := trees.NewDecisionTreeClassifier("entropy", -1, []int64{0, 1})
|
||||
|
||||
// Train Tree
|
||||
err = decTree.Fit(trainData)
|
||||
@ -72,7 +71,7 @@ func main() {
|
||||
trainRegData, testRegData := base.InstancesTrainTestSplit(regressionData, 0.5)
|
||||
|
||||
// Hyperparameters - Loss function, max Depth (-1 will split until pure)
|
||||
regTree := NewDecisionTreeRegressor("mse", -1)
|
||||
regTree := trees.NewDecisionTreeRegressor("mse", -1)
|
||||
|
||||
// Train Tree
|
||||
err = regTree.Fit(trainRegData)
|
218
trees/isolation.go
Normal file
218
trees/isolation.go
Normal file
@ -0,0 +1,218 @@
|
||||
package trees
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
)
|
||||
|
||||
type IsolationForest struct {
|
||||
nTrees int
|
||||
maxDepth int
|
||||
subSpace int
|
||||
trees []regressorNode
|
||||
}
|
||||
|
||||
func selectFeature(data [][]float64) int64 {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
return int64(rand.Intn(len(data[0])))
|
||||
}
|
||||
|
||||
func minMax(feature int64, data [][]float64) (float64, float64) {
|
||||
|
||||
var min, max float64
|
||||
|
||||
min = math.Inf(1)
|
||||
max = math.Inf(-1)
|
||||
for _, instance := range data {
|
||||
if instance[feature] > max {
|
||||
max = instance[feature]
|
||||
}
|
||||
if instance[feature] < min {
|
||||
min = instance[feature]
|
||||
}
|
||||
}
|
||||
|
||||
return min, max
|
||||
}
|
||||
|
||||
func selectValue(min, max float64) float64 {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
val := min + (rand.Float64() * (max - min))
|
||||
if val == min {
|
||||
val += 0.000001
|
||||
} else if val == max {
|
||||
val -= 0.000001
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
||||
func splitData(val float64, feature int64, data [][]float64) ([][]float64, [][]float64) {
|
||||
var leftData, rightData [][]float64
|
||||
for _, instance := range data {
|
||||
if instance[feature] <= val {
|
||||
leftData = append(leftData, instance)
|
||||
} else {
|
||||
rightData = append(rightData, instance)
|
||||
}
|
||||
}
|
||||
return leftData, rightData
|
||||
}
|
||||
|
||||
func checkData(data [][]float64) bool {
|
||||
for _, instance := range data {
|
||||
for i, val := range instance {
|
||||
if val != data[0][i] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func buildTree(data [][]float64, upperNode regressorNode, depth int, maxDepth int) regressorNode {
|
||||
depth++
|
||||
|
||||
upperNode.isNodeNeeded = true
|
||||
if (depth > maxDepth) || (len(data) <= 1) || !checkData(data) {
|
||||
upperNode.isNodeNeeded = false
|
||||
return upperNode
|
||||
}
|
||||
|
||||
var featureToSplit int64
|
||||
var splitVal float64
|
||||
var min, max float64
|
||||
min, max = 0.0, 0.0
|
||||
|
||||
for min == max {
|
||||
featureToSplit = selectFeature(data)
|
||||
min, max = minMax(featureToSplit, data)
|
||||
splitVal = selectValue(min, max)
|
||||
}
|
||||
|
||||
leftData, rightData := splitData(splitVal, featureToSplit, data)
|
||||
|
||||
upperNode.Feature = featureToSplit
|
||||
upperNode.Threshold = splitVal
|
||||
upperNode.LeftPred = float64(len(leftData))
|
||||
upperNode.RightPred = float64(len(rightData))
|
||||
|
||||
var leftNode, rightNode regressorNode
|
||||
leftNode = buildTree(leftData, leftNode, depth, maxDepth)
|
||||
rightNode = buildTree(rightData, rightNode, depth, maxDepth)
|
||||
|
||||
if leftNode.isNodeNeeded {
|
||||
upperNode.Left = &leftNode
|
||||
}
|
||||
if rightNode.isNodeNeeded {
|
||||
upperNode.Right = &rightNode
|
||||
}
|
||||
|
||||
return upperNode
|
||||
}
|
||||
|
||||
func getRandomData(data [][]float64, subSpace int) [][]float64 {
|
||||
var randomData [][]float64
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
for i := 0; i < subSpace; i++ {
|
||||
randomData = append(randomData, data[rand.Intn(len(data))])
|
||||
}
|
||||
return randomData
|
||||
}
|
||||
|
||||
func NewIsolationForest(nTrees int, maxDepth int, subSpace int) IsolationForest {
|
||||
var iForest IsolationForest
|
||||
iForest.nTrees = nTrees
|
||||
iForest.maxDepth = maxDepth
|
||||
iForest.subSpace = subSpace
|
||||
return iForest
|
||||
}
|
||||
|
||||
func (iForest *IsolationForest) Fit(X base.FixedDataGrid) {
|
||||
data := preprocessData(X)
|
||||
nTrees := iForest.nTrees
|
||||
subSpace := iForest.subSpace
|
||||
maxDepth := iForest.maxDepth
|
||||
|
||||
var forest []regressorNode
|
||||
for i := 0; i < nTrees; i++ {
|
||||
subData := getRandomData(data, subSpace)
|
||||
var tree regressorNode
|
||||
|
||||
tree = buildTree(subData, tree, 0, maxDepth)
|
||||
forest = append(forest, tree)
|
||||
}
|
||||
iForest.trees = forest
|
||||
}
|
||||
|
||||
func pathLength(tree regressorNode, instance []float64, path float64) float64 {
|
||||
path++
|
||||
|
||||
if instance[tree.Feature] <= tree.Threshold {
|
||||
if tree.Left == nil {
|
||||
if tree.LeftPred <= 1 {
|
||||
return path
|
||||
} else {
|
||||
return path + cFactor(int(tree.LeftPred))
|
||||
}
|
||||
}
|
||||
path = pathLength(*tree.Left, instance, path)
|
||||
} else {
|
||||
if tree.Right == nil {
|
||||
if tree.RightPred <= 1 {
|
||||
return path
|
||||
} else {
|
||||
return path + cFactor(int(tree.RightPred))
|
||||
}
|
||||
}
|
||||
path = pathLength(*tree.Right, instance, path)
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
func evaluateInstance(instance []float64, forest []regressorNode) []float64 {
|
||||
var paths []float64
|
||||
for _, tree := range forest {
|
||||
paths = append(paths, pathLength(tree, instance, 0))
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
func cFactor(n int) float64 {
|
||||
return 2.0*(math.Log(float64(n-1))+0.5772156649) - (float64(2.0*(n-1)) / float64(n))
|
||||
}
|
||||
|
||||
func anomalyScore(instance []float64, forest []regressorNode, n int) float64 {
|
||||
paths := evaluateInstance(instance, forest)
|
||||
E := 0.0
|
||||
for _, path := range paths {
|
||||
E += path
|
||||
}
|
||||
E /= float64(len(paths))
|
||||
c := cFactor(n)
|
||||
return math.Pow(2, (-1 * E / c))
|
||||
}
|
||||
func (iForest *IsolationForest) Predict(X base.FixedDataGrid) []float64 {
|
||||
data := preprocessData(X)
|
||||
|
||||
var preds []float64
|
||||
for _, instance := range data {
|
||||
score := anomalyScore(instance, iForest.trees, iForest.subSpace)
|
||||
preds = append(preds, score)
|
||||
}
|
||||
return preds
|
||||
}
|
||||
|
||||
func preprocessData(X base.FixedDataGrid) [][]float64 {
|
||||
data := convertInstancesToProblemVec(X)
|
||||
class, err := regressorConvertInstancesToLabelVec(X)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for i, point := range class {
|
||||
data[i] = append(data[i], point)
|
||||
}
|
||||
return data
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user