1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-26 13:49:14 +08:00
golearn/examples/trees/isolationForest/isolation_forest.go
2020-08-27 18:00:37 +05:30

58 lines
1.8 KiB
Go

// Example of how to use Isolation Forest for outlier detection
package main
import (
"fmt"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/trees"
)
func main() {
/* Isolation Forest is used for outlier detection
The algorithm works by randomly splitting the data, so results won't be exactly reproducible
but generally outliers will still be classified as outliers. */
// Load data for outlier detection - includes gaussian distribution, and ten outliers at the end
// Dataset has 1000 normal datapoints, and 10 outliers at the ned
csvData, err := base.ParseCSVToInstances("../datasets/gaussian_outliers.csv", true)
if err != nil {
panic(err)
}
// Create New Isolation Forest with 100 trees, max depth 100, and each tree will use 850 datapoints
forest := trees.NewIsolationForest(100, 100, 850)
// fit the isolation forest to the data. Note that all class attributes are also used during training.
// Remove all class attributes you don't want to use before calling fit.
forest.Fit(csvData)
// Make predictions. Generally, IsolationForest is used for Interpolation, not Extrapolation.
// Predictions are returned as Anomaly Scores from 0 to 1. close to 0 - not outlier, close to 1 - outlier
preds := forest.Predict(csvData)
// Let's find the average and minimum Anomaly Score for normal data
var avgScore float64
var min float64
min = 1
for i := 0; i < 1000; i++ {
temp := preds[i]
avgScore += temp
if temp < min {
min = temp
}
}
fmt.Println(avgScore / 1000)
fmt.Println(min)
// Now let's print the anomaly scores for the outliers.
// You should find that these values are much higher (around 0.7) as comapred to the scores for normal data.
fmt.Println("Anomaly Scores for outliers are ")
for i := 1000; i < 1010; i++ {
fmt.Print(" ")
fmt.Println(preds[i])
}
}