mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-26 13:49:14 +08:00
clustering: creates the package and implements DBSCAN
Verified against scikit-learn's implementation (gen_test.py)
This commit is contained in:
parent
6ed783530a
commit
986cd230f9
69
base/conversion.go
Normal file
69
base/conversion.go
Normal file
@ -0,0 +1,69 @@
|
||||
package base
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gonum/matrix/mat64"
|
||||
)
|
||||
|
||||
func checkAllAttributesAreFloat(attrs []Attribute) error {
|
||||
// Check that all the attributes are float
|
||||
for _, a := range attrs {
|
||||
if _, ok := a.(*FloatAttribute); !ok {
|
||||
fmt.Errorf("All []Attributes to this method must be FloatAttributes")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ConvertRowToMat64 takes a list of Attributes, a FixedDataGrid
|
||||
// and a row number, and returns the float values of that row
|
||||
// in a mat64.Dense format.
|
||||
func ConvertRowToMat64(attrs []Attribute, f FixedDataGrid, r int) (*mat64.Dense, error) {
|
||||
|
||||
err := checkAllAttributesAreFloat(attrs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Allocate the return value
|
||||
ret := mat64.NewDense(1, len(attrs), nil)
|
||||
|
||||
// Resolve all the attributes
|
||||
attrSpecs := ResolveAttributes(f, attrs)
|
||||
|
||||
// Get the results
|
||||
for i, a := range attrSpecs {
|
||||
ret.Set(0, i, UnpackBytesToFloat(f.Get(a, r)))
|
||||
}
|
||||
|
||||
// Return the result
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// ConvertAllRowsToMat64 takes a list of Attributes and returns a vector
|
||||
// of all rows in a mat64.Dense format.
|
||||
func ConvertAllRowsToMat64(attrs []Attribute, f FixedDataGrid) ([]*mat64.Dense, error) {
|
||||
|
||||
// Check for floats
|
||||
err := checkAllAttributesAreFloat(attrs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Return value
|
||||
_, rows := f.Size()
|
||||
ret := make([]*mat64.Dense, rows)
|
||||
|
||||
// Resolve all attributes
|
||||
attrSpecs := ResolveAttributes(f, attrs)
|
||||
|
||||
// Set the values in each return value
|
||||
for i := 0; i < rows; i++ {
|
||||
cur := mat64.NewDense(1, len(attrs), nil)
|
||||
for j, a := range attrSpecs {
|
||||
cur.Set(0, j, UnpackBytesToFloat(f.Get(a, i)))
|
||||
}
|
||||
ret[i] = cur
|
||||
}
|
||||
return ret, nil
|
||||
}
|
82
clustering/cluster_test.go
Normal file
82
clustering/cluster_test.go
Normal file
@ -0,0 +1,82 @@
|
||||
package clustering
|
||||
|
||||
import (
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestClusterEquality(t *testing.T) {
|
||||
|
||||
Convey("Should be able to determine if two cluster maps represent the same thing...", t, func() {
|
||||
|
||||
Convey("When everything's exactly the same...", func() {
|
||||
|
||||
m1 := ClusterMap(make(map[int][]int))
|
||||
m1[0] = []int{1, 2, 3}
|
||||
m1[1] = []int{4, 5}
|
||||
|
||||
m2 := ClusterMap(make(map[int][]int))
|
||||
m2[0] = []int{1, 2, 3}
|
||||
m2[1] = []int{4, 5}
|
||||
|
||||
ret, err := m1.Equals(m2)
|
||||
So(err, ShouldBeNil)
|
||||
So(ret, ShouldBeTrue)
|
||||
|
||||
})
|
||||
|
||||
Convey("With re-labelled clusters...", func() {
|
||||
m1 := ClusterMap(make(map[int][]int))
|
||||
m1[1] = []int{1, 2, 3}
|
||||
m1[0] = []int{4, 5}
|
||||
|
||||
m2 := ClusterMap(make(map[int][]int))
|
||||
m2[1] = []int{1, 2, 3}
|
||||
m2[0] = []int{4, 5}
|
||||
|
||||
ret, err := m1.Equals(m2)
|
||||
So(err, ShouldBeNil)
|
||||
So(ret, ShouldBeTrue)
|
||||
})
|
||||
|
||||
Convey("With missing clusters...", func() {
|
||||
m1 := ClusterMap(make(map[int][]int))
|
||||
m1[1] = []int{1, 2, 3}
|
||||
|
||||
m2 := ClusterMap(make(map[int][]int))
|
||||
m2[1] = []int{1, 2, 3}
|
||||
m2[0] = []int{4, 5}
|
||||
|
||||
_, err := m1.Equals(m2)
|
||||
So(err, ShouldNotBeNil)
|
||||
})
|
||||
|
||||
Convey("With missing points...", func() {
|
||||
m1 := ClusterMap(make(map[int][]int))
|
||||
m1[1] = []int{1, 3}
|
||||
m1[0] = []int{4, 5}
|
||||
|
||||
m2 := ClusterMap(make(map[int][]int))
|
||||
m2[1] = []int{1, 2, 3}
|
||||
m2[0] = []int{4, 5}
|
||||
|
||||
_, err := m1.Equals(m2)
|
||||
So(err, ShouldNotBeNil)
|
||||
})
|
||||
|
||||
Convey("With invalid maps...", func() {
|
||||
m1 := ClusterMap(make(map[int][]int))
|
||||
m1[0] = []int{1, 2, 3}
|
||||
m1[1] = []int{4, 4, 5}
|
||||
|
||||
m2 := ClusterMap(make(map[int][]int))
|
||||
m2[0] = []int{1, 2, 3}
|
||||
m2[1] = []int{4, 5}
|
||||
|
||||
_, err := m1.Equals(m2)
|
||||
So(err, ShouldNotBeNil)
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
}
|
107
clustering/clustering.go
Normal file
107
clustering/clustering.go
Normal file
@ -0,0 +1,107 @@
|
||||
// Package clustering implements clustering algorithms.
|
||||
package clustering
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
"github.com/sjwhitworth/golearn/metrics/pairwise"
|
||||
)
|
||||
|
||||
// ClusterParameters takes a number of variables common to all clustering
|
||||
// algorithms.
|
||||
|
||||
type ClusterParameters struct {
|
||||
// Attributes represents the set of Attributes which
|
||||
// can be used for clustering
|
||||
Attributes []base.Attribute
|
||||
|
||||
// Metric is used to compute pairwise distance
|
||||
Metric pairwise.PairwiseDistanceFunc
|
||||
}
|
||||
|
||||
// ClusterMap contains the cluster identifier as a key, followed by a vector of
// point indices that cluster contains.
type ClusterMap map[int][]int

// Invert returns an alternative form of the cluster map where the key is the
// point index and the value is the identifier of the cluster it is assigned to.
//
// It returns an error if any point index occurs more than once, whether in two
// different clusters or twice within the same cluster.
func (ref ClusterMap) Invert() (map[int]int, error) {
	ret := make(map[int]int)
	for c, points := range ref {
		for _, p := range points {
			if _, seen := ret[p]; seen {
				return nil, fmt.Errorf("Not a valid cluster map (points appear in more than one cluster)")
			}
			ret[p] = c
		}
	}
	return ret, nil
}

// Equals checks whether a bijection exists between two ClusterMaps (i.e. the
// clusters in one can be re-labelled to become the clusters of another).
//
// It returns (true, nil) when such a relabelling exists. In every other case
// it returns false together with an error describing the first difference
// found: a different number of clusters, an invalid map (see Invert), a point
// present in only one of the maps, or clusters that cannot be matched
// one-to-one.
func (ref ClusterMap) Equals(other ClusterMap) (bool, error) {
	if len(ref) != len(other) {
		return false, fmt.Errorf("ref and other do not contain the same number of clusters (%d and %d)", len(ref), len(other))
	}

	refInv, err := ref.Invert()
	if err != nil {
		return false, fmt.Errorf("ref: %s", err)
	}

	otherInv, err := other.Invert()
	if err != nil {
		return false, fmt.Errorf("other: %s", err)
	}

	// clusterIDMap maps a cluster identifier in other to its counterpart in
	// ref; claimedBy is the reverse direction and is what lets us reject a
	// many-to-one correspondence, which would not be a bijection.
	clusterIDMap := make(map[int]int)
	claimedBy := make(map[int]int)

	// Range through each point index of ref.
	for p, c1 := range refInv {
		c2, ok := otherInv[p]
		if !ok {
			return false, fmt.Errorf("failed to find reference point %d in src", p)
		}
		if c3, ok := clusterIDMap[c2]; ok {
			// c2 already has a counterpart; this point must agree with it.
			if c1 != c3 {
				return false, fmt.Errorf("ref point %d (cluster %d) is assigned to a different cluster (%d) in ref %v", p, c2, c1, clusterIDMap)
			}
			continue
		}
		if prev, taken := claimedBy[c1]; taken {
			// Two distinct clusters of other would collapse onto c1.
			return false, fmt.Errorf("ref cluster %d corresponds to more than one cluster in other (%d and %d)", c1, prev, c2)
		}
		clusterIDMap[c2] = c1
		claimedBy[c1] = c2
	}

	// containsAll reports whether every element of sub also occurs in super.
	containsAll := func(super, sub []int) bool {
		present := make(map[int]struct{}, len(super))
		for _, v := range super {
			present[v] = struct{}{}
		}
		for _, v := range sub {
			if _, ok := present[v]; !ok {
				return false
			}
		}
		return true
	}

	// Check that, after relabelling, no cluster of other holds a point that
	// its counterpart in ref lacks. Together with the loop above (every ref
	// point exists in other, in the matching cluster) this makes the point
	// sets of matched clusters equal.
	for cOld, points := range other {
		cNew, ok := clusterIDMap[cOld]
		if !ok {
			// No ref point lives in this cluster, so it has no counterpart.
			// That is only acceptable when the cluster is empty.
			if len(points) != 0 {
				return false, fmt.Errorf("cluster %d in other contains points that are missing from ref (%v)", cOld, points)
			}
			continue
		}
		if !containsAll(ref[cNew], points) {
			return false, fmt.Errorf("Re-labelled cluster %d => %d doesn't contain the same points (%v, %v)", cOld, cNew, ref[cNew], points)
		}
	}

	return true, nil
}
|
750
clustering/dbscan.csv
Normal file
750
clustering/dbscan.csv
Normal file
@ -0,0 +1,750 @@
|
||||
0.494260967249,1.45106696541
|
||||
-1.42808099324,-0.83706376669
|
||||
0.338559182384,1.03875870939
|
||||
0.119001013781,-1.05397553336
|
||||
1.12242460445,1.77493654436
|
||||
-1.26156989707,0.271881354299
|
||||
-1.30154774626,-0.762062025148
|
||||
0.585698651521,-0.339104628157
|
||||
1.08247212014,0.886855396912
|
||||
1.01416667809,1.34114022391
|
||||
-1.21578195893,-0.601021238858
|
||||
-1.25021782593,-1.05761650335
|
||||
-1.05160415572,-0.780084156141
|
||||
1.15263449272,-0.648539905918
|
||||
-0.783299140581,-1.2248966985
|
||||
0.202587147419,1.61104848936
|
||||
-1.43020789851,-1.82380067733
|
||||
-0.916300845616,-0.480830396598
|
||||
-0.506013825832,-0.295715454174
|
||||
0.436426179395,-1.06597144351
|
||||
0.468034167368,-0.974110220304
|
||||
0.522354793098,-0.641695891625
|
||||
0.94533367495,-0.543880951202
|
||||
0.94661473578,-0.939854758443
|
||||
-1.38551398913,-0.73950655252
|
||||
-1.15374916281,-0.250507932367
|
||||
0.493572698047,-0.949825244593
|
||||
0.884913340754,1.66591701207
|
||||
0.249587300835,1.57229126004
|
||||
1.02800263162,-0.340081504198
|
||||
0.478275464063,1.19798226443
|
||||
-1.19268844384,-0.510240121174
|
||||
-1.85804701232,-1.33021784213
|
||||
0.528139618545,1.32892750576
|
||||
-0.918024481532,-0.652157357893
|
||||
0.756316701741,0.920633635328
|
||||
0.855048505014,-0.481028310004
|
||||
0.492824086051,1.78274421923
|
||||
0.380510951332,1.24884772379
|
||||
-0.166999182256,-0.0916528008137
|
||||
0.862512958934,-0.29122649879
|
||||
-1.28326220483,-0.63402691263
|
||||
-1.46013480318,-0.722834729597
|
||||
-1.48000289758,-1.09948040102
|
||||
-2.19020872323,-0.630588973627
|
||||
-1.07505211635,-0.474050249508
|
||||
0.541969904427,1.03090707759
|
||||
0.824488329821,-0.264039880782
|
||||
0.456263169078,2.05788223562
|
||||
-1.58709404439,-0.54480731903
|
||||
1.32708272612,-0.345071514843
|
||||
0.68614239282,-0.490086592009
|
||||
-1.60725507262,0.070747440379
|
||||
-1.53337705952,-0.570087546452
|
||||
1.0491125845,-0.574435960384
|
||||
0.731933094085,-0.608068176075
|
||||
-1.13848133348,-0.0659881431468
|
||||
1.36805202458,1.65962813336
|
||||
0.222462580182,-0.65053906069
|
||||
-1.18662195919,-0.78239641499
|
||||
0.357717455186,-0.584924154569
|
||||
0.588086269107,-0.230283609581
|
||||
0.78242146637,-0.380417760077
|
||||
1.2682093931,-0.857019912656
|
||||
0.549567992097,-0.773931305337
|
||||
0.981410379535,1.01828533931
|
||||
0.707839055866,-0.233211620345
|
||||
0.0165651739637,-0.923844177798
|
||||
0.158530593126,1.68427935414
|
||||
0.498933328512,1.18944226235
|
||||
0.394392460137,1.10697668799
|
||||
0.52298152277,-0.915281143053
|
||||
0.363168115217,1.90748256868
|
||||
0.346568780252,1.26411862836
|
||||
0.966039504954,-0.4318119363
|
||||
-1.14222916165,-0.398461611165
|
||||
-0.134479180583,2.11039748445
|
||||
-1.18845711973,0.191151161919
|
||||
0.235515043844,1.71737552151
|
||||
0.648790787207,-0.936837517765
|
||||
-1.58852748366,-0.819181976895
|
||||
-1.04572997888,-0.29002720873
|
||||
0.467505726335,0.450459334368
|
||||
0.0198833944692,1.48714816824
|
||||
0.189992256516,1.10986299053
|
||||
1.48201717596,1.82713555691
|
||||
-1.30489683944,-1.15150866165
|
||||
0.757809431355,-0.47686276961
|
||||
-1.54387743826,-0.684212390528
|
||||
0.53240786142,-0.776648241672
|
||||
0.85665850455,-1.34594223446
|
||||
0.403144558116,1.57028295161
|
||||
-1.3011171994,-0.790729653327
|
||||
0.972620490761,1.21000471162
|
||||
-1.00025584409,-0.628924362444
|
||||
1.22425496262,0.501610912038
|
||||
-1.15175818324,0.22764659828
|
||||
-1.31816425788,-0.630999410835
|
||||
0.402531346597,1.15248839326
|
||||
0.0906743459729,1.61848052292
|
||||
0.598794476009,-0.744251645998
|
||||
-1.37198702139,-0.980218172223
|
||||
0.520218965558,-0.919220905523
|
||||
0.631969327359,1.19544068432
|
||||
0.728113832873,-0.518758002884
|
||||
0.262658464722,0.0128713235313
|
||||
1.01826270251,-0.800567265699
|
||||
1.0896513853,-0.503675186289
|
||||
1.53624088423,0.894604885123
|
||||
0.511997776458,0.678078694437
|
||||
1.68745105198,1.27830755696
|
||||
-1.84237360674,-0.904437839063
|
||||
-1.19200811061,-0.463511666939
|
||||
-1.29275263692,0.287881967384
|
||||
-2.03126575898,-0.895274949124
|
||||
0.529118462695,0.654914838633
|
||||
0.468283787666,0.755733587995
|
||||
-1.638498618,-0.154707320244
|
||||
0.605617236401,1.70403704905
|
||||
-0.767697521224,-1.01384394922
|
||||
0.421112557426,-0.813005680016
|
||||
-1.1727392859,-0.0801023370369
|
||||
0.763176137366,1.82318913399
|
||||
-0.0334381403655,1.44539596918
|
||||
-1.60758525806,-0.62956732394
|
||||
0.72250888945,-0.367506703588
|
||||
-1.48527973153,-0.62861576205
|
||||
0.978478897202,1.05374904006
|
||||
0.451784483015,1.13661154122
|
||||
1.27710347995,-0.491509617737
|
||||
0.7166105877,1.15073382716
|
||||
0.705050630765,-1.01884736371
|
||||
0.535813899767,-1.31595906212
|
||||
0.279302786611,-1.16319317603
|
||||
0.29795190705,1.14196446938
|
||||
-1.5319923175,-1.74146843932
|
||||
0.485447620689,-0.597755525309
|
||||
0.407314491616,-0.790408883348
|
||||
0.381481488856,1.90489980312
|
||||
-1.60594123991,-0.76522411796
|
||||
1.23408760826,1.97619040399
|
||||
0.909343480925,-0.618337223907
|
||||
0.495887533633,0.855925046745
|
||||
0.793312516951,0.879279610882
|
||||
0.346669837831,-0.395258378353
|
||||
0.463120268974,-0.842105995666
|
||||
-0.422275985459,-0.190344559422
|
||||
0.938840781419,-0.223971270792
|
||||
-1.58434365981,-0.845357036129
|
||||
1.52307352239,0.741157517894
|
||||
0.473203974657,-0.605056119142
|
||||
-1.33430726419,-0.787153064395
|
||||
-1.30774613959,-0.537830906671
|
||||
0.44437726176,-0.570907450386
|
||||
0.302728842099,-1.4022293954
|
||||
0.498614426707,-0.661820178158
|
||||
1.02546663264,2.17903746819
|
||||
-0.888963724459,-0.894519799863
|
||||
-0.0094375858741,2.06614833436
|
||||
-1.259326547,-1.33666248485
|
||||
0.334806319729,0.635350614538
|
||||
-2.0514671874,-0.491853069487
|
||||
0.511781097662,0.772058829646
|
||||
0.635381289585,-1.23415961512
|
||||
0.840452136147,-0.925641488461
|
||||
-0.46307453491,-1.26531794688
|
||||
-1.37224990492,-0.0477233997811
|
||||
0.128494145161,-0.146277558271
|
||||
0.629212436152,-0.545489790799
|
||||
-1.28799441742,-0.218570654523
|
||||
0.638847594716,1.0198939832
|
||||
-1.90824567176,-1.24854294321
|
||||
0.983925587407,-0.980132673476
|
||||
0.751915912284,-0.434247990685
|
||||
0.246162045698,-0.972003120401
|
||||
-1.42184967713,-1.00645441438
|
||||
-1.36258687372,-0.465192195174
|
||||
0.729107773809,-1.12124670875
|
||||
1.28828508776,-1.18972269812
|
||||
0.936218595433,0.844436650383
|
||||
-1.41967242002,-1.33553338128
|
||||
0.451293435185,-0.337043043077
|
||||
0.889211776584,0.683688380936
|
||||
0.946264899744,0.846407250351
|
||||
0.516908027375,-1.13002059107
|
||||
0.663113490975,0.662420359006
|
||||
0.985803048039,1.26228271875
|
||||
-1.4124239618,-0.947706065026
|
||||
0.642179325842,1.36969227279
|
||||
-1.32320503558,-0.518361624408
|
||||
0.389031988291,1.16716527963
|
||||
-0.806854584638,-0.613264833433
|
||||
-0.73049432945,-0.484378149065
|
||||
0.493548378749,-0.761716569457
|
||||
0.118175433165,-0.443557808199
|
||||
1.00315780403,1.4310943891
|
||||
0.778850340762,2.09349071844
|
||||
-0.745033802864,-0.756441323796
|
||||
-0.93389892072,-0.103482424997
|
||||
0.68196176411,-0.273220993773
|
||||
-1.16459401764,-0.315541399223
|
||||
0.740399605464,-0.0945591684424
|
||||
0.856407754419,0.252753351451
|
||||
0.803410992909,-1.32952562448
|
||||
0.429896355505,-0.758228537429
|
||||
0.595823625156,1.74945400458
|
||||
1.02085295004,-0.440804557414
|
||||
0.30307695482,1.45762223084
|
||||
1.18958904168,-0.581519032443
|
||||
0.96915905519,-0.511234999414
|
||||
0.697140552761,1.46349275366
|
||||
0.637227696862,-0.764858659877
|
||||
1.35045914484,-0.667938023256
|
||||
0.250651256786,-1.19493208012
|
||||
1.28347766291,1.37097619103
|
||||
-0.128975958296,1.09716295281
|
||||
-1.7517528787,-0.262053681114
|
||||
-1.3635857203,-1.06031600728
|
||||
-0.904113999203,0.191818430248
|
||||
0.165426717861,-0.866647109384
|
||||
0.232203921427,-0.682948158472
|
||||
0.350368147923,-0.295280019807
|
||||
1.5427482888,-0.592939512519
|
||||
-1.13795423209,-0.133498274187
|
||||
0.674237889386,-0.632143914378
|
||||
0.334556478351,-1.20237442694
|
||||
0.528396459186,1.34497258643
|
||||
0.268370506258,0.734359941775
|
||||
0.309361881005,-0.728426362716
|
||||
0.917435744228,1.30854004814
|
||||
0.428789300542,1.41209652083
|
||||
0.199130767118,1.65759766562
|
||||
-1.17444696491,-0.950375612201
|
||||
0.597006581866,1.19119789824
|
||||
-1.45170622969,-0.891168308477
|
||||
-1.41986354849,-0.273475605125
|
||||
-1.57409699552,-0.422236366569
|
||||
1.04184264467,-0.362737479132
|
||||
-2.14219480292,-0.482272076783
|
||||
-1.50691533211,-0.200973148817
|
||||
0.0543420665276,1.33168891813
|
||||
-1.13144663461,-0.651825483298
|
||||
1.07155174333,-0.692136570485
|
||||
0.583387651839,-0.491450887858
|
||||
-1.14297733022,-0.697948095468
|
||||
0.0998245638451,0.10950372489
|
||||
0.220588982913,-0.851548705937
|
||||
-1.13730048755,-0.564448259501
|
||||
0.905073179513,1.12779984735
|
||||
0.72504167988,1.28738215218
|
||||
-1.06955320593,-0.467663188307
|
||||
-0.880265370005,-1.02614239598
|
||||
-1.44264764226,-0.96145282057
|
||||
1.01333072504,1.24675601661
|
||||
-1.0093984377,-1.05143861237
|
||||
0.507657052315,1.36804853004
|
||||
1.26502785776,-0.711979714262
|
||||
1.31608042094,1.5734222567
|
||||
0.334632982453,-0.84147974129
|
||||
0.802031438762,0.228215838939
|
||||
1.38250775401,-0.644251339858
|
||||
0.919614961822,-1.22049235391
|
||||
0.929729151417,-0.208693463261
|
||||
-1.53633104344,-0.511275317046
|
||||
-0.665051865958,-0.739115745001
|
||||
-0.335795516652,1.56140541417
|
||||
1.23901518412,1.87882199622
|
||||
-1.35543673912,-0.601849685925
|
||||
-1.15154941392,-0.269135444753
|
||||
0.608439338548,1.46684269694
|
||||
1.06006794863,1.13065360895
|
||||
0.942890187819,-0.742929110414
|
||||
-1.15672050041,-0.436145800526
|
||||
1.62198216506,0.050201317777
|
||||
0.854125246175,-0.514807506009
|
||||
-1.14337683511,-0.490935142717
|
||||
-1.51048251847,-0.0345004965754
|
||||
0.880530249926,-0.869888336327
|
||||
-1.36540418059,-0.756111150943
|
||||
0.601814512111,-1.21412505961
|
||||
-0.0621652593321,1.12108597614
|
||||
0.74067770872,-0.576648130759
|
||||
-0.183577853633,-0.125433577503
|
||||
0.417995488425,1.21449387096
|
||||
-1.1856447963,-0.984315517908
|
||||
1.07887574968,-0.840413058707
|
||||
0.090657698723,-1.25434772582
|
||||
0.0261662265887,1.22429234588
|
||||
1.13673243898,-0.444139145222
|
||||
1.23361139042,-1.09421718393
|
||||
0.351468885092,1.51690258534
|
||||
0.255831769187,1.27677830087
|
||||
0.798195414423,-0.18283188485
|
||||
1.31845143924,1.69400632284
|
||||
0.938052607202,-0.419433668128
|
||||
0.388310366276,1.31945848095
|
||||
1.00904356759,-0.374533562373
|
||||
-1.08675207316,-0.230719819714
|
||||
0.956791915728,1.33752493245
|
||||
0.964894172999,1.3091321864
|
||||
0.630607763963,1.39287553367
|
||||
-1.41288695181,-0.864681477113
|
||||
0.261119656155,-1.02691248837
|
||||
-0.882375409513,-0.666629249983
|
||||
0.989911346176,-0.744391801077
|
||||
0.867329484559,-0.768003291115
|
||||
1.10613565156,1.4303998032
|
||||
0.77134497925,-0.692113237484
|
||||
0.343526184216,-0.991545218203
|
||||
0.758591550569,1.54398289162
|
||||
0.707946435833,1.45422137588
|
||||
0.709604992056,-1.40060170714
|
||||
-1.62485869339,-0.127799648835
|
||||
-1.66703749341,0.0158250976471
|
||||
-1.80730926772,-0.301662933271
|
||||
-1.45291560869,-0.535118179264
|
||||
-1.4701829607,-0.667609031391
|
||||
0.826731842161,1.41567303436
|
||||
-1.83590114306,-1.10954151061
|
||||
-1.6332275232,-0.563497927722
|
||||
-0.7388346936,-0.798186938046
|
||||
-1.82702823377,0.13893299319
|
||||
1.08739214482,0.826583726311
|
||||
0.196057452318,2.06336452546
|
||||
-0.962783057941,-0.109325188026
|
||||
-1.19668293625,-1.1087752111
|
||||
-0.920351459366,-0.706719513233
|
||||
1.1741662534,1.0387978517
|
||||
0.489318601459,-0.795493247886
|
||||
-0.0285631715351,1.48253801626
|
||||
-1.55996778776,-0.562017909444
|
||||
0.0907181454452,-0.814517495862
|
||||
1.04873107616,-0.452078258313
|
||||
0.641663493277,1.45460629445
|
||||
0.396805058072,1.10427025972
|
||||
1.00336963075,-0.459191567668
|
||||
0.907351763777,1.46562217387
|
||||
0.904912861981,-1.62473397987
|
||||
-1.30060206226,-0.639040245494
|
||||
0.22255248672,1.32737094419
|
||||
0.41209455966,-0.958675990971
|
||||
0.941556677173,1.35441829013
|
||||
1.28361991963,-1.24163477985
|
||||
-0.376722258575,1.54300064517
|
||||
0.930527863539,-0.784505897599
|
||||
1.05101554226,-0.405406154061
|
||||
1.22185277774,2.04479129366
|
||||
-1.10897541444,-0.568930353083
|
||||
0.637361305672,1.47374301327
|
||||
-0.735046904585,-0.332733398991
|
||||
0.914105951171,1.81364038611
|
||||
0.815815323504,-0.428342552091
|
||||
0.655466878695,-0.869548902941
|
||||
-1.1045597651,-0.600408464946
|
||||
-0.915703222184,-0.742626383383
|
||||
-1.3571704177,-0.68125832152
|
||||
0.69160775897,-0.893583583689
|
||||
0.978900301359,1.75109237406
|
||||
0.53683021324,-1.41620152234
|
||||
1.09237619762,1.72716832141
|
||||
0.866591909179,-0.581572078316
|
||||
-1.80307744469,-0.65461097373
|
||||
-0.127231346916,-0.409038899099
|
||||
0.541525702451,-0.201173106705
|
||||
0.68589072527,1.53390864901
|
||||
-0.502670916098,-0.757868411152
|
||||
0.417479823257,0.872860696972
|
||||
-2.0289141946,-0.993678879688
|
||||
0.245343426191,1.77834730722
|
||||
0.316274690117,2.05030729845
|
||||
1.23151797851,1.52230461678
|
||||
0.488799329286,1.01622700328
|
||||
0.736124228521,-0.560102473907
|
||||
0.0380991755979,1.54458039477
|
||||
0.348282296735,0.0373035505291
|
||||
0.791153859839,1.36235109152
|
||||
-1.89637476785,-0.983716547448
|
||||
0.529079350094,1.21622740397
|
||||
-1.2345838948,-0.786033236307
|
||||
0.206511679327,-0.620187190429
|
||||
-1.25908731883,-0.301031125224
|
||||
-1.09843278784,0.0369549195008
|
||||
-1.10406146313,-1.35048039511
|
||||
0.983155368445,1.41480769807
|
||||
-1.7328692309,-1.08216857053
|
||||
-0.917910107541,-0.0889436794991
|
||||
0.312585483993,1.0818337627
|
||||
-0.0811644021867,-0.707691032276
|
||||
-1.20266214326,-0.217504289139
|
||||
0.454419137278,2.2457941917
|
||||
0.471831725992,-0.493824106953
|
||||
1.29161652352,-0.520992830994
|
||||
-1.25588057463,-0.721197168795
|
||||
-1.20377898567,-1.33173379489
|
||||
1.11899200093,-0.713538916105
|
||||
0.339906689497,-0.72413604985
|
||||
0.615417018996,-0.858079193557
|
||||
-1.01823258109,-0.78714664658
|
||||
0.816099854449,-0.871668345031
|
||||
-1.7212991458,-0.777848794878
|
||||
0.843019145714,-0.498712137992
|
||||
1.4021067635,1.45886382804
|
||||
0.878294256485,-1.02266917785
|
||||
-0.88512932828,-0.853503063368
|
||||
0.430259456368,-0.453270444086
|
||||
-1.77952949337,-0.141961490527
|
||||
0.849914524615,1.24032152147
|
||||
-1.32980886649,-0.481002489736
|
||||
0.624470649758,1.26531866728
|
||||
-1.06157593269,-1.13833962673
|
||||
-1.3992137138,-0.965470741462
|
||||
0.896181657602,0.695919911938
|
||||
-1.418340371,-0.224255463115
|
||||
0.0738188763056,-0.0563312160229
|
||||
1.01170961883,0.241023782153
|
||||
-1.5363281273,0.0159593515193
|
||||
0.82770781377,0.709297571031
|
||||
0.545029125045,0.868146825735
|
||||
0.94527049937,-0.689257336931
|
||||
-1.19201851393,-0.0979642908923
|
||||
0.356642444398,-0.521177720048
|
||||
1.25677847275,-0.948042349321
|
||||
0.960112654402,-1.1046969869
|
||||
0.467333609641,-0.297755148203
|
||||
-1.09928800088,-0.782568121394
|
||||
0.499876498504,1.34378633999
|
||||
-0.0980920351721,1.38052928695
|
||||
-0.233897355292,1.40492904943
|
||||
0.951304495882,1.12558216168
|
||||
-1.57107850167,-0.657989767628
|
||||
0.284198318557,1.14751633136
|
||||
1.14780923861,-0.398627857264
|
||||
-1.63748393741,-0.707992965283
|
||||
0.396760739464,1.1549469915
|
||||
-0.856392511462,-0.729638141622
|
||||
0.743336814006,-0.0447286202516
|
||||
0.213902305912,1.02275520522
|
||||
0.866879045866,1.22042656018
|
||||
-0.88179618297,-1.43514524119
|
||||
0.334722303045,0.736465317357
|
||||
-1.71828945714,-0.333062709029
|
||||
-0.918042667376,-0.843035843758
|
||||
0.929243026125,1.35726190001
|
||||
-0.431851673719,-1.10093484648
|
||||
0.703743675795,1.87295209701
|
||||
0.98717412056,-0.391248211672
|
||||
0.446786417845,-0.232663277488
|
||||
0.833397671467,-1.01523684003
|
||||
-1.31380292373,-0.106348966316
|
||||
-1.98210412488,-0.520364529607
|
||||
0.882630413465,-0.204652953696
|
||||
0.57473870386,1.15343094618
|
||||
-1.64296177795,-0.545851844001
|
||||
0.812520126446,1.57046768
|
||||
-0.221156389297,0.90920018435
|
||||
-1.31918421048,-1.02294749184
|
||||
0.756117389326,1.26888096925
|
||||
-1.00145716326,-1.06765844508
|
||||
-1.16012367924,-1.17473398971
|
||||
0.140325452005,-0.427986994764
|
||||
0.5813642278,-0.83696135172
|
||||
-0.31645030278,-1.51218920885
|
||||
0.82452917064,0.93172792002
|
||||
-0.750534982503,-0.836888860558
|
||||
0.968658108542,-0.448623907721
|
||||
1.2006923499,-0.475696442665
|
||||
-1.26717115594,-0.665599874339
|
||||
-1.82087781658,-0.868101472932
|
||||
-1.16838236627,-1.54147890288
|
||||
-0.981140298879,-1.28505380627
|
||||
0.141023068843,1.12746333408
|
||||
0.754032847532,0.960404487137
|
||||
0.202135095167,1.18555519975
|
||||
0.849908773169,-0.847682954547
|
||||
0.744968023152,-0.228079376425
|
||||
-1.91222754219,-0.796509854232
|
||||
0.775623691917,-0.695029747499
|
||||
-0.767188336951,-0.677911431003
|
||||
0.712466108841,1.55417287552
|
||||
1.21349899534,1.6388133243
|
||||
-1.0869979326,-0.648693092282
|
||||
0.699067612971,-1.40916870622
|
||||
-1.53255598882,-0.261494722161
|
||||
1.38939876357,1.88316296941
|
||||
0.596690144163,1.72643881439
|
||||
0.804964907977,-0.170902873462
|
||||
0.40613498617,1.1198979641
|
||||
-1.20807438507,-0.788501079273
|
||||
0.728500901715,1.68709745134
|
||||
0.316645956769,-0.510754409208
|
||||
-0.823618040446,-0.884384414857
|
||||
1.01442400059,1.24817740818
|
||||
0.688659017161,-0.58639380357
|
||||
0.370731358867,-0.986204337596
|
||||
-1.02050291971,-0.913802249095
|
||||
1.07231521798,1.81215231098
|
||||
0.293755472217,0.389904123007
|
||||
0.384580005797,1.95282853017
|
||||
0.731079718128,-0.600671861978
|
||||
-1.27084815866,-0.599802102819
|
||||
-1.5506697485,-0.37391302332
|
||||
0.819305570722,1.43691036146
|
||||
0.758463908179,-0.257726277971
|
||||
1.00739359449,1.43935814903
|
||||
0.296387422059,1.74172031876
|
||||
-1.56792541994,-0.625734935299
|
||||
-1.58294937352,-0.212561302929
|
||||
-1.48429016855,-0.214074430447
|
||||
-1.57271416628,-0.983949703014
|
||||
0.535738594277,1.01076484292
|
||||
-1.47375056852,-0.955937874772
|
||||
0.568475265758,1.64956338847
|
||||
-0.862162831203,-0.884179051907
|
||||
0.544925120741,1.6193204064
|
||||
0.480499087021,2.02664864155
|
||||
0.122038139573,0.119143611341
|
||||
1.08322686266,1.50007405277
|
||||
-1.26363865114,-1.24824215223
|
||||
-1.09515150213,-0.580737374373
|
||||
0.745663888861,-0.797265870367
|
||||
-0.704911858139,-0.435654296496
|
||||
-1.08345708839,-0.683728002502
|
||||
-0.159115840147,1.35521476836
|
||||
-0.834099861805,-0.571377281807
|
||||
0.803301570929,1.04060299172
|
||||
0.882227724909,-1.04635993234
|
||||
-1.42356222195,-1.11563240162
|
||||
0.598075641758,1.34363133224
|
||||
1.00649041199,1.53362494993
|
||||
-1.74840606346,-0.757167172502
|
||||
0.665860879827,1.23423673133
|
||||
-2.27447426719,-1.08752048002
|
||||
-1.48420811929,-0.38750074543
|
||||
0.710494890905,-0.0301573663517
|
||||
0.2452388989,-1.06063486305
|
||||
-1.30030123852,-0.741203235798
|
||||
0.722560907798,-1.0887138629
|
||||
0.845890473528,-0.765476650879
|
||||
-0.987808045599,-0.300980235798
|
||||
0.798685296365,-0.0203804886503
|
||||
-1.27497724309,-0.358325506229
|
||||
-1.63280019769,-0.540251128077
|
||||
-1.64088545492,-0.242763376194
|
||||
-1.27425159086,-0.0278791499227
|
||||
0.710148737729,-1.00918956356
|
||||
0.383105045026,0.834657242727
|
||||
0.838530657897,1.26189497731
|
||||
1.37406167258,0.78263972991
|
||||
-1.95053657495,-1.14561235999
|
||||
0.582792508127,-0.690694356981
|
||||
0.913359445094,-0.619101104092
|
||||
0.785580964186,1.58201082262
|
||||
0.811102347249,-0.425468506696
|
||||
0.702937953149,1.18281732336
|
||||
0.532290267793,-0.895553891619
|
||||
-1.10621476202,0.0215080636159
|
||||
0.175894375098,1.6356268775
|
||||
-0.768255477238,-0.558603363551
|
||||
-1.04310626044,-1.16049754679
|
||||
1.07207441854,1.56204556067
|
||||
1.22409867855,1.22724262076
|
||||
-1.59405917837,-1.18378628891
|
||||
1.47675154752,-0.673616523166
|
||||
-1.07509128453,-0.542721555114
|
||||
0.930232971573,-0.66398380143
|
||||
0.857366630575,-0.359918879193
|
||||
0.251247597619,1.17090459393
|
||||
1.16780804184,-0.28530226059
|
||||
-1.6384800666,-1.12997990407
|
||||
-1.3215530792,-1.28470419469
|
||||
-1.09352620351,0.435204632862
|
||||
0.944537238213,-0.802446108456
|
||||
0.514125239901,1.36791043858
|
||||
0.621920898963,1.97375976496
|
||||
0.209206751878,-1.11104286684
|
||||
0.0993365991821,1.11511466724
|
||||
0.461961063861,-0.637500819694
|
||||
0.257071325333,-0.87287754892
|
||||
0.708547376969,1.39741781887
|
||||
0.475251941501,-1.31719059033
|
||||
0.610276462303,1.04890351278
|
||||
-1.52841458815,-0.454742945322
|
||||
-1.54735702492,-0.751499117222
|
||||
-0.503751481339,-0.655854045695
|
||||
-1.43543163548,-0.356638792552
|
||||
0.472644264313,1.31387413897
|
||||
0.20594576925,1.13671047633
|
||||
0.615102823628,1.3826749988
|
||||
0.634447199559,-0.507752603381
|
||||
-1.14039960054,-0.69475439419
|
||||
-1.32947946028,-0.197603144118
|
||||
-1.20830036549,0.0116873739331
|
||||
-1.73345855335,-0.132096369529
|
||||
0.357039754872,-1.10590660431
|
||||
0.077295305839,-1.13367252031
|
||||
0.487312167153,-1.06555350588
|
||||
0.353693691332,0.747606784869
|
||||
0.72201625169,-0.295881357905
|
||||
-1.92967429146,-0.682263484159
|
||||
-0.908678506749,-0.121343269386
|
||||
0.448752295827,0.926195545738
|
||||
0.418381709077,1.28661802591
|
||||
-1.61004687385,-0.988293010227
|
||||
1.56677839696,1.67458369438
|
||||
0.727890188144,-0.431864801265
|
||||
-1.37981436331,-0.709415449061
|
||||
0.686461132989,-0.0251400314444
|
||||
-1.60911292662,-1.23835579119
|
||||
0.165348852234,1.60960575054
|
||||
1.00429011937,-0.690085482535
|
||||
1.30821133172,-0.946724989662
|
||||
1.10257026468,1.23706132126
|
||||
1.15666930562,0.963710509452
|
||||
-1.67698616873,-0.808446550696
|
||||
0.576897088351,0.99280475248
|
||||
0.94555298466,1.70764263243
|
||||
0.568459449543,-0.902358290454
|
||||
0.273923190749,1.4295727757
|
||||
0.652270914927,2.0019813952
|
||||
0.60267889291,1.04240532752
|
||||
0.698937416681,-1.18542553401
|
||||
0.262702790697,0.705979429324
|
||||
-0.0487035963106,-0.47844505956
|
||||
0.278008906473,0.998459028372
|
||||
0.244071635336,0.754522731174
|
||||
-1.47458283881,-1.04780342988
|
||||
-1.41308398904,-1.16512495168
|
||||
0.376605532983,1.67410945603
|
||||
0.438984358586,1.46896397357
|
||||
-0.791357172838,-0.556204685175
|
||||
0.513404458949,0.148861596075
|
||||
0.55598980494,-0.734532332917
|
||||
-0.00629072685579,1.4577693075
|
||||
0.451340090014,0.847671272714
|
||||
0.587452946206,2.18519727161
|
||||
1.32815664066,1.46277983499
|
||||
1.15868201425,-0.705507183387
|
||||
-2.19229513315,-0.302285759118
|
||||
-1.49261826396,-0.821633387241
|
||||
-1.47130624425,-0.63019322747
|
||||
1.34514654196,-0.468598808267
|
||||
-1.58948534345,-1.07365303551
|
||||
0.279766265285,1.17244610684
|
||||
0.87120798495,1.44312846654
|
||||
0.620040716673,-1.14333094168
|
||||
0.807830356825,-0.835873745532
|
||||
0.894954831928,0.677891197264
|
||||
-0.602527884597,-0.69024906938
|
||||
0.658008384741,-0.400988830169
|
||||
0.914769130628,1.57740991958
|
||||
-1.48930440184,-1.37182156567
|
||||
0.273422769288,-0.11466674929
|
||||
0.472531390995,-0.0835618146858
|
||||
-1.12231828159,-0.713669220151
|
||||
0.241729907099,1.62624331977
|
||||
0.370860921385,0.99072512802
|
||||
0.980701101307,0.273732023633
|
||||
-0.863388034868,-1.63431795709
|
||||
0.734659138483,-1.29170380626
|
||||
1.3680516506,0.926799443937
|
||||
0.149907608089,1.41098618733
|
||||
0.0734364428587,1.47804655719
|
||||
0.415481089773,-1.08580981443
|
||||
1.87043802694,-0.564764189615
|
||||
1.33224467911,-0.791483035077
|
||||
0.463346223426,1.54846334002
|
||||
-1.77220711583,-0.866301255997
|
||||
-0.566063485459,-0.721836722342
|
||||
-1.36471735933,-0.972719760591
|
||||
-1.36476695392,-0.99379395024
|
||||
-1.28469298601,-0.318880090688
|
||||
1.51904736282,1.29062985586
|
||||
0.999041358538,1.43050704451
|
||||
0.187630466783,-0.83321811689
|
||||
-0.987675226269,-0.848285797745
|
||||
0.989408735718,-0.0650375905356
|
||||
0.915630353951,1.24498035147
|
||||
-1.43557093403,-0.974714006397
|
||||
-1.45387959068,-0.481585100397
|
||||
-0.278976401689,-0.795716253642
|
||||
0.400148808814,0.441913646796
|
||||
0.757992930737,-0.758884707769
|
||||
-1.92697167247,-0.984413224925
|
||||
-0.946132960141,-0.951642902807
|
||||
-0.374479291272,-1.13624183449
|
||||
0.40711034652,-0.252446172482
|
||||
-1.09764344526,-0.517312637143
|
||||
0.669488945211,-0.522164749169
|
||||
0.850151323586,1.34147908285
|
||||
1.25907388444,-0.386367441721
|
||||
0.347464827015,-0.533007300891
|
||||
-1.27564074212,-0.433436163023
|
||||
0.673862073887,1.42476903402
|
||||
0.849062018952,1.08302511354
|
||||
0.144638692788,1.95227594178
|
||||
-1.42722323034,-0.811204501399
|
||||
-1.64416438467,-0.543384736216
|
||||
0.463117573507,-0.274247994439
|
||||
0.943724941954,1.44571155091
|
||||
0.870463827056,1.22602060889
|
||||
1.18495335344,-0.234761356364
|
||||
0.733969567923,-0.67288952967
|
||||
-1.79850796029,-0.934840788244
|
||||
0.888166348926,-0.884107436609
|
||||
1.27188343182,-0.959291102507
|
||||
0.841840207037,-0.974022163633
|
||||
0.945701225098,1.6275104008
|
||||
0.660977855757,-0.970868100576
|
||||
-0.965730793499,-0.409989983699
|
||||
1.03969981683,-0.365715737398
|
||||
-1.16752822897,-0.477847154481
|
||||
-1.01284867036,-0.328504389699
|
||||
-1.54030317305,-0.13349028812
|
||||
0.624306174222,-0.148540727917
|
||||
0.667301055225,1.49923194215
|
||||
0.778588762664,-0.384883563802
|
||||
0.298516528096,1.32728546211
|
||||
0.624466159376,-1.19599990114
|
||||
0.810461715314,-0.542544742288
|
||||
0.469536296015,1.49442431098
|
||||
0.133949872378,0.783045014519
|
||||
1.68141770912,-0.716638871392
|
||||
-1.19132044269,-1.03412936975
|
||||
0.0382702789919,0.234517417471
|
||||
0.478943470729,1.37763420757
|
||||
1.59302092264,-0.720684680801
|
||||
0.361166988246,0.985493265361
|
||||
0.768892000974,0.97473143902
|
||||
0.962170710559,1.33000155872
|
||||
-1.1943589351,-0.485554696436
|
||||
0.306037432346,0.536315500264
|
||||
1.01845907675,-0.550623430195
|
||||
-1.26470285038,-0.339699106261
|
||||
0.633088431313,-1.00774511415
|
||||
0.800859848663,0.881746319329
|
||||
-1.90816425794,-0.664624198594
|
||||
0.403731959752,1.16592477989
|
||||
0.63308271635,0.852550261268
|
||||
0.184258505898,-0.476223501919
|
||||
0.368550995255,0.768785338289
|
||||
0.674247371136,-0.335924019381
|
||||
-1.79245127675,-1.4045223247
|
||||
-1.87656067,-0.476116371583
|
||||
0.493448265324,-0.820792271427
|
||||
-1.60183922321,-0.868539405266
|
||||
0.505927093362,1.21392675643
|
||||
-1.64046095603,-0.469972586457
|
||||
-0.0571387622957,-0.909261054305
|
||||
-1.1693940706,0.0395969246032
|
||||
0.263229511513,-0.926499490699
|
|
188
clustering/dbscan.go
Normal file
188
clustering/dbscan.go
Normal file
@ -0,0 +1,188 @@
|
||||
package clustering
|
||||
|
||||
import (
|
||||
"github.com/gonum/matrix/mat64"
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
"github.com/sjwhitworth/golearn/metrics/pairwise"
|
||||
"math/big"
|
||||
)
|
||||
|
||||
// DBSCANParameters describes the parameters of the density-based
// clustering algorithm DBSCAN.
type DBSCANParameters struct {
	ClusterParameters

	// Eps is the neighbourhood radius: regionQuery treats two points
	// as neighbours when the distance between them is <= Eps.
	Eps float64

	// MinCount is the minimum number of points that must lie within
	// Eps of a point for DBSCAN to start or grow a cluster from it.
	// Clusters that end up with fewer than MinCount members are
	// removed from the result.
	MinCount int
}
|
||||
|
||||
func regionQuery(p int, ret *big.Int, dist *mat64.Dense, eps float64) *big.Int {
|
||||
rows, _ := dist.Dims()
|
||||
// Return any points within the Eps neighbourhood
|
||||
for i := 0; i < rows; i++ {
|
||||
if dist.At(p, i) <= eps {
|
||||
ret = ret.SetBit(ret, i, 1) // Mark as neighbour
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func computePairwiseDistances(inst base.FixedDataGrid, attrs []base.Attribute, metric pairwise.PairwiseDistanceFunc) (*mat64.Dense, error) {
|
||||
// Compute pair-wise distances
|
||||
// First convert everything to floats
|
||||
mats, err := base.ConvertAllRowsToMat64(attrs, inst)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Next, do an n^2 computation of all pairwise distances
|
||||
_, rows := inst.Size()
|
||||
dist := mat64.NewDense(rows, rows, nil)
|
||||
for i := 0; i < rows; i++ {
|
||||
for j := i + 1; j < rows; j++ {
|
||||
d := metric.Distance(mats[i], mats[j])
|
||||
dist.Set(i, j, d)
|
||||
dist.Set(j, i, d)
|
||||
}
|
||||
}
|
||||
return dist, nil
|
||||
}
|
||||
|
||||
// DBSCAN clusters inst using the parameters allowed in and produces a ClusterId->[RowId] map
|
||||
func DBSCAN(inst base.FixedDataGrid, params DBSCANParameters) (ClusterMap, error) {
|
||||
|
||||
// Compute the distances between each possible point
|
||||
dist, err := computePairwiseDistances(inst, params.Attributes, params.Metric)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
_, rows := inst.Size()
|
||||
|
||||
clusterMap := make(map[int][]int)
|
||||
visited := big.NewInt(0)
|
||||
clustered := big.NewInt(0)
|
||||
// expandCluster adds P to a cluster C, visiting any neighbours
|
||||
expandCluster := func(p int, neighbours *big.Int, c int) {
|
||||
if clustered.Bit(p) == 1 {
|
||||
panic("Shouldn't happen!")
|
||||
}
|
||||
// Add this point to cluster C
|
||||
if _, ok := clusterMap[c]; !ok {
|
||||
clusterMap[c] = make([]int, 0)
|
||||
}
|
||||
clusterMap[c] = append(clusterMap[c], p)
|
||||
clustered.SetBit(clustered, p, 1)
|
||||
visited.SetBit(visited, p, 1)
|
||||
|
||||
for i := 0; i < rows; i++ {
|
||||
reset := false
|
||||
if neighbours.Bit(i) == 0 {
|
||||
// Not a neighbour, so skip
|
||||
continue
|
||||
}
|
||||
if visited.Bit(i) == 0 {
|
||||
// not yet visited
|
||||
visited = visited.SetBit(visited, i, 1) // Mark as visited
|
||||
newNeighbours := big.NewInt(0)
|
||||
newNeighbours = regionQuery(i, newNeighbours, dist, params.Eps)
|
||||
if BitCount(newNeighbours) >= params.MinCount {
|
||||
neighbours = neighbours.Or(neighbours, newNeighbours)
|
||||
reset = true
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
if clustered.Bit(i) == 0 {
|
||||
clusterMap[c] = append(clusterMap[c], i)
|
||||
clustered = clustered.SetBit(clustered, i, 1)
|
||||
}
|
||||
if reset {
|
||||
i = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
c := 0
|
||||
for i := 0; i < rows; i++ {
|
||||
if visited.Bit(i) == 1 {
|
||||
continue // Already visited here
|
||||
}
|
||||
visited.SetBit(visited, i, 1)
|
||||
neighbours := big.NewInt(0)
|
||||
neighbours = regionQuery(i, neighbours, dist, params.Eps)
|
||||
if BitCount(neighbours) < params.MinCount {
|
||||
// Noise, cluster 0
|
||||
clustered = clustered.Or(clustered, neighbours)
|
||||
continue
|
||||
}
|
||||
c = c + 1 // Increment cluster count
|
||||
expandCluster(i, neighbours, c)
|
||||
}
|
||||
|
||||
// Remove anything from the map which doesn't make
|
||||
// minimum points
|
||||
rmKeys := make([]int, 0)
|
||||
for id := range clusterMap {
|
||||
if len(clusterMap[id]) < params.MinCount {
|
||||
rmKeys = append(rmKeys, id)
|
||||
}
|
||||
}
|
||||
for _, r := range rmKeys {
|
||||
delete(clusterMap, r)
|
||||
}
|
||||
|
||||
return ClusterMap(clusterMap), nil
|
||||
}
|
||||
|
||||
// How many bits?
|
||||
func BitCount(n *big.Int) int {
|
||||
var count int = 0
|
||||
for _, b := range n.Bytes() {
|
||||
count += int(bitCounts[b])
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// bitCounts maps every byte value (0 - 255) to the number of bits set in
// it. Each row below covers one value of the high nibble (16 entries).
var bitCounts = []int8{
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00 - 0x0f
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10 - 0x1f
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20 - 0x2f
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30 - 0x3f
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40 - 0x4f
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50 - 0x5f
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60 - 0x6f
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70 - 0x7f
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80 - 0x8f
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90 - 0x9f
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xa0 - 0xaf
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xb0 - 0xbf
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xc0 - 0xcf
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xd0 - 0xdf
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xe0 - 0xef
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, // 0xf0 - 0xff
}
|
750
clustering/dbscan_labels.csv
Normal file
750
clustering/dbscan_labels.csv
Normal file
@ -0,0 +1,750 @@
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
-1
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
-1
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
-1
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
-1
|
||||
2
|
||||
-1
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
-1
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
1
|
||||
-1
|
||||
1
|
||||
0
|
||||
-1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
1
|
||||
-1
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
-1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
2
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
1
|
||||
-1
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
2
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
2
|
||||
-1
|
||||
2
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
-1
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
2
|
||||
1
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
2
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
-1
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
0
|
||||
1
|
||||
2
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
-1
|
||||
-1
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
1
|
||||
-1
|
||||
1
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
2
|
||||
0
|
||||
0
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
-1
|
||||
1
|
||||
0
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
0
|
||||
2
|
||||
2
|
||||
2
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
2
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
1
|
||||
2
|
||||
0
|
||||
2
|
||||
2
|
||||
1
|
||||
2
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
|
150
clustering/dbscan_test.go
Normal file
150
clustering/dbscan_test.go
Normal file
@ -0,0 +1,150 @@
|
||||
package clustering
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"github.com/gonum/matrix/mat64"
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
"github.com/sjwhitworth/golearn/metrics/pairwise"
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
"math"
|
||||
"math/big"
|
||||
"os"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDBSCANDistanceQuery(t *testing.T) {
|
||||
|
||||
Convey("Should be able to determine which points are in range...", t, func() {
|
||||
|
||||
// Read in the synthetic test data
|
||||
inst, err := base.ParseCSVToInstances("synthetic.csv", false)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
// Create a neighbours vector
|
||||
neighbours := big.NewInt(0)
|
||||
|
||||
// Compute pairwise distances
|
||||
dist, err := computePairwiseDistances(inst, inst.AllAttributes(), pairwise.NewEuclidean())
|
||||
So(dist.At(0, 0), ShouldAlmostEqual, 0)
|
||||
So(dist.At(0, 1), ShouldAlmostEqual, 1)
|
||||
So(dist.At(1, 0), ShouldAlmostEqual, 1)
|
||||
So(dist.At(0, 2), ShouldAlmostEqual, math.Sqrt(5))
|
||||
So(dist.At(2, 0), ShouldAlmostEqual, math.Sqrt(5))
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
// Do the region query
|
||||
neighbours = regionQuery(0, neighbours, dist, 1)
|
||||
So(neighbours.Bit(0), ShouldEqual, 1)
|
||||
So(neighbours.Bit(1), ShouldEqual, 1)
|
||||
So(neighbours.Bit(2), ShouldEqual, 0)
|
||||
So(neighbours.Bit(3), ShouldEqual, 0)
|
||||
So(neighbours.Bit(4), ShouldEqual, 0)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestDBSCANSynthetic(t *testing.T) {
|
||||
Convey("Synthetic DBSCAN test should work...", t, func() {
|
||||
|
||||
inst, err := base.ParseCSVToInstances("synthetic.csv", false)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
p := DBSCANParameters{
|
||||
ClusterParameters{
|
||||
inst.AllAttributes(),
|
||||
pairwise.NewEuclidean(),
|
||||
},
|
||||
1,
|
||||
1,
|
||||
}
|
||||
|
||||
m, err := DBSCAN(inst, p)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
So(len(m), ShouldEqual, 2)
|
||||
So(m[1], ShouldContain, 0)
|
||||
So(m[1], ShouldContain, 1)
|
||||
So(m[1], ShouldContain, 2)
|
||||
So(m[1], ShouldContain, 3)
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
func TestDBSCANDistanceMetric(t *testing.T) {
|
||||
|
||||
Convey("Check the distance function is sane...", t, func() {
|
||||
|
||||
d1 := mat64.NewDense(1, 2, nil)
|
||||
d2 := mat64.NewDense(1, 2, nil)
|
||||
|
||||
d1.Set(0, 0, 0.494260967249)
|
||||
d1.Set(0, 1, 1.45106696541)
|
||||
d2.Set(0, 0, -1.42808099324)
|
||||
d2.Set(0, 1, -0.83706376669)
|
||||
|
||||
e := pairwise.NewEuclidean()
|
||||
So(e.Distance(d1, d2), ShouldAlmostEqual, 2.9882, 0.001)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestDBSCAN(t *testing.T) {
|
||||
|
||||
Convey("Loading some data and labels...", t, func() {
|
||||
|
||||
inst, err := base.ParseCSVToInstances("dbscan.csv", false)
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
file, err := os.Open("dbscan_labels.csv")
|
||||
defer file.Close()
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
clusterMap := ClusterMap(make(map[int][]int))
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
line := -1
|
||||
for scanner.Scan() {
|
||||
line = line + 1
|
||||
v, err := strconv.ParseInt(scanner.Text(), 10, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
v = v + 1 // -1 are noise in scikit-learn's DBSCAN
|
||||
c := int(v)
|
||||
if c == 0 {
|
||||
continue
|
||||
}
|
||||
if _, ok := clusterMap[c]; !ok {
|
||||
clusterMap[c] = make([]int, 0)
|
||||
}
|
||||
clusterMap[c] = append(clusterMap[c], line)
|
||||
}
|
||||
|
||||
Convey("Our DBSCAN implementation should match...", func() {
|
||||
p := DBSCANParameters{
|
||||
ClusterParameters{
|
||||
inst.AllAttributes(),
|
||||
pairwise.NewEuclidean(),
|
||||
},
|
||||
0.3,
|
||||
10,
|
||||
}
|
||||
m, err := DBSCAN(inst, p)
|
||||
Convey("There should be nothing in the result that's smaller than MinPts", func() {
|
||||
|
||||
for id := range m {
|
||||
So(len(m[id]), ShouldBeGreaterThanOrEqualTo, 10)
|
||||
}
|
||||
|
||||
})
|
||||
So(err, ShouldBeNil)
|
||||
eq, err := clusterMap.Equals(m)
|
||||
So(err, ShouldBeNil)
|
||||
So(eq, ShouldBeTrue)
|
||||
})
|
||||
})
|
||||
|
||||
}
|
30
clustering/gen_test.py
Normal file
30
clustering/gen_test.py
Normal file
@ -0,0 +1,30 @@
|
||||
#
# Generate sample data for the DBSCAN test
#
# Lifted from http://scikit-learn.org/stable/auto_examples/cluster/plot_dbscan.html#example-cluster-plot-dbscan-py
#
# Writes two files into the current directory:
#   dbscan.csv         one comma-separated row of standardised features per sample
#   dbscan_labels.csv  the scikit-learn DBSCAN label of each sample, one per line
#

import numpy as np

from sklearn.cluster import DBSCAN
from sklearn import metrics
# make_blobs is imported from its public location: the
# sklearn.datasets.samples_generator module no longer exists in newer
# scikit-learn releases.
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
                            random_state=0)

X = StandardScaler().fit_transform(X)
X = X.astype(np.float64)
db = DBSCAN(eps=0.3, min_samples=10, metric='l2', algorithm='brute').fit(X)
labels = db.labels_

with open('dbscan.csv', 'w') as fscanout, \
        open('dbscan_labels.csv', 'w') as fscanlabout:
    # One line per sample in both files, so the line number is the row index.
    for row, label in zip(X, labels):
        fscanout.write(",".join([str(x) for x in row]) + "\n")
        fscanlabout.write(str(label) + "\n")
|
||||
|
5
clustering/synthetic.csv
Normal file
5
clustering/synthetic.csv
Normal file
@ -0,0 +1,5 @@
|
||||
0,4
|
||||
1,4
|
||||
2,3
|
||||
2,4
|
||||
3,1
|
|
Loading…
x
Reference in New Issue
Block a user