1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-25 13:48:49 +08:00
golearn/clustering/clustering.go
2016-06-14 00:56:47 +01:00

109 lines
3.0 KiB
Go

/* This package implements clustering algorithms */
package clustering
import (
"fmt"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/metrics/pairwise"
)
// ClusterParameters takes a number of variables common to all clustering
// algorithms.
type ClusterParameters struct {
// Attributes represents the set of Attributes which
// can be used for clustering
Attributes []base.Attribute
// Metric is used to compute pairwise distance
Metric pairwise.PairwiseDistanceFunc
}
// ClusterMap contains the cluster identifier as a key, followed by a vector of point
// indices that cluster contains.
type ClusterMap map[int][]int
// Invert returns an alternative form of cluster map where the key represents the point
// index and the value represents the cluster index it's assigned to
func (ref ClusterMap) Invert() (map[int]int, error) {
ret := make(map[int]int)
for c := range ref {
for _, p := range ref[c] {
if _, ok := ret[p]; ok {
return nil, fmt.Errorf("Not a valid cluster map (points appear in more than one cluster)")
} else {
ret[p] = c
}
}
}
return ret, nil
}
// Equals checks whether a bijection exists between two ClusterMaps (i.e. the clusters in one can
// be re-labelled to become the clusters of another)
func (ref ClusterMap) Equals(other ClusterMap) (bool, error) {
if len(ref) != len(other) {
return false, fmt.Errorf("ref and other do not contain the same number of clusters (%d and %d)", len(ref), len(other))
}
refInv, err := ref.Invert()
if err != nil {
return false, fmt.Errorf("ref: %s", err)
}
otherInv, err := other.Invert()
if err != nil {
return false, fmt.Errorf("other: %s", err)
}
clusterIdMap := make(map[int]int)
// Range through each point index
for p := range refInv {
c1 := refInv[p] // Get the cluster index of this point
if c2, ok := otherInv[p]; ok { // Check if the other map has this point
// if so, c2 is the point's cluster in the other map
if c3, ok := clusterIdMap[c2]; ok { // what's our correspondance with c2?
if c1 != c3 {
// if c1 is not what we've currently got, error out
return false, fmt.Errorf("ref point %d (cluster %d) is assigned to a different cluster (%d) in ref %+v", p, c2, c1, clusterIdMap)
}
} else {
clusterIdMap[c2] = c1
}
} else {
return false, fmt.Errorf("failed to find reference point %d in src", p)
}
}
// Check that after transformation, key contains the same points
arraysEqual := func(a1, a2 []int) bool {
cnt := make(map[int]bool)
for _, a := range a1 {
cnt[a] = true
}
for _, a := range a2 {
if _, ok := cnt[a]; !ok {
return false
}
}
return true
}
newMap := ClusterMap(make(map[int][]int))
for cOld := range other {
cNew := clusterIdMap[cOld]
if !arraysEqual(ref[cNew], other[cOld]) {
return false, fmt.Errorf("Re-labelled cluster %d => %d doesn't contain the same points (%d, %d)", cOld, cNew, ref[cNew], other[cOld])
}
newMap[cNew] = other[cOld]
}
return true, nil
}