1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-26 13:49:14 +08:00
golearn/base/sort.go
2014-08-03 15:16:38 +01:00

169 lines
3.8 KiB
Go

package base
import (
"bytes"
"encoding/binary"
)
func sortXorOp(b []byte) []byte {
ret := make([]byte, len(b))
copy(ret, b)
ret[0] ^= 0x80
return ret
}
type sortSpec struct {
r1 int
r2 int
}
// Returns sortSpecs for inst in ascending order
func createSortSpec(inst FixedDataGrid, attrsArg []AttributeSpec) []sortSpec {
attrs := make([]AttributeSpec, len(attrsArg))
copy(attrs, attrsArg)
// Reverse attribute order to be more intuitive
for i, j := 0, len(attrs)-1; i < j; i, j = i+1, j-1 {
attrs[i], attrs[j] = attrs[j], attrs[i]
}
_, rows := inst.Size()
ret := make([]sortSpec, 0)
// Create a buffer
buf := bytes.NewBuffer(nil)
ds := make([][]byte, rows)
rs := make([]int, rows)
rowSize := 0
inst.MapOverRows(attrs, func(row [][]byte, rowNo int) (bool, error) {
if rowSize == 0 {
// Allocate a row buffer
for _, r := range row {
rowSize += len(r)
}
}
byteBuf := make([]byte, rowSize)
for i, r := range row {
if i == 0 {
binary.Write(buf, binary.LittleEndian, sortXorOp(r))
} else {
binary.Write(buf, binary.LittleEndian, r)
}
}
buf.Read(byteBuf)
ds[rowNo] = byteBuf
rs[rowNo] = rowNo
return true, nil
})
// Sort values
valueBins := make([][][]byte, 256)
rowBins := make([][]int, 256)
for i := 0; i < rowSize; i++ {
for j := 0; j < len(ds); j++ {
// Address each row value by it's ith byte
b := ds[j]
valueBins[b[i]] = append(valueBins[b[i]], b)
rowBins[b[i]] = append(rowBins[b[i]], rs[j])
}
j := 0
for k := 0; k < 256; k++ {
bs := valueBins[k]
rc := rowBins[k]
copy(ds[j:], bs)
copy(rs[j:], rc)
j += len(bs)
valueBins[k] = bs[:0]
rowBins[k] = rc[:0]
}
}
done := make([]bool, rows)
for index := range rs {
if done[index] {
continue
}
j := index
for {
done[j] = true
if rs[j] != index {
ret = append(ret, sortSpec{j, rs[j]})
j = rs[j]
} else {
break
}
}
}
return ret
}
// Sort does a radix sort of DenseInstances, using SortDirection
// direction (Ascending or Descending) with attrs as a slice of Attribute
// indices that you want to sort by.
//
// IMPORTANT: Radix sort is not stable, so ordering outside
// the attributes used for sorting is arbitrary.
func Sort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error) {
sortInstructions := createSortSpec(inst, attrs)
instUpdatable, ok := inst.(*DenseInstances)
if ok {
for _, i := range sortInstructions {
instUpdatable.swapRows(i.r1, i.r2)
}
if direction == Descending {
// Reverse the matrix
_, rows := inst.Size()
for i, j := 0, rows-1; i < j; i, j = i+1, j-1 {
instUpdatable.swapRows(i, j)
}
}
} else {
panic("Sort is not supported for this yet!")
}
return instUpdatable, nil
}
// LazySort also does a sort, but returns an InstanceView and doesn't actually
// reorder the rows, just makes it look like they've been reordered
// See also: Sort
func LazySort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error) {
// Run the sort operation
sortInstructions := createSortSpec(inst, attrs)
// Build the row -> row mapping
_, rows := inst.Size() // Get the total row count
rowArr := make([]int, rows) // Create an array of positions
for i := 0; i < len(rowArr); i++ {
rowArr[i] = i
}
for i := range sortInstructions {
r1 := rowArr[sortInstructions[i].r1]
r2 := rowArr[sortInstructions[i].r2]
// Swap
rowArr[sortInstructions[i].r1] = r2
rowArr[sortInstructions[i].r2] = r1
}
if direction == Descending {
for i, j := 0, rows-1; i < j; i, j = i+1, j-1 {
tmp := rowArr[i]
rowArr[i] = rowArr[j]
rowArr[j] = tmp
}
}
// Create a mapping dictionary
rowMap := make(map[int]int)
for i, a := range rowArr {
if i == a {
continue
}
rowMap[i] = a
}
// Create the return structure
ret := NewInstancesViewFromRows(inst, rowMap)
return ret, nil
}