mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-25 13:48:49 +08:00
169 lines
3.8 KiB
Go
169 lines
3.8 KiB
Go
package base
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
)
|
|
|
|
func sortXorOp(b []byte) []byte {
|
|
ret := make([]byte, len(b))
|
|
copy(ret, b)
|
|
ret[0] ^= 0x80
|
|
return ret
|
|
}
|
|
|
|
type sortSpec struct {
|
|
r1 int
|
|
r2 int
|
|
}
|
|
|
|
// Returns sortSpecs for inst in ascending order
|
|
func createSortSpec(inst FixedDataGrid, attrsArg []AttributeSpec) []sortSpec {
|
|
|
|
attrs := make([]AttributeSpec, len(attrsArg))
|
|
copy(attrs, attrsArg)
|
|
// Reverse attribute order to be more intuitive
|
|
for i, j := 0, len(attrs)-1; i < j; i, j = i+1, j-1 {
|
|
attrs[i], attrs[j] = attrs[j], attrs[i]
|
|
}
|
|
|
|
_, rows := inst.Size()
|
|
ret := make([]sortSpec, 0)
|
|
|
|
// Create a buffer
|
|
buf := bytes.NewBuffer(nil)
|
|
ds := make([][]byte, rows)
|
|
rs := make([]int, rows)
|
|
rowSize := 0
|
|
|
|
inst.MapOverRows(attrs, func(row [][]byte, rowNo int) (bool, error) {
|
|
if rowSize == 0 {
|
|
// Allocate a row buffer
|
|
for _, r := range row {
|
|
rowSize += len(r)
|
|
}
|
|
}
|
|
|
|
byteBuf := make([]byte, rowSize)
|
|
|
|
for i, r := range row {
|
|
if i == 0 {
|
|
binary.Write(buf, binary.LittleEndian, sortXorOp(r))
|
|
} else {
|
|
binary.Write(buf, binary.LittleEndian, r)
|
|
}
|
|
}
|
|
|
|
buf.Read(byteBuf)
|
|
ds[rowNo] = byteBuf
|
|
rs[rowNo] = rowNo
|
|
return true, nil
|
|
})
|
|
|
|
// Sort values
|
|
valueBins := make([][][]byte, 256)
|
|
rowBins := make([][]int, 256)
|
|
for i := 0; i < rowSize; i++ {
|
|
for j := 0; j < len(ds); j++ {
|
|
// Address each row value by it's ith byte
|
|
b := ds[j]
|
|
valueBins[b[i]] = append(valueBins[b[i]], b)
|
|
rowBins[b[i]] = append(rowBins[b[i]], rs[j])
|
|
}
|
|
j := 0
|
|
for k := 0; k < 256; k++ {
|
|
bs := valueBins[k]
|
|
rc := rowBins[k]
|
|
copy(ds[j:], bs)
|
|
copy(rs[j:], rc)
|
|
j += len(bs)
|
|
valueBins[k] = bs[:0]
|
|
rowBins[k] = rc[:0]
|
|
}
|
|
}
|
|
|
|
done := make([]bool, rows)
|
|
for index := range rs {
|
|
if done[index] {
|
|
continue
|
|
}
|
|
j := index
|
|
for {
|
|
done[j] = true
|
|
if rs[j] != index {
|
|
ret = append(ret, sortSpec{j, rs[j]})
|
|
j = rs[j]
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return ret
|
|
}
|
|
|
|
// Sort does a radix sort of DenseInstances, using SortDirection
|
|
// direction (Ascending or Descending) with attrs as a slice of Attribute
|
|
// indices that you want to sort by.
|
|
//
|
|
// IMPORTANT: Radix sort is not stable, so ordering outside
|
|
// the attributes used for sorting is arbitrary.
|
|
func Sort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error) {
|
|
sortInstructions := createSortSpec(inst, attrs)
|
|
instUpdatable, ok := inst.(*DenseInstances)
|
|
if ok {
|
|
for _, i := range sortInstructions {
|
|
instUpdatable.swapRows(i.r1, i.r2)
|
|
}
|
|
if direction == Descending {
|
|
// Reverse the matrix
|
|
_, rows := inst.Size()
|
|
for i, j := 0, rows-1; i < j; i, j = i+1, j-1 {
|
|
instUpdatable.swapRows(i, j)
|
|
}
|
|
}
|
|
} else {
|
|
panic("Sort is not supported for this yet!")
|
|
}
|
|
return instUpdatable, nil
|
|
}
|
|
|
|
// LazySort also does a sort, but returns an InstanceView and doesn't actually
|
|
// reorder the rows, just makes it look like they've been reordered
|
|
// See also: Sort
|
|
func LazySort(inst FixedDataGrid, direction SortDirection, attrs []AttributeSpec) (FixedDataGrid, error) {
|
|
// Run the sort operation
|
|
sortInstructions := createSortSpec(inst, attrs)
|
|
|
|
// Build the row -> row mapping
|
|
_, rows := inst.Size() // Get the total row count
|
|
rowArr := make([]int, rows) // Create an array of positions
|
|
for i := 0; i < len(rowArr); i++ {
|
|
rowArr[i] = i
|
|
}
|
|
for i := range sortInstructions {
|
|
r1 := rowArr[sortInstructions[i].r1]
|
|
r2 := rowArr[sortInstructions[i].r2]
|
|
// Swap
|
|
rowArr[sortInstructions[i].r1] = r2
|
|
rowArr[sortInstructions[i].r2] = r1
|
|
}
|
|
if direction == Descending {
|
|
for i, j := 0, rows-1; i < j; i, j = i+1, j-1 {
|
|
tmp := rowArr[i]
|
|
rowArr[i] = rowArr[j]
|
|
rowArr[j] = tmp
|
|
}
|
|
}
|
|
// Create a mapping dictionary
|
|
rowMap := make(map[int]int)
|
|
for i, a := range rowArr {
|
|
if i == a {
|
|
continue
|
|
}
|
|
rowMap[i] = a
|
|
}
|
|
// Create the return structure
|
|
ret := NewInstancesViewFromRows(inst, rowMap)
|
|
return ret, nil
|
|
}
|