Mirror of https://github.com/sjwhitworth/golearn.git, synced 2025-05-01 22:18:10 +08:00
base: Added support for grouping and storing BinaryAttributes

* Pond was renamed to FixedAttributeGroup.
* AttributeGroup interface.
* BinaryAttributeGroup introduced.
parent d4ac151271
commit 8196db1230
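As a rough usage sketch (not part of the commit; names follow base/bag_test.go below), the new grouping support means BinaryAttributes added to a DenseInstances are routed into a bit-packed BinaryAttributeGroup, with each value stored as a single bit:

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
)

func main() {
	// Three BinaryAttributes: AddAttribute should place them in the "BIN"
	// AttributeGroup (a BinaryAttributeGroup), one bit per Attribute.
	inst := base.NewDenseInstances()
	specs := make([]base.AttributeSpec, 3)
	for i := 0; i < 3; i++ {
		specs[i] = inst.AddAttribute(base.NewBinaryAttribute(fmt.Sprintf("%d", i)))
	}

	// Allocate storage for two rows, then set a couple of bits.
	inst.Extend(2)
	inst.Set(specs[0], 0, []byte{1})
	inst.Set(specs[2], 1, []byte{1})

	fmt.Println(inst.RowString(0)) // e.g. "1 0 0"
	fmt.Println(inst.RowString(1)) // e.g. "0 0 1"
}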
base/bag.go (new file, 62 lines)
@@ -0,0 +1,62 @@
package base

import (
	"fmt"
)

// BinaryAttributeGroups contain only BinaryAttributes
// Compact each Attribute to a bit for better storage
type BinaryAttributeGroup struct {
	FixedAttributeGroup
}

func (b *BinaryAttributeGroup) RowSize() int {
	return (len(b.attributes) + 7) / 8
}

// String gets a human-readable view of this group
func (b *BinaryAttributeGroup) String() string {
	if len(b.alloc) > 1 {
		return fmt.Sprintf("BinaryAttributeGroup(%d attributes\n thread: %d\n size: %d\n)", len(b.attributes), b.threadNo, b.size)
	}
	return fmt.Sprintf("BinaryAttributeGroup(%d attributes\n thread: %d\n size: %d\n)", len(b.attributes), b.threadNo, b.size)
}

func (b *BinaryAttributeGroup) getByteOffset(col, row int) int {
	return row*b.RowSize() + col/8
}

func (b *BinaryAttributeGroup) set(col, row int, val []byte) {
	// Resolve the block
	curBlock, blockOffset := b.resolveBlock(col, row)

	// If the value is 1, OR it
	if val[0] > 0 {
		b.alloc[curBlock][blockOffset] |= (1 << (uint(col) % 8))
	} else {
		// Otherwise, AND its complement
		b.alloc[curBlock][blockOffset] &= ^(1 << (uint(col) % 8))
	}

	row++
	if row > b.maxRow {
		b.maxRow = row
	}
}

func (b *BinaryAttributeGroup) resolveBlock(col, row int) (int, int) {
	// 8 bits are packed into each byte
	byteOffset := row*b.RowSize() + (col / 8)
	rowSize := b.RowSize()
	return b.FixedAttributeGroup.resolveBlockFromByteOffset(byteOffset, rowSize)
}

func (b *BinaryAttributeGroup) get(col, row int) []byte {
	curBlock, blockOffset := b.resolveBlock(col, row)
	if b.alloc[curBlock][blockOffset]&(1<<(uint(col%8))) > 0 {
		return []byte{1}
	} else {
		return []byte{0}
	}
}
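A self-contained illustration of the packing arithmetic above; the attribute count and coordinates are made-up examples, not values from the commit:

package main

import "fmt"

func main() {
	nAttrs := 11 // example: 11 BinaryAttributes in one group

	// RowSize rounds up to whole bytes, as in BinaryAttributeGroup.RowSize.
	rowSize := (nAttrs + 7) / 8 // 2 bytes per row

	// Column 9 on row 3 lands in byte row*rowSize + col/8, at bit col%8.
	col, row := 9, 3
	byteOffset := row*rowSize + col/8
	bitMask := byte(1) << (uint(col) % 8)

	fmt.Printf("rowSize=%d byteOffset=%d bitMask=%08b\n", rowSize, byteOffset, bitMask)
	// Output: rowSize=2 byteOffset=7 bitMask=00000010
}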
base/bag_test.go (new file, 136 lines)
@@ -0,0 +1,136 @@
package base

import (
	"fmt"
	. "github.com/smartystreets/goconvey/convey"
	"math/rand"
	"testing"
)

func TestBAGSimple(t *testing.T) {

	Convey("Given certain bit data", t, func() {
		// Generate said bits
		bVals := [][]byte{
			[]byte{1, 0, 0},
			[]byte{0, 1, 0},
			[]byte{0, 0, 1},
		}

		// Create a new DenseInstances
		inst := NewDenseInstances()
		for i := 0; i < 3; i++ {
			inst.AddAttribute(NewBinaryAttribute(fmt.Sprintf("%d", i)))
		}

		// Get and re-order the attributes
		attrSpecsUnordered := ResolveAllAttributes(inst)
		attrSpecs := make([]AttributeSpec, 3)
		for _, a := range attrSpecsUnordered {
			name := a.GetAttribute().GetName()
			if name == "0" {
				attrSpecs[0] = a
			} else if name == "1" {
				attrSpecs[1] = a
			} else if name == "2" {
				attrSpecs[2] = a
			} else {
				panic(name)
			}
		}

		inst.Extend(3)

		for row, b := range bVals {
			for col, c := range b {
				inst.Set(attrSpecs[col], row, []byte{c})
			}
		}

		Convey("All the row values should be the right length...", func() {
			inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
				for i := range attrSpecs {
					So(len(row[i]), ShouldEqual, 1)
				}
				return true, nil
			})
		})

		Convey("All the values should be the same...", func() {
			inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
				for j := range attrSpecs {
					So(row[j][0], ShouldEqual, bVals[i][j])
				}
				return true, nil
			})
		})

	})
}

func TestBAG(t *testing.T) {
	Convey("Given randomly generated bit data", t, func() {
		// Generate said bits
		bVals := make([][]byte, 0)
		for i := 0; i < 50; i++ {
			b := make([]byte, 3)
			for j := 0; j < 3; j++ {
				if rand.NormFloat64() >= 0 {
					b[j] = byte(1)
				} else {
					b[j] = byte(0)
				}
			}
			bVals = append(bVals, b)
		}

		// Create a new DenseInstances
		inst := NewDenseInstances()
		for i := 0; i < 3; i++ {
			inst.AddAttribute(NewBinaryAttribute(fmt.Sprintf("%d", i)))
		}

		// Get and re-order the attributes
		attrSpecsUnordered := ResolveAllAttributes(inst)
		attrSpecs := make([]AttributeSpec, 3)
		for _, a := range attrSpecsUnordered {
			name := a.GetAttribute().GetName()
			if name == "0" {
				attrSpecs[0] = a
			} else if name == "1" {
				attrSpecs[1] = a
			} else if name == "2" {
				attrSpecs[2] = a
			} else {
				panic(name)
			}
		}

		inst.Extend(50)

		for row, b := range bVals {
			for col, c := range b {
				inst.Set(attrSpecs[col], row, []byte{c})
			}
		}

		Convey("All the row values should be the right length...", func() {
			inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
				for i := range attrSpecs {
					So(len(row[i]), ShouldEqual, 1)
				}
				return true, nil
			})
		})

		Convey("All the values should be the same...", func() {
			inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
				for j := range attrSpecs {
					So(row[j][0], ShouldEqual, bVals[i][j])
				}
				return true, nil
			})
		})

	})
}
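Assuming a standard checkout of the repository, these GoConvey-based tests would be run with the usual Go tooling, for example:

go test ./base -run TestBAG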
base/dense.go (changed, 164 lines)

@@ -12,8 +12,8 @@ import (
 // in a large grid.
 type DenseInstances struct {
 	storage    *edf.EdfFile
-	pondMap    map[string]int
-	ponds      []*Pond
+	agMap      map[string]int
+	ags        []AttributeGroup
 	lock       sync.Mutex
 	fixed      bool
 	classAttrs map[AttributeSpec]bool
@@ -31,7 +31,7 @@ func NewDenseInstances() *DenseInstances {
 	return &DenseInstances{
 		storage,
 		make(map[string]int),
-		make([]*Pond, 0),
+		make([]AttributeGroup, 0),
 		sync.Mutex{},
 		false,
 		make(map[AttributeSpec]bool),
@@ -41,12 +41,15 @@ func NewDenseInstances() *DenseInstances {
 }
 
 //
-// Pond functions
+// AttributeGroup functions
 //
 
-// createPond adds a new Pond to this set of Instances
+// createAttributeGroup adds a new AttributeGroup to this set of Instances
 // IMPORTANT: do not call unless you've acquired the lock
-func (inst *DenseInstances) createPond(name string, size int) {
+func (inst *DenseInstances) createAttributeGroup(name string, size int) {
+
+	var agAdd AttributeGroup
+
 	if inst.fixed {
 		panic("Can't add additional Attributes")
 	}
@@ -65,7 +68,7 @@ func (inst *DenseInstances) createPond(name string, size int) {
 		}
 	}
 	if ok {
-		panic("Can't create pond: pond thread already exists")
+		panic("Can't create AttributeGroup: thread already exists")
 	}
 
 	// Write the pool's thread into the file
@@ -75,28 +78,38 @@ func (inst *DenseInstances) createPond(name string, size int) {
 		panic(fmt.Sprintf("Can't write thread: %s", err))
 	}
 
-	// Create the pond information
-	pond := new(Pond)
-	pond.threadNo = thread.GetId()
-	pond.parent = inst
-	pond.attributes = make([]Attribute, 0)
-	pond.size = size
-	pond.alloc = make([][]byte, 0)
-	// Store within instances
-	inst.pondMap[name] = len(inst.ponds)
-	inst.ponds = append(inst.ponds, pond)
+	// Create the AttributeGroup information
+	if size != 0 {
+		ag := new(FixedAttributeGroup)
+		ag.threadNo = thread.GetId()
+		ag.parent = inst
+		ag.attributes = make([]Attribute, 0)
+		ag.size = size
+		ag.alloc = make([][]byte, 0)
+		agAdd = ag
+	} else {
+		ag := new(BinaryAttributeGroup)
+		ag.threadNo = thread.GetId()
+		ag.parent = inst
+		ag.attributes = make([]Attribute, 0)
+		ag.size = size
+		ag.alloc = make([][]byte, 0)
+		agAdd = ag
+	}
+	inst.agMap[name] = len(inst.ags)
+	inst.ags = append(inst.ags, agAdd)
 }
 
-// CreatePond adds a new Pond to this set of instances
-// with a given name. If the size is 0, a bit-pond is added
-// if the size of not 0, then the size of each pond attribute
+// CreateAttributeGroup adds a new AttributeGroup to this set of instances
+// with a given name. If the size is 0, a bit-ag is added;
+// if the size is not 0, then the size of each ag attribute
 // is set to that number of bytes.
-func (inst *DenseInstances) CreatePond(name string, size int) (err error) {
+func (inst *DenseInstances) CreateAttributeGroup(name string, size int) (err error) {
 	defer func() {
 		if r := recover(); r != nil {
 			var ok bool
 			if err, ok = r.(error); !ok {
-				err = fmt.Errorf("CreatePond: %v (not created)", r)
+				err = fmt.Errorf("CreateAttributeGroup: %v (not created)", r)
 			}
 		}
 	}()
@@ -104,21 +117,21 @@ func (inst *DenseInstances) CreatePond(name string, size int) (err error) {
 	inst.lock.Lock()
 	defer inst.lock.Unlock()
 
-	inst.createPond(name, size)
+	inst.createAttributeGroup(name, size)
 	return nil
 }
 
-// GetPond returns a reference to a Pond of a given name /
-func (inst *DenseInstances) GetPond(name string) (*Pond, error) {
+// GetAttributeGroup returns a reference to an AttributeGroup of a given name
+func (inst *DenseInstances) GetAttributeGroup(name string) (AttributeGroup, error) {
 	inst.lock.Lock()
 	defer inst.lock.Unlock()
 
-	// Check if the pond exists
-	if id, ok := inst.pondMap[name]; !ok {
-		return nil, fmt.Errorf("Pond '%s' doesn't exist", name)
+	// Check if the ag exists
+	if id, ok := inst.agMap[name]; !ok {
+		return nil, fmt.Errorf("AttributeGroup '%s' doesn't exist", name)
 	} else {
-		// Return the pond
-		return inst.ponds[id], nil
+		// Return the ag
+		return inst.ags[id], nil
 	}
 }
 
@@ -127,7 +140,7 @@ func (inst *DenseInstances) GetPond(name string) (*Pond, error) {
 //
 
 // AddAttribute adds an Attribute to this set of DenseInstances
-// Creates a default Pond for it if a suitable one doesn't exist.
+// Creates a default AttributeGroup for it if a suitable one doesn't exist.
 // Returns an AttributeSpec for subsequent Set() calls.
 //
 // IMPORTANT: will panic if storage has been allocated via Extend.
@@ -139,48 +152,54 @@ func (inst *DenseInstances) AddAttribute(a Attribute) AttributeSpec {
 		panic("Can't add additional Attributes")
 	}
 
-	// Generate a default Pond name
-	pond := "FLOAT"
+	// Generate a default AttributeGroup name
+	ag := "FLOAT"
 	if _, ok := a.(*CategoricalAttribute); ok {
-		pond = "CAT"
+		ag = "CAT"
 	} else if _, ok := a.(*FloatAttribute); ok {
-		pond = "FLOAT"
+		ag = "FLOAT"
+	} else if _, ok := a.(*BinaryAttribute); ok {
+		ag = "BIN"
 	} else {
 		panic("Unrecognised Attribute type")
 	}
 
-	// Create the pond if it doesn't exist
-	if _, ok := inst.pondMap[pond]; !ok {
-		inst.createPond(pond, 8)
+	// Create the ag if it doesn't exist
+	if _, ok := inst.agMap[ag]; !ok {
+		if ag != "BIN" {
+			inst.createAttributeGroup(ag, 8)
+		} else {
+			inst.createAttributeGroup(ag, 0)
+		}
 	}
-	id := inst.pondMap[pond]
-	p := inst.ponds[id]
-	p.attributes = append(p.attributes, a)
+	id := inst.agMap[ag]
+	p := inst.ags[id]
+	p.AddAttribute(a)
 	inst.attributes = append(inst.attributes, a)
-	return AttributeSpec{id, len(p.attributes) - 1, a}
+	return AttributeSpec{id, len(p.Attributes()) - 1, a}
 }
 
-// AddAttributeToPond adds an Attribute to a given pond
-func (inst *DenseInstances) AddAttributeToPond(newAttribute Attribute, pond string) (AttributeSpec, error) {
+// AddAttributeToAttributeGroup adds an Attribute to a given ag
+func (inst *DenseInstances) AddAttributeToAttributeGroup(newAttribute Attribute, ag string) (AttributeSpec, error) {
 	inst.lock.Lock()
 	defer inst.lock.Unlock()
 
-	// Check if the pond exists
-	if _, ok := inst.pondMap[pond]; !ok {
-		return AttributeSpec{-1, 0, nil}, fmt.Errorf("Pond '%s' doesn't exist. Call CreatePond() first", pond)
+	// Check if the ag exists
+	if _, ok := inst.agMap[ag]; !ok {
+		return AttributeSpec{-1, 0, nil}, fmt.Errorf("Pond '%s' doesn't exist. Call CreatePond() first", ag)
 	}
 
-	id := inst.pondMap[pond]
-	p := inst.ponds[id]
-	for i, a := range p.attributes {
+	id := inst.agMap[ag]
+	p := inst.ags[id]
+	for i, a := range p.Attributes() {
 		if !a.Compatable(newAttribute) {
-			return AttributeSpec{-1, 0, nil}, fmt.Errorf("Attribute %s is not compatable with %s in pond '%s' (position %d)", newAttribute, a, pond, i)
+			return AttributeSpec{-1, 0, nil}, fmt.Errorf("Attribute %s is not compatable with %s in pond '%s' (position %d)", newAttribute, a, ag, i)
 		}
 	}
 
-	p.attributes = append(p.attributes, newAttribute)
+	p.AddAttribute(newAttribute)
 	inst.attributes = append(inst.attributes, newAttribute)
-	return AttributeSpec{id, len(p.attributes) - 1, newAttribute}, nil
+	return AttributeSpec{id, len(p.Attributes()) - 1, newAttribute}, nil
 }
 
 // GetAttribute returns an Attribute equal to the argument.
@@ -192,8 +211,8 @@ func (inst *DenseInstances) GetAttribute(get Attribute) (AttributeSpec, error) {
 	inst.lock.Lock()
 	defer inst.lock.Unlock()
 
-	for i, p := range inst.ponds {
-		for j, a := range p.attributes {
+	for i, p := range inst.ags {
+		for j, a := range p.Attributes() {
 			if a.Equals(get) {
 				return AttributeSpec{i, j, a}, nil
 			}
@@ -209,8 +228,8 @@ func (inst *DenseInstances) AllAttributes() []Attribute {
 	defer inst.lock.Unlock()
 
 	ret := make([]Attribute, 0)
-	for _, p := range inst.ponds {
-		for _, a := range p.attributes {
+	for _, p := range inst.ags {
+		for _, a := range p.Attributes() {
 			ret = append(ret, a)
 		}
 	}
@@ -280,10 +299,9 @@ func (inst *DenseInstances) Extend(rows int) error {
 	// Get the size of each page
 	pageSize := inst.storage.GetPageSize()
 
-	for pondName := range inst.ponds {
-		p := inst.ponds[pondName]
+	for _, p := range inst.ags {
 
-		// Compute pond row storage requirements
+		// Compute ag row storage requirements
 		rowSize := p.RowSize()
 
 		// How many rows can we store per page?
@@ -293,14 +311,14 @@ func (inst *DenseInstances) Extend(rows int) error {
 		pagesNeeded := uint32(math.Ceil(float64(rows) / rowsPerPage))
 
 		// Allocate those pages
-		r, err := inst.storage.AllocPages(pagesNeeded, p.threadNo)
+		r, err := inst.storage.AllocPages(pagesNeeded, p.getThreadNo())
 		if err != nil {
 			panic(fmt.Sprintf("Allocation error: %s (rowSize %d, pageSize %d, rowsPerPage %.2f, tried to allocate %d page(s) and extend by %d row(s))", err, rowSize, pageSize, rowsPerPage, pagesNeeded, rows))
 		}
 		// Resolve and assign those pages
 		byteBlock := inst.storage.ResolveRange(r)
 		for _, block := range byteBlock {
-			p.alloc = append(p.alloc, block)
+			p.addStorage(block)
 		}
 	}
 	inst.fixed = true
@@ -319,45 +337,31 @@ func (inst *DenseInstances) Extend(rows int) error {
 //
 // IMPORTANT: Will panic() if the val is not the right length
 func (inst *DenseInstances) Set(a AttributeSpec, row int, val []byte) {
-	inst.ponds[a.pond].set(a.position, row, val)
+	inst.ags[a.pond].set(a.position, row, val)
 }
 
 // Get gets a particular Attribute (given as an AttributeSpec) on a particular
 // row.
 // AttributeSpecs can be obtained using GetAttribute() or AddAttribute()
 func (inst *DenseInstances) Get(a AttributeSpec, row int) []byte {
-	return inst.ponds[a.pond].get(a.position, row)
+	return inst.ags[a.pond].get(a.position, row)
 }
 
 // RowString returns a string representation of a given row.
 func (inst *DenseInstances) RowString(row int) string {
 	var buffer bytes.Buffer
 	first := true
-	for name := range inst.ponds {
+	for _, p := range inst.ags {
 		if first {
 			first = false
 		} else {
 			buffer.WriteString(" ")
 		}
-		p := inst.ponds[name]
 		p.appendToRowBuf(row, &buffer)
 	}
 	return buffer.String()
 }
 
-//
-// Row handling functions
-//
-
-func (inst *DenseInstances) allocateRowVector(asv []AttributeSpec) [][]byte {
-	ret := make([][]byte, len(asv))
-	for i, as := range asv {
-		p := inst.ponds[as.pond]
-		ret[i] = make([]byte, p.size)
-	}
-	return ret
-}
-
 // MapOverRows passes each row map into a function.
 // First argument is a list of AttributeSpec in the order
 // they're needed in for the function. The second is the function
@@ -366,7 +370,7 @@ func (inst *DenseInstances) MapOverRows(asv []AttributeSpec, mapFunc func([][]by
 	rowBuf := make([][]byte, len(asv))
 	for i := 0; i < inst.maxRow; i++ {
 		for j, as := range asv {
-			p := inst.ponds[as.pond]
+			p := inst.ags[as.pond]
 			rowBuf[j] = p.get(as.position, i)
 		}
 		ok, err := mapFunc(rowBuf, i)
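To restate the dispatch rule this diff introduces, here is a hypothetical helper (not code from the commit) mirroring the selection made in AddAttribute and createAttributeGroup: the group name follows the Attribute's concrete type, and a size of 0 selects the bit-packed BinaryAttributeGroup.

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
)

// groupNameAndSize is a hypothetical helper: it returns the AttributeGroup
// name and per-Attribute size that AddAttribute would use for a given
// Attribute type, per the diff above.
func groupNameAndSize(a base.Attribute) (string, int) {
	switch a.(type) {
	case *base.CategoricalAttribute:
		return "CAT", 8 // 8-byte system values, as before
	case *base.FloatAttribute:
		return "FLOAT", 8
	case *base.BinaryAttribute:
		return "BIN", 0 // size 0 selects a BinaryAttributeGroup (1 bit per Attribute)
	default:
		panic("Unrecognised Attribute type")
	}
}

func main() {
	name, size := groupNameAndSize(base.NewBinaryAttribute("example"))
	fmt.Println(name, size) // BIN 0
}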
base/fixed.go (new file, 135 lines)
@@ -0,0 +1,135 @@
package base

import (
	"bytes"
	"fmt"
)

// FixedAttributeGroups contain a particular number of rows of
// a particular number of Attributes, all of a given type.
type FixedAttributeGroup struct {
	threadNo   uint32
	parent     DataGrid
	attributes []Attribute
	size       int
	alloc      [][]byte
	maxRow     int
}

func (f *FixedAttributeGroup) String() string {
	if len(f.alloc) > 1 {
		return fmt.Sprintf("FixedAttributeGroup(%d attributes\n thread: %d\n size: %d\n)", len(f.attributes), f.threadNo, f.size)
	}
	return fmt.Sprintf("FixedAttributeGroup(%d attributes\n thread: %d\n size: %d\n %d \n)", len(f.attributes), f.threadNo, f.size, f.alloc[0][0:60])
}

// RowSize returns the size of each row in bytes
func (f *FixedAttributeGroup) RowSize() int {
	return len(f.attributes) * f.size
}

// Attributes returns a slice of Attributes in this FixedAttributeGroup
func (f *FixedAttributeGroup) Attributes() []Attribute {
	return f.attributes
}

// AddAttribute adds an attribute to this FixedAttributeGroup
func (f *FixedAttributeGroup) AddAttribute(a Attribute) error {
	f.attributes = append(f.attributes, a)
	return nil
}

// getThreadNo returns the ThreadNo assigned to this FixedAttributeGroup
func (f *FixedAttributeGroup) getThreadNo() uint32 {
	return f.threadNo
}

// addStorage appends the given storage reference to this FixedAttributeGroup
func (f *FixedAttributeGroup) addStorage(a []byte) {
	f.alloc = append(f.alloc, a)
}

// Storage returns a slice of AttributeGroupStorageRefs which can
// be used to access the memory in this group.
func (f *FixedAttributeGroup) Storage() []AttributeGroupStorageRef {
	ret := make([]AttributeGroupStorageRef, len(f.alloc))
	rowSize := f.RowSize()
	for i, b := range f.alloc {
		ret[i] = AttributeGroupStorageRef{b, len(b) / rowSize}
	}
	return ret
}

func (f *FixedAttributeGroup) resolveBlock(col int, row int) (int, int) {
	if len(f.alloc) == 0 {
		panic("No blocks to resolve")
	}

	// Find where in the group the byte is
	byteOffset := row*f.RowSize() + col*f.size
	return f.resolveBlockFromByteOffset(byteOffset, f.RowSize())
}

func (f *FixedAttributeGroup) resolveBlockFromByteOffset(byteOffset, rowSize int) (int, int) {
	curOffset := 0
	curBlock := 0
	blockOffset := 0
	for {
		if curBlock >= len(f.alloc) {
			panic("Don't have enough blocks to fulfill")
		}

		// Rows are not allowed to span blocks
		blockAdd := len(f.alloc[curBlock])
		blockAdd -= blockAdd % rowSize

		// Case 1: we need to skip this allocation
		if curOffset+blockAdd < byteOffset {
			curOffset += blockAdd
			curBlock++
		} else {
			blockOffset = byteOffset - curOffset
			break
		}
	}

	return curBlock, blockOffset
}

func (f *FixedAttributeGroup) set(col int, row int, val []byte) {
	// Double-check the length
	if len(val) != f.size {
		panic(fmt.Sprintf("Tried to call set() with %d bytes, should be %d", len(val), f.size))
	}

	// Find where in the group the byte is
	curBlock, blockOffset := f.resolveBlock(col, row)

	// Copy the value in
	copied := copy(f.alloc[curBlock][blockOffset:], val)
	if copied != f.size {
		panic(fmt.Sprintf("set() terminated by only copying %d bytes into the current block (should be %d). Check EDF allocation", copied, f.size))
	}

	row++
	if row > f.maxRow {
		f.maxRow = row
	}
}

func (f *FixedAttributeGroup) get(col int, row int) []byte {
	curBlock, blockOffset := f.resolveBlock(col, row)
	return f.alloc[curBlock][blockOffset : blockOffset+f.size]
}

func (f *FixedAttributeGroup) appendToRowBuf(row int, buffer *bytes.Buffer) {
	for i, a := range f.attributes {
		postfix := " "
		if i == len(f.attributes)-1 {
			postfix = ""
		}
		buffer.WriteString(fmt.Sprintf("%s%s", a.GetStringFromSysVal(f.get(i, row)), postfix))
	}
}
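For illustration only (the block and row sizes here are invented, not from the commit), the block-resolution walk above counts only whole rows in each allocation, because rows are not allowed to span blocks:

package main

import "fmt"

// resolveBlock is a standalone copy of the walk in
// FixedAttributeGroup.resolveBlockFromByteOffset, using plain slices.
func resolveBlock(blocks [][]byte, byteOffset, rowSize int) (int, int) {
	curOffset, curBlock := 0, 0
	for {
		if curBlock >= len(blocks) {
			panic("Don't have enough blocks to fulfill")
		}
		// Only whole rows count towards a block's usable capacity.
		blockAdd := len(blocks[curBlock])
		blockAdd -= blockAdd % rowSize
		if curOffset+blockAdd < byteOffset {
			curOffset += blockAdd
			curBlock++
		} else {
			return curBlock, byteOffset - curOffset
		}
	}
}

func main() {
	// Two example blocks of 10 bytes each, rows of 4 bytes:
	// each block holds 2 whole rows (8 usable bytes).
	blocks := [][]byte{make([]byte, 10), make([]byte, 10)}
	fmt.Println(resolveBlock(blocks, 5, 4)) // block 0, offset 5
	fmt.Println(resolveBlock(blocks, 9, 4)) // block 1, offset 1
}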
base/group.go (new file, 36 lines)
@@ -0,0 +1,36 @@
package base

import (
	"bytes"
)

// AttributeGroups store related sequences of system values
// in memory for the DenseInstances structure.
type AttributeGroup interface {
	// Returns an EDF thread number
	getThreadNo() uint32
	addStorage(a []byte)
	// Used for printing
	appendToRowBuf(row int, buffer *bytes.Buffer)
	// Adds a new Attribute
	AddAttribute(Attribute) error
	// Returns all Attributes
	Attributes() []Attribute
	// Gets the byte slice at a given column, row offset
	get(int, int) []byte
	// Stores the byte slice at a given column, row offset
	set(int, int, []byte)
	// Gets the size of each row in bytes (rounded up)
	RowSize() int
	// Gets references to underlying memory
	Storage() []AttributeGroupStorageRef
	// Returns a human-readable summary
	String() string
}

// AttributeGroupStorageRef is a reference to a particular set
// of allocated rows within a FixedAttributeGroup
type AttributeGroupStorageRef struct {
	Storage []byte
	Rows    int
}
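Both FixedAttributeGroup and BinaryAttributeGroup are intended to satisfy this interface, the latter by embedding FixedAttributeGroup and overriding RowSize, String, resolveBlock, set and get. A compile-time assertion of the kind below would verify that; it is a sketch, not part of the commit:

package base

// Compile-time checks (illustrative): both group types implement AttributeGroup.
var (
	_ AttributeGroup = (*FixedAttributeGroup)(nil)
	_ AttributeGroup = (*BinaryAttributeGroup)(nil)
)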
base/pond.go (deleted, 122 lines)
@@ -1,122 +0,0 @@
package base

import (
	"bytes"
	"fmt"
)

// Ponds contain a particular number of rows of
// a particular number of Attributes, all of a given type.
type Pond struct {
	threadNo   uint32
	parent     DataGrid
	attributes []Attribute
	size       int
	alloc      [][]byte
	maxRow     int
}

func (p *Pond) String() string {
	if len(p.alloc) > 1 {
		return fmt.Sprintf("Pond(%d attributes\n thread: %d\n size: %d\n)", len(p.attributes), p.threadNo, p.size)
	}
	return fmt.Sprintf("Pond(%d attributes\n thread: %d\n size: %d\n %d \n)", len(p.attributes), p.threadNo, p.size, p.alloc[0][0:60])
}

// PondStorageRef is a reference to a particular set
// of allocated rows within a Pond
type PondStorageRef struct {
	Storage []byte
	Rows    int
}

// RowSize returns the size of each row in bytes
func (p *Pond) RowSize() int {
	return len(p.attributes) * p.size
}

// Attributes returns a slice of Attributes in this Pond
func (p *Pond) Attributes() []Attribute {
	return p.attributes
}

// Storage returns a slice of PondStorageRefs which can
// be used to access the memory in this pond.
func (p *Pond) Storage() []PondStorageRef {
	ret := make([]PondStorageRef, len(p.alloc))
	rowSize := p.RowSize()
	for i, b := range p.alloc {
		ret[i] = PondStorageRef{b, len(b) / rowSize}
	}
	return ret
}

func (p *Pond) resolveBlock(col int, row int) (int, int) {
	if len(p.alloc) == 0 {
		panic("No blocks to resolve")
	}

	// Find where in the pond the byte is
	byteOffset := row*p.RowSize() + col*p.size
	curOffset := 0
	curBlock := 0
	blockOffset := 0
	for {
		if curBlock >= len(p.alloc) {
			panic("Don't have enough blocks to fulfill")
		}

		// Rows are not allowed to span blocks
		blockAdd := len(p.alloc[curBlock])
		blockAdd -= blockAdd % p.RowSize()

		// Case 1: we need to skip this allocation
		if curOffset+blockAdd < byteOffset {
			curOffset += blockAdd
			curBlock++
		} else {
			blockOffset = byteOffset - curOffset
			break
		}
	}

	return curBlock, blockOffset
}

func (p *Pond) set(col int, row int, val []byte) {
	// Double-check the length
	if len(val) != p.size {
		panic(fmt.Sprintf("Tried to call set() with %d bytes, should be %d", len(val), p.size))
	}

	// Find where in the pond the byte is
	curBlock, blockOffset := p.resolveBlock(col, row)

	// Copy the value in
	copied := copy(p.alloc[curBlock][blockOffset:], val)
	if copied != p.size {
		panic(fmt.Sprintf("set() terminated by only copying %d bytes into the current block (should be %d). Check EDF allocation", copied, p.size))
	}

	row++
	if row > p.maxRow {
		p.maxRow = row
	}
}

func (p *Pond) get(col int, row int) []byte {
	curBlock, blockOffset := p.resolveBlock(col, row)
	return p.alloc[curBlock][blockOffset : blockOffset+p.size]
}

func (p *Pond) appendToRowBuf(row int, buffer *bytes.Buffer) {
	for i, a := range p.attributes {
		postfix := " "
		if i == len(p.attributes)-1 {
			postfix = ""
		}
		buffer.WriteString(fmt.Sprintf("%s%s", a.GetStringFromSysVal(p.get(i, row)), postfix))
	}
}