package neural

import (
	"fmt"
	"math"
	"math/rand"

	"github.com/gonum/matrix/mat64"
	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/filters"
)

// MultiLayerNet is a Network conceptually organised into layers,
// zero or more of which are hidden.
//
// Within each layer, no neurons are connected.
//
// No neurons in a given layer are connected with any neurons
// in a previous layer.
//
// Neurons can only be connected to neurons in the layer above.
type MultiLayerNet struct {
	network         *Network               // underlying network, built by Fit
	attrs           map[base.Attribute]int // maps each Attribute to its node index
	layers          []int                  // node count for each hidden layer
	classAttrOffset int                    // node index of the first class output
	classAttrCount  int                    // number of class output nodes
	Convergence     float64                // stop once the mean error falls below this
	MaxIterations   int                    // hard cap on training iterations
	LearningRate    float64                // step size for weight/bias updates
}

// NewMultiLayerNet returns a MultiLayerNet with an underlying
// Network conceptually organised into layers.
//
// The layers argument is a slice of integers giving the node
// count at each hidden layer.
func NewMultiLayerNet(layers []int) *MultiLayerNet {
	return &MultiLayerNet{
		network:         nil,
		attrs:           make(map[base.Attribute]int),
		layers:          layers,
		classAttrOffset: 0,
		classAttrCount:  0,
		Convergence:     0.001,
		MaxIterations:   500,
		LearningRate:    0.90,
	}
}
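
// A minimal usage sketch (the CSV path and the hidden-layer size
// below are hypothetical; ParseCSVToInstances marks the final CSV
// column as the class Attribute):
//
//	data, err := base.ParseCSVToInstances("iris.csv", true)
//	if err != nil {
//		panic(err)
//	}
//	net := NewMultiLayerNet([]int{5}) // one hidden layer of five nodes
//	net.MaxIterations = 1000
//	net.Fit(data)
//	predictions := net.Predict(data)
//	_ = predictions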

// String returns a human-readable summary of this network.
func (m *MultiLayerNet) String() string {
	return fmt.Sprintf("MultiLayerNet(%v, %v, %.2f, %.2f, %d)", m.layers, m.network, m.Convergence, m.LearningRate, m.MaxIterations)
}
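
// For example, a freshly-constructed net with one hidden layer of
// five nodes prints as:
//
//	MultiLayerNet([5], <nil>, 0.00, 0.90, 500)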

func (m *MultiLayerNet) convertToFloatInsts(X base.FixedDataGrid) base.FixedDataGrid {

	// Make sure everything's a FloatAttribute
	fFilt := filters.NewFloatConvertFilter()
	for _, a := range X.AllAttributes() {
		fFilt.AddAttribute(a)
	}
	fFilt.Train()
	insts := base.NewLazilyFilteredInstances(X, fFilt)
	return insts
}

// Predict uses the underlying network to produce predictions for the
// class variables of X.
//
// Can only predict one CategoricalAttribute at a time, or up to n
// FloatAttributes. Set or unset ClassAttributes to work around this
// limitation.
func (m *MultiLayerNet) Predict(X base.FixedDataGrid) base.FixedDataGrid {

	// Create the return vector
	ret := base.GeneratePredictionVector(X)

	// Make sure everything's a FloatAttribute
	insts := m.convertToFloatInsts(X)

	// Get the input/output Attributes
	inputAttrs := base.NonClassAttributes(insts)
	outputAttrs := ret.AllClassAttributes()

	// Compute the total number of layers
	layers := 2 + len(m.layers)

	// Check that we're operating in a singular mode
	floatMode := 0
	categoricalMode := 0
	for _, a := range outputAttrs {
		if _, ok := a.(*base.CategoricalAttribute); ok {
			categoricalMode++
		} else if _, ok := a.(*base.FloatAttribute); ok {
			floatMode++
		} else {
			panic("Unsupported output Attribute type!")
		}
	}

	if floatMode > 0 && categoricalMode > 0 {
		panic("Can't predict a mix of float and categorical Attributes")
	} else if categoricalMode > 1 {
		panic("Can't predict more than one categorical class Attribute")
	}

	// Create the activation vector
	a := mat64.NewDense(m.network.size, 1, make([]float64, m.network.size))

	// Resolve the input AttributeSpecs
	inputAs := base.ResolveAttributes(insts, inputAttrs)

	// Resolve the output AttributeSpecs
	outputAs := base.ResolveAttributes(ret, outputAttrs)

	// Map over each input row
	insts.MapOverRows(inputAs, func(row [][]byte, rc int) (bool, error) {
		// Clear the activation vector
		for i := 0; i < m.network.size; i++ {
			a.Set(i, 0, 0.0)
		}
		// Build the activation vector
		for i, vb := range row {
			cIndex, ok := m.attrs[inputAs[i].GetAttribute()]
			if !ok {
				panic("Can't resolve the Attribute!")
			}
			a.Set(cIndex, 0, base.UnpackBytesToFloat(vb))
		}
		// Robots, activate! (feed the input forward)
		m.network.Activate(a, layers)

		// Decide which class to set
		if floatMode > 0 {
			for _, as := range outputAs {
				cIndex := m.attrs[as.GetAttribute()]
				ret.Set(as, rc, base.PackFloatToBytes(a.At(cIndex, 0)))
			}
		} else {
			// Argmax over the class output nodes
			maxIndex := 0
			maxVal := 0.0
			for i := m.classAttrOffset; i < m.classAttrOffset+m.classAttrCount; i++ {
				val := a.At(i, 0)
				if val > maxVal {
					maxIndex = i
					maxVal = val
				}
			}
			maxIndex -= m.classAttrOffset
			ret.Set(outputAs[0], rc, base.PackU64ToBytes(uint64(maxIndex)))
		}
		return true, nil
	})

	return ret
}
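
// A hedged sketch of the workaround mentioned above: to regress
// several FloatAttributes at once, mark each one as a class
// Attribute before calling Fit (insts and targetAttr here are
// hypothetical; AddClassAttribute is defined on *base.DenseInstances):
//
//	if err := insts.AddClassAttribute(targetAttr); err != nil {
//		panic(err)
//	}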

// Fit trains the neural network on the given FixedDataGrid.
//
// Training stops when the mean error achieved is less
// than the Convergence value, or when back-propagation has occurred
// more times than the value set by MaxIterations.
func (m *MultiLayerNet) Fit(X base.FixedDataGrid) {

	// Make sure everything's a FloatAttribute
	insts := m.convertToFloatInsts(X)

	// The size of the first layer is the number of things
	// in the revised instances which aren't class Attributes
	inputAttrsVec := base.NonClassAttributes(insts)

	// The size of the output layer is the number of things
	// in the revised instances which are class Attributes
	classAttrsVec := insts.AllClassAttributes()

	// The total number of layers is the input layer plus the
	// output layer plus the number of hidden layers specified
	totalLayers := 2 + len(m.layers)

	// The size is then augmented by the number of nodes
	// in the centre
	size := len(inputAttrsVec)
	size += len(classAttrsVec)
	hiddenSize := 0
	for _, a := range m.layers {
		size += a
		hiddenSize += a
	}
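
	// Illustrative note on the index layout built below: activation
	// vector indices run inputs first, then hidden nodes, then class
	// outputs. With (hypothetically) 4 inputs, one hidden layer of 5
	// and 3 classes, inputs occupy indices 0-3, hidden nodes 4-8 and
	// class outputs 9-11.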

	// Enumerate the Attributes
	trainingAttrs := make(map[base.Attribute]int)
	classAttrs := make(map[base.Attribute]int)
	attrCounter := 0
	for i, a := range inputAttrsVec {
		attrCounter = i
		m.attrs[a] = attrCounter
		trainingAttrs[a] = attrCounter
	}
	// Class outputs sit after the hidden nodes in the activation
	// vector, so the offset must skip past them too
	m.classAttrOffset = attrCounter + 1 + hiddenSize
	for _, a := range classAttrsVec {
		attrCounter++
		m.attrs[a] = attrCounter + hiddenSize
		classAttrs[a] = attrCounter + hiddenSize
		m.classAttrCount++
	}

	// Create the underlying Network
	m.network = NewNetwork(size, len(inputAttrsVec), Sigmoid)

	// Initialise inter-hidden-layer weights to small random values
	layerOffset := len(inputAttrsVec)
	for i := 0; i < len(m.layers)-1; i++ {
		// Get the size of this layer
		thisLayerSize := m.layers[i]
		// Next layer size
		nextLayerSize := m.layers[i+1]
		// For every node in this layer
		for j := 1; j <= thisLayerSize; j++ {
			// Compute the offset
			nodeOffset1 := layerOffset + j
			// For every node in the next layer
			for k := 1; k <= nextLayerSize; k++ {
				// Compute offset
				nodeOffset2 := layerOffset + thisLayerSize + k
				// Set weight randomly
				m.network.SetWeight(nodeOffset1, nodeOffset2, rand.NormFloat64()*0.1)
			}
		}
		layerOffset += thisLayerSize
	}

	// Initialise biases within each hidden layer
	layerOffset = len(inputAttrsVec)
	for _, l := range m.layers {
		for j := 1; j <= l; j++ {
			nodeOffset := layerOffset + j
			m.network.SetBias(nodeOffset, rand.NormFloat64()*0.1)
		}
		layerOffset += l
	}

	// Initialise biases for the output layer; like the hidden-layer
	// loops above, node offsets are 1-based, so start at layerOffset+1
	for i := 0; i < len(classAttrsVec); i++ {
		nodeOffset := layerOffset + i + 1
		m.network.SetBias(nodeOffset, rand.NormFloat64()*0.1)
	}

	// Connect the final hidden layer with the output layer
	layerOffset = len(inputAttrsVec)
	for i, l := range m.layers {
		if i == len(m.layers)-1 {
			for j := 1; j <= l; j++ {
				nodeOffset1 := layerOffset + j
				for k := 1; k <= len(classAttrsVec); k++ {
					nodeOffset2 := layerOffset + l + k
					m.network.SetWeight(nodeOffset1, nodeOffset2, rand.NormFloat64()*0.1)
				}
			}
		}
		layerOffset += l
	}

	// Connect the input layer with the first hidden layer
	// (or with the output layer, if there are no hidden layers)
	nextLayerLen := 0
	if len(m.layers) > 0 {
		nextLayerLen = m.layers[0]
	} else {
		nextLayerLen = len(classAttrsVec)
	}
	for i := 1; i <= len(inputAttrsVec); i++ {
		for j := 1; j <= nextLayerLen; j++ {
			nodeOffset := len(inputAttrsVec) + j
			v := rand.NormFloat64() * 0.1
			m.network.SetWeight(i, nodeOffset, v)
		}
	}
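
	// Design note: drawing the initial weights and biases from a
	// small-variance Gaussian (rand.NormFloat64() * 0.1) breaks the
	// symmetry between nodes while keeping the sigmoid units away
	// from their saturated regions early in training.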

	// Create the training activation vector
	trainVec := mat64.NewDense(size, 1, make([]float64, size))
	// Create the error vector
	errVec := mat64.NewDense(size, 1, make([]float64, size))

	// Resolve training AttributeSpecs
	trainAs := base.ResolveAllAttributes(insts)

	// Feed-forward, compute the error and update for each training
	// example until convergence (or until MaxIterations is exhausted)
	for iteration := 0; iteration < m.MaxIterations; iteration++ {
		totalError := 0.0
		maxRow := 0
		insts.MapOverRows(trainAs, func(row [][]byte, i int) (bool, error) {

			maxRow = i
			// Clear vectors
			for i := 0; i < size; i++ {
				trainVec.Set(i, 0, 0.0)
				errVec.Set(i, 0, 0.0)
			}

			// Build vectors
			for i, vb := range row {
				v := base.UnpackBytesToFloat(vb)
				if attrIndex, ok := trainingAttrs[trainAs[i].GetAttribute()]; ok {
					// Add to the activation vector
					trainVec.Set(attrIndex, 0, v)
				} else if attrIndex, ok := classAttrs[trainAs[i].GetAttribute()]; ok {
					// Set the target in the error vector
					errVec.Set(attrIndex, 0, v)
				} else {
					panic("Should be able to find this Attribute!")
				}
			}

			// Activate the network
			m.network.Activate(trainVec, totalLayers-1)

			// Compute the error (target minus actual output)
			for a := range classAttrs {
				cIndex := classAttrs[a]
				errVec.Set(cIndex, 0, errVec.At(cIndex, 0)-trainVec.At(cIndex, 0))
			}

			// Update the total error
			totalError += math.Abs(errVec.Sum())

			// Back-propagate the error
			b := m.network.Error(trainVec, errVec, totalLayers)

			// Update the weights
			m.network.UpdateWeights(trainVec, b, m.LearningRate)

			// Update the biases
			m.network.UpdateBias(b, m.LearningRate)

			return true, nil
		})

		// maxRow holds the last zero-based row index, so the row
		// count is maxRow+1
		totalError /= float64(maxRow + 1)
		// If we've converged, no need to carry on
		if totalError < m.Convergence {
			break
		}
	}
}
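
// A fuller, hedged sketch of training and evaluating this classifier
// (assumes imports of golearn's base and evaluation packages; the
// CSV path and layer size are hypothetical):
//
//	data, err := base.ParseCSVToInstances("dataset.csv", true)
//	if err != nil {
//		panic(err)
//	}
//	train, test := base.InstancesTrainTestSplit(data, 0.5)
//	net := NewMultiLayerNet([]int{10})
//	net.Fit(train)
//	predictions := net.Predict(test)
//	cm, err := evaluation.GetConfusionMatrix(test, predictions)
//	if err != nil {
//		panic(err)
//	}
//	fmt.Println(evaluation.GetAccuracy(cm))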