package neural

import (
	"fmt"
	"math"
	"math/rand"

	"github.com/gonum/matrix/mat64"
	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/filters"
)

// MultiLayerNet represents a Network which is conceptually
// organised into layers, zero or more of which are hidden.
//
// Within each layer, no neurons are connected.
//
// No neurons in a given layer are connected with any neurons
// in a previous layer.
//
// Neurons can only be connected to neurons in the layer above.
type MultiLayerNet struct {
	network         *Network
	attrs           map[base.Attribute]int
	layers          []int
	classAttrOffset int
	classAttrCount  int
	Convergence     float64
	MaxIterations   int
	LearningRate    float64
}

// NewMultiLayerNet returns a MultiLayerNet whose underlying
// Network is conceptually organised into layers.
//
// The layers argument is a slice of integers giving the
// node count at each hidden layer.
func NewMultiLayerNet(layers []int) *MultiLayerNet {
	return &MultiLayerNet{
		nil,
		make(map[base.Attribute]int),
		layers,
		0,
		0,
		0.001,
		500,
		0.90,
	}
}

// String returns a human-readable summary of this network.
func (m *MultiLayerNet) String() string {
	return fmt.Sprintf("MultiLayerNet(%v, %v, %.2f, %.2f, %d)", m.layers, m.network, m.Convergence, m.LearningRate, m.MaxIterations)
}

// convertToFloatInsts lazily converts every Attribute in X
// into a FloatAttribute.
func (m *MultiLayerNet) convertToFloatInsts(X base.FixedDataGrid) base.FixedDataGrid {
	// Make sure everything's a FloatAttribute
	fFilt := filters.NewFloatConvertFilter()
	for _, a := range X.AllAttributes() {
		fFilt.AddAttribute(a)
	}
	fFilt.Train()
	insts := base.NewLazilyFilteredInstances(X, fFilt)
	return insts
}
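// A minimal usage sketch (hedged: the variable names and the hidden-layer
// size are made up for illustration; trainData and testData are assumed to
// be base.FixedDataGrid values loaded elsewhere, e.g. with
// base.ParseCSVToInstances):
//
//	net := NewMultiLayerNet([]int{10}) // one hidden layer of 10 nodes
//	net.MaxIterations = 2000           // default is 500
//	net.LearningRate = 0.5             // default is 0.90
//	net.Fit(trainData)
//	predictions := net.Predict(testData)
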
// Predict uses the underlying network to produce predictions for the
// class variables of X.
//
// Can only predict one CategoricalAttribute at a time, or up to n
// FloatAttributes. Set or unset ClassAttributes to work around this
// limitation.
func (m *MultiLayerNet) Predict(X base.FixedDataGrid) base.FixedDataGrid {

	// Create the return vector
	ret := base.GeneratePredictionVector(X)

	// Make sure everything's a FloatAttribute
	insts := m.convertToFloatInsts(X)

	// Get the input/output Attributes
	inputAttrs := base.NonClassAttributes(insts)
	outputAttrs := ret.AllClassAttributes()

	// Compute the total number of layers
	layers := 2 + len(m.layers)

	// Check that we're operating in a singular mode
	floatMode := 0
	categoricalMode := 0
	for _, a := range outputAttrs {
		if _, ok := a.(*base.CategoricalAttribute); ok {
			categoricalMode++
		} else if _, ok := a.(*base.FloatAttribute); ok {
			floatMode++
		} else {
			panic("Unsupported output Attribute type!")
		}
	}

	if floatMode > 0 && categoricalMode > 0 {
		panic("Can't predict a mix of float and categorical Attributes")
	} else if categoricalMode > 1 {
		panic("Can't predict more than one categorical class Attribute")
	}

	// Create the activation vector
	a := mat64.NewDense(m.network.size, 1, make([]float64, m.network.size))

	// Resolve the input AttributeSpecs
	inputAs := base.ResolveAttributes(insts, inputAttrs)

	// Resolve the output AttributeSpecs
	outputAs := base.ResolveAttributes(ret, outputAttrs)

	// Map over each input row
	insts.MapOverRows(inputAs, func(row [][]byte, rc int) (bool, error) {
		// Clear the activation vector
		for i := 0; i < m.network.size; i++ {
			a.Set(i, 0, 0.0)
		}
		// Build the activation vector
		for i, vb := range row {
			if cIndex, ok := m.attrs[inputAs[i].GetAttribute()]; !ok {
				panic("Can't resolve the Attribute!")
			} else {
				a.Set(cIndex, 0, base.UnpackBytesToFloat(vb))
			}
		}
		// Robots, activate!
		m.network.Activate(a, layers)

		// Decide which class to set
		if floatMode > 0 {
			for _, as := range outputAs {
				cIndex := m.attrs[as.GetAttribute()]
				ret.Set(as, rc, base.PackFloatToBytes(a.At(cIndex, 0)))
			}
		} else {
			maxIndex := 0
			maxVal := 0.0
			for i := m.classAttrOffset; i < m.classAttrOffset+m.classAttrCount; i++ {
				val := a.At(i, 0)
				if val > maxVal {
					maxIndex = i
					maxVal = val
				}
			}
			maxIndex -= m.classAttrOffset
			ret.Set(outputAs[0], rc, base.PackU64ToBytes(uint64(maxIndex)))
		}
		return true, nil
	})

	return ret
}
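// The underlying Network stores every node in one flat activation vector.
// From the offset arithmetic in Predict (above) and Fit (below), the layout
// appears to be: input Attributes first, then each hidden layer in
// declaration order, then the class Attributes. A sketch (the sizes are
// made up for illustration): with four inputs, layers = []int{3, 2} and
// two classes,
//
//	index: 0..3     4..6       7..8       9..10
//	role:  inputs   hidden #1  hidden #2  classes
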
// Fit trains the neural network on the given FixedDataGrid.
//
// Training stops when the mean error achieved is less
// than the Convergence value, or when back-propagation has occurred
// more times than the value set by MaxIterations.
func (m *MultiLayerNet) Fit(X base.FixedDataGrid) {

	// Make sure everything's a FloatAttribute
	insts := m.convertToFloatInsts(X)

	// The size of the first layer is the number of things
	// in the revised instances which aren't class Attributes
	inputAttrsVec := base.NonClassAttributes(insts)

	// The size of the output layer is the number of things
	// in the revised instances which are class Attributes
	classAttrsVec := insts.AllClassAttributes()

	// The total number of layers is input layer + output layer
	// plus the number of hidden layers specified
	totalLayers := 2 + len(m.layers)

	// The size is then augmented by the number of nodes
	// in the centre
	size := len(inputAttrsVec)
	size += len(classAttrsVec)
	hiddenSize := 0
	for _, a := range m.layers {
		size += a
		hiddenSize += a
	}

	// Enumerate the Attributes
	trainingAttrs := make(map[base.Attribute]int)
	classAttrs := make(map[base.Attribute]int)
	attrCounter := 0
	for i, a := range inputAttrsVec {
		attrCounter = i
		m.attrs[a] = attrCounter
		trainingAttrs[a] = attrCounter
	}
	m.classAttrOffset = attrCounter + 1
	for _, a := range classAttrsVec {
		attrCounter++
		m.attrs[a] = attrCounter + hiddenSize
		classAttrs[a] = attrCounter + hiddenSize
		m.classAttrCount++
	}

	// Create the underlying Network
	m.network = NewNetwork(size, len(inputAttrsVec), Sigmoid)

	// Initialise inter-hidden-layer weights to small random values
	layerOffset := len(inputAttrsVec)
	for i := 0; i < len(m.layers)-1; i++ {
		// Get the size of this layer
		thisLayerSize := m.layers[i]
		// Next layer size
		nextLayerSize := m.layers[i+1]
		// For every node in this layer
		for j := 1; j <= thisLayerSize; j++ {
			// Compute the offset
			nodeOffset1 := layerOffset + j
			// For every node in the next layer
			for k := 1; k <= nextLayerSize; k++ {
				// Compute the offset
				nodeOffset2 := layerOffset + thisLayerSize + k
				// Set the weight randomly
				m.network.SetWeight(nodeOffset1, nodeOffset2, rand.NormFloat64()*0.1)
			}
		}
		layerOffset += thisLayerSize
	}

	// Initialise biases within each hidden layer
	layerOffset = len(inputAttrsVec)
	for _, l := range m.layers {
		for j := 1; j <= l; j++ {
			nodeOffset := layerOffset + j
			m.network.SetBias(nodeOffset, rand.NormFloat64()*0.1)
		}
		layerOffset += l
	}

	// Initialise biases for the output layer
	for i := 0; i < len(classAttrsVec); i++ {
		nodeOffset := layerOffset + i
		m.network.SetBias(nodeOffset, rand.NormFloat64()*0.1)
	}

	// Connect the final hidden layer with the output layer
	layerOffset = len(inputAttrsVec)
	for i, l := range m.layers {
		if i == len(m.layers)-1 {
			for j := 1; j <= l; j++ {
				nodeOffset1 := layerOffset + j
				for k := 1; k <= len(classAttrsVec); k++ {
					nodeOffset2 := layerOffset + l + k
					m.network.SetWeight(nodeOffset1, nodeOffset2, rand.NormFloat64()*0.1)
				}
			}
		}
		layerOffset += l
	}

	// Connect the input layer with the first hidden layer
	// (or the output layer, if there are no hidden layers)
	for i := 1; i <= len(inputAttrsVec); i++ {
		nextLayerLen := 0
		if len(m.layers) > 0 {
			nextLayerLen = m.layers[0]
		} else {
			nextLayerLen = len(classAttrsVec)
		}
		for j := 1; j <= nextLayerLen; j++ {
			nodeOffset := len(inputAttrsVec) + j
			v := rand.NormFloat64() * 0.1
			m.network.SetWeight(i, nodeOffset, v)
		}
	}

	// Create the training activation vector
	trainVec := mat64.NewDense(size, 1, make([]float64, size))

	// Create the error vector
	errVec := mat64.NewDense(size, 1, make([]float64, size))

	// Resolve training AttributeSpecs
	trainAs := base.ResolveAllAttributes(insts)

	// Feed-forward, compute the error and update for each training example,
	// until convergence (what's that)
	for iteration := 0; iteration < m.MaxIterations; iteration++ {
		totalError := 0.0
		maxRow := 0
		insts.MapOverRows(trainAs, func(row [][]byte, i int) (bool, error) {

			maxRow = i
			// Clear the vectors
			for i := 0; i < size; i++ {
				trainVec.Set(i, 0, 0.0)
				errVec.Set(i, 0, 0.0)
			}

			// Build the vectors
			for i, vb := range row {
				v := base.UnpackBytesToFloat(vb)
				if attrIndex, ok := trainingAttrs[trainAs[i].GetAttribute()]; ok {
					// Add to the activation vector
					trainVec.Set(attrIndex, 0, v)
				} else if attrIndex, ok := classAttrs[trainAs[i].GetAttribute()]; ok {
					// Set the error vector
					errVec.Set(attrIndex, 0, v)
				} else {
					panic("Should be able to find this Attribute!")
				}
			}

			// Activate the network
			m.network.Activate(trainVec, totalLayers-1)

			// Compute the error
			for a := range classAttrs {
				cIndex := classAttrs[a]
				errVec.Set(cIndex, 0, errVec.At(cIndex, 0)-trainVec.At(cIndex, 0))
			}

			// Update the total error
			totalError += math.Abs(errVec.Sum())

			// Back-propagate the error
			b := m.network.Error(trainVec, errVec, totalLayers)

			// Update the weights
			m.network.UpdateWeights(trainVec, b, m.LearningRate)

			// Update the biases
			m.network.UpdateBias(b, m.LearningRate)

			return true, nil
		})

		totalError /= float64(maxRow)
		// If we've converged, no need to carry on
		if totalError < m.Convergence {
			break
		}
	}
}
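// Predict can emit several FloatAttribute outputs at once, but only one
// CategoricalAttribute (see its documentation above). A sketch of the
// multi-output float case (hedged: "y1" and "y2" are hypothetical column
// names, and data is assumed to be a *base.DenseInstances, whose
// AddClassAttribute marks an Attribute as a prediction target):
//
//	for _, a := range data.AllAttributes() {
//		if name := a.GetName(); name == "y1" || name == "y2" {
//			data.AddClassAttribute(a)
//		}
//	}
//	net := NewMultiLayerNet([]int{6})
//	net.Fit(data)
//	out := net.Predict(data)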