package optimisation

import "github.com/gonum/matrix/mat64"

// BatchGradientDescent finds the local minimum of a function.
// See http://en.wikipedia.org/wiki/Gradient_descent for more details.
func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
	m, _ := y.Dims()
	// Helper function for scalar multiplication: scales every element by alpha/m.
	mult := func(r, c int, v float64) float64 { return v * alpha / float64(m) }

	for i := 0; i < epoch; i++ {
		// grad holds the transpose of x, so that grad * error yields the
		// gradient with respect to each parameter in theta.
		grad := mat64.DenseCopyOf(x)
		grad.TCopy(grad)

		// Calculate our best prediction, given theta.
		temp := mat64.DenseCopyOf(x)
		temp.Mul(temp, theta)

		// Calculate our error from the real values.
		temp.Sub(temp, y)
		grad.Mul(grad, temp)

		// Multiply by the scalar factor alpha/m.
		grad.Apply(mult, grad)

		// Take a step in the direction of the negative gradient.
		theta.Sub(theta, grad)
	}
	return theta
}

// StochasticGradientDescent updates the parameters of theta using one row of
// the dataset at a time, rather than computing the gradient over the entire
// dataset on every step as BatchGradientDescent does. Each update is therefore
// much cheaper, at the cost of a noisier gradient estimate. The noise is
// mitigated by running several SGD workers in parallel (the number of
// goroutines spawned is specified by procs) and averaging their results.
func StochasticGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch, procs int) *mat64.Dense {
	m, _ := y.Dims()
	resultPipe := make(chan *mat64.Dense)
	results := make([]*mat64.Dense, 0, procs)

	// Helper function for scalar multiplication: scales every element by
	// alpha/m, matching the step size used by the batch version.
	mult := func(r, c int, v float64) float64 { return v * alpha / float64(m) }

	for p := 0; p < procs; p++ {
		go func() {
			// DenseCopyOf returns a deep copy, not a pointer to theta,
			// so each goroutine trains its own set of parameters.
			thetaCopy := mat64.DenseCopyOf(theta)

			for i := 0; i < epoch; i++ {
				for k := 0; k < m; k++ {
					// Wrap the kth rows of x and y as 1×n and 1×1 matrices.
					datXtemp := x.RowView(k)
					datYtemp := y.RowView(k)
					datX := mat64.NewDense(1, len(datXtemp), datXtemp)
					datY := mat64.NewDense(1, 1, datYtemp)

					// grad holds the transpose of the current row.
					grad := mat64.DenseCopyOf(datX)
					grad.TCopy(grad)

					// Prediction for this row, then its error from the real value.
					datX.Mul(datX, thetaCopy)
					datX.Sub(datX, datY)
					grad.Mul(grad, datX)

					// Multiply by the scalar factor alpha/m.
					grad.Apply(mult, grad)

					// Take a step in the direction of the negative gradient.
					thetaCopy.Sub(thetaCopy, grad)
				}
			}
			resultPipe <- thetaCopy
		}()
	}

	// Collect one result from each worker, then average them.
	for len(results) < procs {
		results = append(results, <-resultPipe)
	}
	return averageTheta(results)
}

func averageTheta(matrices []*mat64.Dense) *mat64.Dense {
	if len(matrices) < 1 {
		panic("Must provide at least one matrix to average")
	}
	invLen := 1.0 / float64(len(matrices))
	// Helper function for scalar multiplication: scales every element by 1/len(matrices).
	mult := func(r, c int, v float64) float64 { return v * invLen }

	// Sum the matrices into a copy, so the caller's first result is not mutated.
	average := mat64.DenseCopyOf(matrices[0])
	for i := 1; i < len(matrices); i++ {
		average.Add(average, matrices[i])
	}

	// Divide the sum by the number of matrices to get the average.
	average.Apply(mult, average)
	return average
}
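
// exampleUsage is an illustrative sketch, not part of the original package
// API: it shows how the two optimisers above might be called to fit a simple
// linear model y ≈ x*theta on a tiny invented dataset. All values here
// (the data, learning rate, epoch count, and procs) are made up for
// illustration only.
func exampleUsage() (batch, stochastic *mat64.Dense) {
	// Four observations with two features each; the first column is a
	// constant 1 acting as the bias/intercept term.
	x := mat64.NewDense(4, 2, []float64{
		1, 1,
		1, 2,
		1, 3,
		1, 4,
	})
	// One target value per row of x (generated from y = 1 + 2x).
	y := mat64.NewDense(4, 1, []float64{3, 5, 7, 9})
	// Initial guess: one parameter per feature, starting at zero.
	theta := mat64.NewDense(2, 1, nil)

	// Both optimisers mutate theta in place, so pass each its own copy.
	batch = BatchGradientDescent(x, y, mat64.DenseCopyOf(theta), 0.1, 1000)
	// StochasticGradientDescent takes the same arguments plus the number
	// of parallel workers whose results are averaged.
	stochastic = StochasticGradientDescent(x, y, mat64.DenseCopyOf(theta), 0.1, 1000, 4)
	return batch, stochastic
}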