package optimisation

import "github.com/gonum/matrix/mat64"

// BatchGradientDescent finds a local minimum of the least-squares cost
// function by repeatedly stepping against its gradient, computed over the
// entire dataset on every epoch.
// See http://en.wikipedia.org/wiki/Gradient_descent for more details.
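// Each epoch applies the update theta = theta - (alpha/m) * x^T * (x*theta - y),
// where m is the number of training examples.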
func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
	m, _ := y.Dims()
	// Helper function that scales each element by alpha/m
	mult := func(r, c int, v float64) float64 { return v * alpha / float64(m) }
	for i := 0; i < epoch; i++ {
		// grad starts as a copy of x and is transposed in place
		grad := mat64.DenseCopyOf(x)
		grad.TCopy(grad)
		temp := mat64.DenseCopyOf(x)
		// Calculate our best prediction, given theta
		temp.Mul(temp, theta)
		// Calculate our error from the real values
		temp.Sub(temp, y)
		grad.Mul(grad, temp)
		// Multiply by the scalar factor alpha/m
		grad.Apply(mult, grad)
		// Step against the gradient (steepest descent)
		theta.Sub(theta, grad)
	}
	return theta
}
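
// Illustrative usage sketch, not part of the original file: fits theta on a
// tiny, hypothetical single-feature dataset with a bias column. All data and
// hyperparameters below are invented for demonstration only.
func exampleBatchGradientDescent() *mat64.Dense {
	// Design matrix whose rows are [1, x_i]; the leading 1 is the bias term.
	x := mat64.NewDense(3, 2, []float64{
		1, 1,
		1, 2,
		1, 3,
	})
	// Targets generated from y = 1 + 2x, so theta should approach [1, 2]^T.
	y := mat64.NewDense(3, 1, []float64{3, 5, 7})
	// Start from the zero vector; alpha and epoch are arbitrary choices.
	theta := mat64.NewDense(2, 1, []float64{0, 0})
	return BatchGradientDescent(x, y, theta, 0.1, 1000)
}
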
// StochasticGradientDescent updates the parameters in theta one row of the
// dataset at a time, rather than computing the gradient over the entire
// dataset, so each update is much cheaper than in BatchGradientDescent.
// The trade-off is a noisier estimate of the gradient. This is mitigated by
// running several SGD workers in parallel (the number of goroutines spawned
// is specified by the procs argument) and averaging their results.
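// Each per-row update applies theta = theta - (alpha/m) * xk^T * (xk*theta - yk),
// where xk and yk are the k-th rows of x and y.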
func StochasticGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch, procs int) *mat64.Dense {
	m, _ := y.Dims()
	resultPipe := make(chan *mat64.Dense)
	results := make([]*mat64.Dense, 0, procs)
	// Helper function that scales each element by alpha/m
	mult := func(r, c int, v float64) float64 { return v * alpha / float64(m) }
	for p := 0; p < procs; p++ {
		go func() {
			// DenseCopyOf makes a deep copy, so each goroutine
			// updates its own parameters independently of theta.
			thetaCopy := mat64.DenseCopyOf(theta)
			for i := 0; i < epoch; i++ {
				for k := 0; k < m; k++ {
					datXtemp := x.RowView(k)
					datYtemp := y.RowView(k)
					datX := mat64.NewDense(1, len(datXtemp), datXtemp)
					datY := mat64.NewDense(1, 1, datYtemp)
					grad := mat64.DenseCopyOf(datX)
					grad.TCopy(grad)
					datX.Mul(datX, thetaCopy)
					datX.Sub(datX, datY)
					grad.Mul(grad, datX)
					// Multiply by the scalar factor alpha/m
					grad.Apply(mult, grad)
					// Step against the gradient
					thetaCopy.Sub(thetaCopy, grad)
				}
			}
			resultPipe <- thetaCopy
		}()
	}
	// Collect one result per worker, then return their average.
	for len(results) < procs {
		results = append(results, <-resultPipe)
	}
	return averageTheta(results)
}
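
// Illustrative usage sketch, not part of the original file: the same
// hypothetical dataset as above, trained with four parallel SGD workers
// whose resulting parameters are averaged.
func exampleStochasticGradientDescent() *mat64.Dense {
	x := mat64.NewDense(3, 2, []float64{
		1, 1,
		1, 2,
		1, 3,
	})
	y := mat64.NewDense(3, 1, []float64{3, 5, 7})
	theta := mat64.NewDense(2, 1, []float64{0, 0})
	// alpha, epoch, and procs are arbitrary; averaging across the workers
	// smooths the noise introduced by the per-row updates.
	return StochasticGradientDescent(x, y, theta, 0.1, 1000, 4)
}
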
// averageTheta returns the element-wise average of the given matrices
// without mutating its inputs.
func averageTheta(matrices []*mat64.Dense) *mat64.Dense {
	if len(matrices) == 0 {
		panic("Must provide at least one matrix to average")
	}
	invLen := 1.0 / float64(len(matrices))
	// Helper function that scales each element by 1/len(matrices)
	mult := func(r, c int, v float64) float64 { return v * invLen }
	// Sum the matrices into a fresh copy so the inputs are left intact
	average := mat64.DenseCopyOf(matrices[0])
	for i := 1; i < len(matrices); i++ {
		average.Add(average, matrices[i])
	}
	// Scale the sum down to the element-wise average
	average.Apply(mult, average)
	return average
}
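
// Illustrative sketch, not part of the original file: averaging two
// hypothetical 2x1 parameter vectors element-wise gives [2, 3]^T here.
func exampleAverageTheta() *mat64.Dense {
	a := mat64.NewDense(2, 1, []float64{1, 2})
	b := mat64.NewDense(2, 1, []float64{3, 4})
	return averageTheta([]*mat64.Dense{a, b})
}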