package optimisation

import "github.com/gonum/matrix/mat64"

// BatchGradientDescent finds a local minimum of the cost function and returns
// the optimised parameters theta. On each epoch it applies the update
// theta = theta - (alpha/m) * X^T * (X*theta - y) over the whole dataset.
// See http://en.wikipedia.org/wiki/Gradient_descent for more details.
func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
	m, _ := y.Dims()

	// Helper function for scalar multiplication
	mult := func(r, c int, v float64) float64 { return v * 1.0 / float64(m) * alpha }

	for i := 0; i < epoch; i++ {
		grad := mat64.DenseCopyOf(x)
		grad.TCopy(grad)
		temp := mat64.DenseCopyOf(x)

		// Calculate our best prediction, given theta
		temp.Mul(temp, theta)

		// Calculate our error from the real values
		temp.Sub(temp, y)
		grad.Mul(grad, temp)

		// Multiply by scalar factor
		grad.Apply(mult, grad)

		// Take a step in gradient direction
		theta.Sub(theta, grad)
	}

	return theta
}
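
// exampleBatchGradientDescent is an illustrative usage sketch added for
// clarity; it is not part of the original golearn API. It assumes a tiny,
// made-up dataset: a 3x2 design matrix x (bias column plus one feature),
// a 3x1 target vector y, and a 2x1 zero-initialised theta. The learning
// rate and epoch count are arbitrary illustrative choices.
func exampleBatchGradientDescent() *mat64.Dense {
	x := mat64.NewDense(3, 2, []float64{
		1, 1, // each row: bias term, feature value (assumed data)
		1, 2,
		1, 3,
	})
	y := mat64.NewDense(3, 1, []float64{2, 4, 6})
	theta := mat64.NewDense(2, 1, []float64{0, 0})

	// Run 1000 epochs with a learning rate of 0.1 (both arbitrary).
	return BatchGradientDescent(x, y, theta, 0.1, 1000)
}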

// StochasticGradientDescent updates the parameters in theta one row of the
// dataset at a time rather than over the whole batch at once.
// It is faster because it does not compute the cost over the entire dataset on
// every update; instead it calculates the error for a single row per step.
// In return there is a trade-off in accuracy, which is mitigated by running
// several SGD processes in parallel (the number of goroutines spawned is
// specified by the procs parameter) and averaging the resulting parameters.
func StochasticGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch, procs int) *mat64.Dense {
	m, _ := y.Dims()
	resultPipe := make(chan *mat64.Dense)
	results := make([]*mat64.Dense, 0)

	// Helper function for scalar multiplication
	mult := func(r, c int, v float64) float64 { return v * 1.0 / float64(m) * alpha }

	for p := 0; p < procs; p++ {
		go func() {
			// DenseCopyOf makes a deep copy, so each goroutine optimises
			// its own parameters rather than sharing theta.
			thetaCopy := mat64.DenseCopyOf(theta)
			for i := 0; i < epoch; i++ {
				for k := 0; k < m; k++ {
					// RowView returns the row as a []float64 slice here,
					// so rebuild 1-row matrices for this sample.
					datXtemp := x.RowView(k)
					datYtemp := y.RowView(k)
					datX := mat64.NewDense(1, len(datXtemp), datXtemp)
					datY := mat64.NewDense(1, 1, datYtemp)

					// Gradient for this single row: x_k^T * (x_k*theta - y_k)
					grad := mat64.DenseCopyOf(datX)
					grad.TCopy(grad)
					datX.Mul(datX, thetaCopy)
					datX.Sub(datX, datY)
					grad.Mul(grad, datX)

					// Multiply by scalar factor
					grad.Apply(mult, grad)

					// Take a step in gradient direction
					thetaCopy.Sub(thetaCopy, grad)
				}
			}
			resultPipe <- thetaCopy
		}()
	}

	// Collect one result per goroutine, then average them.
	for len(results) < procs {
		results = append(results, <-resultPipe)
	}
	return averageTheta(results)
}
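
// exampleStochasticGradientDescent is an illustrative usage sketch added for
// clarity; it is not part of the original golearn API. It reuses the same
// made-up dataset as the batch example above and spreads the work over 4
// goroutines, returning the averaged parameters. The alpha, epoch and procs
// values are arbitrary illustrative choices.
func exampleStochasticGradientDescent() *mat64.Dense {
	x := mat64.NewDense(3, 2, []float64{
		1, 1, // each row: bias term, feature value (assumed data)
		1, 2,
		1, 3,
	})
	y := mat64.NewDense(3, 1, []float64{2, 4, 6})
	theta := mat64.NewDense(2, 1, []float64{0, 0})

	return StochasticGradientDescent(x, y, theta, 0.1, 1000, 4)
}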

func averageTheta(matrices []*mat64.Dense) *mat64.Dense {
	if len(matrices) < 2 {
		panic("Must provide at least two matrices to average")
	}
	invLen := 1.0 / float64(len(matrices))
	// Helper function for scalar multiplication
	mult := func(r, c int, v float64) float64 { return v * invLen }

	// Sum the matrices, accumulating into the first one
	average := matrices[0]
	for i := 1; i < len(matrices); i++ {
		average.Add(average, matrices[i])
	}

	// Divide by the number of matrices to get the average
	average.Apply(mult, average)
	return average
}