package optimisation

import "github.com/gonum/matrix/mat64"

// BatchGradientDescent finds the local minimum of a function using batch
// gradient descent: each epoch computes the gradient over the entire dataset
// and moves theta against it, scaled by the learning rate alpha.
// See http://en.wikipedia.org/wiki/Gradient_descent for more details.
func BatchGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch int) *mat64.Dense {
	m, _ := y.Dims()
	for i := 0; i < epoch; i++ {
		xFlat := mat64.DenseCopyOf(x)
		xFlat.TCopy(xFlat)
		temp := mat64.DenseCopyOf(x)

		// Calculate our best prediction, given theta
		temp.Mul(temp, theta)

		// Calculate our error from the real values
		temp.Sub(temp, y)
		xFlat.Mul(xFlat, temp)

		// Finish the update theta = theta - (alpha/m) * X^T * (X*theta - y).
		// Temporary hack to get around the fact there is no scalar division
		// in mat64: copy through a flat slice, scaling element by element.
		xFlatRow, _ := xFlat.Dims()
		gradient := make([]float64, 0)
		for k := 0; k < xFlatRow; k++ {
			row := xFlat.RowView(k)
			for v := range row {
				divd := row[v] / float64(m) * alpha
				gradient = append(gradient, divd)
			}
		}
		grows := len(gradient)
		grad := mat64.NewDense(grows, 1, gradient)
		theta.Sub(theta, grad)
	}
	return theta
}
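
// exampleBatchGradientDescent is a minimal usage sketch, not part of the
// original file: it fits the line y = 2x with a single parameter on
// hypothetical data. With alpha = 0.1 and 100 epochs, the returned theta
// should converge close to 2.
func exampleBatchGradientDescent() *mat64.Dense {
	// Four training rows of the line y = 2x.
	x := mat64.NewDense(4, 1, []float64{1, 2, 3, 4})
	y := mat64.NewDense(4, 1, []float64{2, 4, 6, 8})
	// Start the single parameter at zero.
	theta := mat64.NewDense(1, 1, []float64{0})
	return BatchGradientDescent(x, y, theta, 0.1, 100)
}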

// StochasticGradientDescent updates the parameters of theta one row of the
// matrix at a time. It is faster than batch gradient descent because it does
// not compute the cost function over the entire dataset on every update; it
// instead calculates the error over a single row of the dataset at a time.
// The trade-off is accuracy, which is mitigated by running multiple SGD
// processes in parallel (the number of goroutines spawned is specified by the
// procs variable) and taking the average of their results.
func StochasticGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch, procs int) *mat64.Dense {
	m, _ := y.Dims()
	resultPipe := make(chan *mat64.Dense)
	results := make([]*mat64.Dense, 0)

	for p := 0; p < procs; p++ {
		go func() {
			// DenseCopyOf returns a deep copy, so each goroutine updates its
			// own thetaCopy rather than racing on the shared theta.
			thetaCopy := mat64.DenseCopyOf(theta)
			for i := 0; i < epoch; i++ {
				for k := 0; k < m; k++ {
					datXtemp := x.RowView(k)
					datYtemp := y.RowView(k)
					datX := mat64.NewDense(1, len(datXtemp), datXtemp)
					datY := mat64.NewDense(1, 1, datYtemp)
					datXFlat := mat64.DenseCopyOf(datX)
					datXFlat.TCopy(datXFlat)
					datX.Mul(datX, thetaCopy)
					datX.Sub(datX, datY)
					datXFlat.Mul(datXFlat, datX)

					// Temporary hack to get around the fact there is no
					// scalar division in mat64: scale by alpha/m element by
					// element through a flat slice.
					xFlatRow, _ := datXFlat.Dims()
					gradient := make([]float64, 0)
					for j := 0; j < xFlatRow; j++ {
						row := datXFlat.RowView(j)
						for v := range row {
							divd := row[v] / float64(m) * alpha
							gradient = append(gradient, divd)
						}
					}
					grows := len(gradient)
					grad := mat64.NewDense(grows, 1, gradient)
					thetaCopy.Sub(thetaCopy, grad)
				}
			}
			resultPipe <- thetaCopy
		}()
	}

	// Collect one theta per goroutine, then return their average.
	for {
		d := <-resultPipe
		results = append(results, d)
		if len(results) == procs {
			return averageTheta(results)
		}
	}
}
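
// exampleStochasticGradientDescent is a minimal usage sketch, not part of the
// original file: the same y = 2x fit as above, run on two parallel goroutines
// whose final parameters are combined by averageTheta.
func exampleStochasticGradientDescent() *mat64.Dense {
	x := mat64.NewDense(4, 1, []float64{1, 2, 3, 4})
	y := mat64.NewDense(4, 1, []float64{2, 4, 6, 8})
	theta := mat64.NewDense(1, 1, []float64{0})
	// alpha = 0.1, 100 epochs, 2 goroutines (procs must be at least 2 so
	// that averageTheta has two or more results to average).
	return StochasticGradientDescent(x, y, theta, 0.1, 100, 2)
}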

// averageTheta returns the element-wise mean of the given column vectors.
// Note that it accumulates into matrices[0], so the first input is mutated.
func averageTheta(matrices []*mat64.Dense) *mat64.Dense {
	if len(matrices) < 2 {
		panic("Must provide at least two matrices to average")
	}
	base := matrices[0]
	rows, _ := base.Dims()

	// Sum every matrix into base, then divide each element by the count.
	for i := 1; i < len(matrices); i++ {
		base.Add(base, matrices[i])
	}

	averaged := make([]float64, 0)
	for i := 0; i < rows; i++ {
		row := base.RowView(i)
		for v := range row {
			divd := row[v] / float64(len(matrices))
			averaged = append(averaged, divd)
		}
	}

	baseDense := mat64.NewDense(rows, 1, averaged)
	return baseDense
}