1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-26 13:49:14 +08:00
golearn/linear_models/liblinear.go
Richard Townsend 5ceb4e7111 linear_models: fix cgo issues, upgrade to liblinear 2.14
Requires an additional step to install:
 - cd /tmp &&
 - wget https://github.com/cjlin1/liblinear/archive/v241.tar.gz
 - tar xvf v241.tar.gz
 - cd liblinear-241
 - make lib
 - sudo install -vm644 linear.h /usr/include
 - sudo install -vm755 liblinear.so.4 /usr/lib
 - sudo ln -sfv liblinear.so.4 /usr/lib/liblinear.so
2020-09-06 10:01:24 +01:00

212 lines
5.8 KiB
Go

package linear_models
/*
#include "integration.h"
#cgo CFLAGS:
#cgo CXXFLAGS: -std=c++11 -g -O0
#cgo LDFLAGS: -g -llinear
*/
import "C"
import (
"fmt"
"runtime"
)
// Problem wraps a liblinear problem struct which describes a classification/
// regression problem. No externally-accessible fields.
// Instances are created via NewProblem and released via Free (also wired up
// as a runtime finalizer there).
type Problem struct {
	// c_prob points at C-allocated memory; it is freed by Free, not the Go GC.
	c_prob *C.struct_problem
}
// Free releases the C-side resources associated with this problem.
// Safe to rely on via the finalizer set in NewProblem; after calling Free,
// the Problem must not be used again.
func (p *Problem) Free() {
	C.FreeCProblem(p.c_prob)
}
// Parameter encapsulates all the possible liblinear training options.
// TODO: make user control of these more extensive.
type Parameter struct {
	// c_param points at C-allocated memory; released via Free.
	c_param *C.struct_parameter
}
// Free releases the C-side resources associated with a Parameter.
// Registered as a finalizer in NewParameter; do not use the Parameter
// after calling Free.
func (p *Parameter) Free() {
	C.FreeCParameter(p.c_param)
}
// Model encapsulates a trained liblinear model.
// Obtained from Train or Load; pass to Predict/Export.
type Model struct {
	// c_model points at C-allocated memory; released via Free.
	c_model *C.struct_model
}
// Free releases the C-side resources associated with a trained model.
// Registered as a finalizer in Train; do not use the Model after Free.
func (m *Model) Free() {
	C.FreeCModel(m.c_model)
}
// Solver types, mirroring liblinear's solver enum (see linear.h).
// Pass one of these as solver_type to NewParameter.
const (
	L2R_LR              = C.L2R_LR              // L2-regularized logistic regression (primal)
	L2R_L2LOSS_SVC_DUAL = C.L2R_L2LOSS_SVC_DUAL // L2-regularized L2-loss SVC (dual)
	L2R_L2LOSS_SVC      = C.L2R_L2LOSS_SVC      // L2-regularized L2-loss SVC (primal)
	L2R_L1LOSS_SVC_DUAL = C.L2R_L1LOSS_SVC_DUAL // L2-regularized L1-loss SVC (dual)
	MCSVM_CS            = C.MCSVM_CS            // multi-class SVM by Crammer and Singer
	L1R_L2LOSS_SVC      = C.L1R_L2LOSS_SVC      // L1-regularized L2-loss SVC
	L1R_LR              = C.L1R_LR              // L1-regularized logistic regression
	L2R_LR_DUAL         = C.L2R_LR_DUAL         // L2-regularized logistic regression (dual)
)
// NewParameter creates a liblinear parameter structure, which controls
// various aspects of liblinear training.
// solver_type is one of the solver constants above; C is the cost of
// constraint violation; eps is the stopping tolerance.
// For more information on what these parameters do, consult the
// "`train` usage" section of
// https://github.com/cjlin1/liblinear/blob/master/README
func NewParameter(solver_type int, C float64, eps float64) *Parameter {
	// NOTE(review): the parameter C (cost) shares its name with the cgo
	// pseudo-package "C". cgo rewrites C.xxx selector expressions before
	// normal Go scoping applies, so both uses coexist below — confirm on
	// the project's toolchain before renaming anything here.
	param := &Parameter{C.CreateCParameter()}
	// Release the C allocation when the Go wrapper is garbage-collected.
	runtime.SetFinalizer(param, (*Parameter).Free)
	param.c_param.solver_type = C.int(solver_type)
	param.c_param.eps = C.double(eps)
	param.c_param.C = C.double(C)
	// No per-class weighting configured.
	param.c_param.nr_weight = C.int(0)
	param.c_param.weight_label = nil
	param.c_param.weight = nil
	return param
}
// NewProblem creates input to liblinear which describes a particular
// regression/classification problem. It requires an array of float values
// and an array of y values.
// X is the dense feature matrix (one row per example), y the labels/targets.
// NOTE(review): panics if X is empty (len(X[0]) below); callers must pass
// at least one row.
func NewProblem(X [][]float64, y []float64, bias float64) *Problem {
	prob := &Problem{C.CreateCProblem()}
	// Release the C allocation when the Go wrapper is garbage-collected.
	runtime.SetFinalizer(prob, (*Problem).Free)
	prob.c_prob.l = C.int(len(X)) // number of training examples
	// Feature dimension; +1 leaves room for the bias column.
	prob.c_prob.n = C.int(len(X[0]) + 1)
	// Populate the sparse feature-node arrays on the C side.
	convert_features(prob, X, bias)
	C.AllocateLabelsForProblem(prob.c_prob, C.int(len(y)))
	for i := 0; i < len(y); i++ {
		C.AssignLabelForProblem(prob.c_prob, C.int(i), C.double(y[i]))
	}
	// Should not go out of scope until the Problem struct
	// is cleaned up.
	// NOTE(review): bias is hard-coded to -1 here even though a bias
	// argument was passed to convert_features above — confirm this is
	// intentional before relying on the bias parameter.
	prob.c_prob.bias = C.double(-1)
	return prob
}
// Train invokes liblinear on the given problem/parameter pair and
// returns a trained model.
func Train(prob *Problem, param *Parameter) *Model {
	libLinearHookPrintFunc() // Sets up logging
	out := C.train(prob.c_prob, param.c_param)
	m := &Model{out}
	// Release the underlying C model when the wrapper is garbage-collected.
	runtime.SetFinalizer(m, (*Model).Free)
	return m
}
// Export serialises a trained model to filePath in liblinear's native
// on-disk format via save_model. Returns a non-nil error when liblinear
// reports a non-zero status.
func Export(model *Model, filePath string) error {
	// NOTE(review): C.CString allocates with malloc and is never freed,
	// leaking a few bytes per call. Fixing it needs C.free (<stdlib.h>),
	// which this file's cgo preamble does not currently include.
	status := C.save_model(C.CString(filePath), (*C.struct_model)(model.c_model))
	if status != 0 {
		return fmt.Errorf("problem occurred during export to %s (status was %d)", filePath, status)
	}
	return nil
}
// Load reads a previously-exported model from filePath via load_model,
// replacing model's underlying C model. Returns a non-nil error when
// liblinear fails to parse the file.
// NOTE(review): any model previously held in model.c_model is overwritten
// without being freed; callers should Load into a fresh Model.
func Load(model *Model, filePath string) error {
	// NOTE(review): C.CString leaks (see Export); preamble lacks C.free.
	model.c_model = C.load_model(C.CString(filePath))
	if model.c_model == nil {
		return fmt.Errorf("could not load model from %s", filePath)
	}
	return nil
}
// Predict takes a row of float values corresponding to a particular
// input and returns the regression result.
func Predict(model *Model, x []float64) float64 {
	nodes := convertVector(x, 0)
	return float64(C.predict((*C.struct_model)(model.c_model), &nodes[0]))
}
// convertVector is an internal function used for converting
// dense float64 vectors into the sparse feature_node input that
// liblinear accepts. The returned slice is terminated by a node with
// index -1; if bias >= 0 an extra bias node is appended before it.
func convertVector(x []float64, bias float64) []C.struct_feature_node {
	// Count the number of non-zero elements. Use != 0 rather than > 0 so
	// negative feature values are kept — previously they were silently
	// dropped here, inconsistent with convert_features (training path).
	nElements := 0
	for _, v := range x {
		if v != 0 {
			nElements++
		}
	}
	// Add one at the end for the -1 terminator
	nElements++
	if bias >= 0 {
		// And one for the bias, if we have it
		nElements++
	}
	cX := make([]C.struct_feature_node, nElements)
	j := 0
	for i, v := range x {
		if v != 0 {
			cX[j].index = C.int(i + 1) // liblinear feature indices are 1-based
			cX[j].value = C.double(v)
			j++
		}
	}
	if bias >= 0 {
		// NOTE(review): index 0 / value 0 looks odd for a bias node —
		// liblinear convention is index n with value bias. Preserved
		// as-is; confirm against callers before changing.
		cX[j].index = C.int(0)
		cX[j].value = C.double(0)
		j++
	}
	cX[j].index = C.int(-1)
	return cX
}
// convert_features is an internal function used for converting
// dense 2D arrays of float values into the sparse format liblinear accepts.
// It sizes and fills the per-row feature_node arrays on the C side of prob;
// each row is terminated by a node with index -1, preceded by a bias node
// when bias >= 0.
func convert_features(prob *Problem, X [][]float64, bias float64) {
	nonZeroRowElements := make([]C.int, len(X))
	totalElements := 0
	for i := 0; i < len(X); i++ {
		// For each row of input data, we count how many non-zero things are in the row
		nonZeroElementsInRow := 1 // Initially one, because we need the -1 null terminator
		for j := 0; j < len(X[i]); j++ {
			if X[i][j] != 0.0 {
				nonZeroElementsInRow++
			}
		}
		// Count the bias node once per ROW, not once per feature.
		// (This check previously sat inside the inner loop, over-counting
		// by len(X[i])-1 nodes per row and over-allocating C memory.)
		if bias >= 0 {
			nonZeroElementsInRow++
		}
		nonZeroRowElements[i] = C.int(nonZeroElementsInRow)
		totalElements += nonZeroElementsInRow
	}
	// Allocate one feature vector for each row, total number
	C.AllocateFeatureNodesForProblem(prob.c_prob, C.int(len(X)), C.int(totalElements), &nonZeroRowElements[0])
	for i := 0; i < len(X); i++ {
		nonZeroElementCounter := 0
		for j := 0; j < len(X[i]); j++ {
			if X[i][j] != 0.0 {
				xSpace := C.GetFeatureNodeForIndex(prob.c_prob, C.int(i), C.int(nonZeroElementCounter))
				xSpace.index = C.int(j + 1) // liblinear feature indices are 1-based
				xSpace.value = C.double(X[i][j])
				nonZeroElementCounter++
			}
		}
		if bias >= 0 {
			// Bias term occupies the extra (n-th) column.
			xSpace := C.GetFeatureNodeForIndex(prob.c_prob, C.int(i), C.int(nonZeroElementCounter))
			xSpace.index = C.int(len(X[i]) + 1)
			xSpace.value = C.double(bias)
			nonZeroElementCounter++
		}
		// Row terminator expected by liblinear.
		xSpace := C.GetFeatureNodeForIndex(prob.c_prob, C.int(i), C.int(nonZeroElementCounter))
		xSpace.index = C.int(-1)
		xSpace.value = C.double(0)
	}
}