
Merge e87a7e4aadc60d0d452779201bf842310d84a85d into 3e43e74895fbe667cb83c7fd8278238eb001f667

This commit is contained in:
Richard Townsend 2020-07-19 14:22:42 +02:00 committed by GitHub
commit 2cdf2c77da
8 changed files with 260 additions and 93 deletions

View File

@@ -1,16 +1,16 @@
language: go
dist: bionic
go:
- "1.9"
- "1.10"
- "1.11"
- "1.12"
- "1.13"
- "1.14"
arch:
- amd64
- arm64
env:
# Temporary workaround for go 1.6
- GODEBUG=cgocheck=0
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq libatlas-base-dev
- cd /tmp && wget http://www.csie.ntu.edu.tw/~cjlin/liblinear/oldfiles/liblinear-1.94.tar.gz && tar xf liblinear-1.94.tar.gz && cd liblinear-1.94 && make lib && sudo install -vm644 linear.h /usr/include && sudo install -vm755 liblinear.so.1 /usr/lib && sudo ln -sfv liblinear.so.1 /usr/lib/liblinear.so
- cd /tmp && wget https://www.csie.ntu.edu.tw/~cjlin/liblinear/oldfiles/liblinear-1.94.tar.gz && tar xf liblinear-1.94.tar.gz && cd liblinear-1.94 && make lib && sudo install -vm644 linear.h /usr/include && sudo install -vm755 liblinear.so.1 /usr/lib && sudo ln -sfv liblinear.so.1 /usr/lib/liblinear.so
- cd $TRAVIS_BUILD_DIR
install:
- go get github.com/smartystreets/goconvey/convey

View File

@@ -1,11 +1,12 @@
package ensemble
import (
"io/ioutil"
"testing"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/evaluation"
. "github.com/smartystreets/goconvey/convey"
"io/ioutil"
"testing"
)
func TestMultiSVMUnweighted(t *testing.T) {
@@ -14,40 +15,43 @@ func TestMultiSVMUnweighted(t *testing.T) {
So(err, ShouldBeNil)
X, Y := base.InstancesTrainTestSplit(inst, 0.4)
m := NewMultiLinearSVC("l1", "l2", true, 1.0, 1e-4, nil)
m.Fit(X)
Convey("Predictions should work...", func() {
predictions, err := m.Predict(Y)
So(err, ShouldEqual, nil)
cf, err := evaluation.GetConfusionMatrix(Y, predictions)
So(err, ShouldEqual, nil)
So(evaluation.GetAccuracy(cf), ShouldBeGreaterThan, 0.70)
})
Convey("Saving should work...", func() {
f, err := ioutil.TempFile("", "tree")
So(err, ShouldBeNil)
err = m.Save(f.Name())
Convey("Fitting should work...", func() {
m := NewMultiLinearSVC("l1", "l2", true, 1.0, 1e-4, nil)
err := m.Fit(X)
So(err, ShouldBeNil)
Convey("Loading should work...", func() {
mLoaded := NewMultiLinearSVC("l1", "l2", true, 1.00, 1e-8, nil)
err := mLoaded.Load(f.Name())
Convey("Predictions should work...", func() {
predictions, err := m.Predict(Y)
So(err, ShouldEqual, nil)
cf, err := evaluation.GetConfusionMatrix(Y, predictions)
So(err, ShouldEqual, nil)
So(evaluation.GetAccuracy(cf), ShouldBeGreaterThan, 0.70)
})
Convey("Saving should work...", func() {
f, err := ioutil.TempFile("", "tree")
So(err, ShouldBeNil)
err = m.Save(f.Name())
So(err, ShouldBeNil)
Convey("Predictions should be the same...", func() {
originalPredictions, err := m.Predict(Y)
Convey("Loading should work...", func() {
mLoaded := NewMultiLinearSVC("l1", "l2", true, 1.00, 1e-8, nil)
err := mLoaded.Load(f.Name())
So(err, ShouldBeNil)
newPredictions, err := mLoaded.Predict(Y)
So(err, ShouldBeNil)
So(base.InstancesAreEqual(originalPredictions, newPredictions), ShouldBeTrue)
Convey("Predictions should be the same...", func() {
originalPredictions, err := m.Predict(Y)
So(err, ShouldBeNil)
newPredictions, err := mLoaded.Predict(Y)
So(err, ShouldBeNil)
So(base.InstancesAreEqual(originalPredictions, newPredictions), ShouldBeTrue)
})
})
})
})
})
}
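For reference, the test above exercises the whole Fit/Predict pipeline through GoConvey. Outside the test harness, the same workflow looks roughly like the sketch below. This is only an illustration: the "iris.csv" path is assumed, and error handling is kept minimal; the constructor arguments mirror the ones used in the test.

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
	"github.com/sjwhitworth/golearn/ensemble"
	"github.com/sjwhitworth/golearn/evaluation"
)

func main() {
	// Assumed local dataset with a header row; adjust the path as needed.
	inst, err := base.ParseCSVToInstances("iris.csv", true)
	if err != nil {
		panic(err)
	}
	train, test := base.InstancesTrainTestSplit(inst, 0.4)

	// Same penalty/loss/C/eps settings as the test above.
	m := ensemble.NewMultiLinearSVC("l1", "l2", true, 1.0, 1e-4, nil)
	if err := m.Fit(train); err != nil {
		panic(err)
	}

	predictions, err := m.Predict(test)
	if err != nil {
		panic(err)
	}
	cf, err := evaluation.GetConfusionMatrix(test, predictions)
	if err != nil {
		panic(err)
	}
	fmt.Println(evaluation.GetAccuracy(cf))
}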

View File

@@ -0,0 +1,93 @@
/*
* This file contains functions related to creating + freeing
* objects on behalf of the go runtime
*/
#include "linear.h"
#include <stdlib.h>
extern "C" {
/* NOTE: the Golang versions of the structures must call the corresponding
* Free functions via runtime.SetFinalizer */
/* CreateCProblem allocates a new struct problem outside of Golang's
* garbage collection. */
struct problem *CreateCProblem() {
auto ret = new problem();
*ret = {}; // < Clear all fields
return ret;
}
/* CreateCModel allocates a new struct model outside of Golang's
* garbage collection. */
struct model *CreateCModel() {
auto ret = new model();
*ret = {}; // < Clear all fields
return ret;
}
/* CreateCParameter allocates a new struct parameter outside of
* Golang's garbage collection.*/
struct parameter *CreateCParameter() {
return reinterpret_cast<struct parameter*>(calloc(1, sizeof(struct parameter)));
}
/* Frees a previously allocated problem and all its data */
void FreeCProblem(struct problem *p) {
if (p->y != nullptr) {
free(p->y);
p->y = nullptr;
}
if (p->x != nullptr) {
free(p->x);
p->x = nullptr;
}
delete p;
}
/* Frees a model with libsvm's internal routines */
void FreeCModel(struct model *m) {
free_model_content(m);
delete m;
}
/* Frees a previously allocated parameter */
void FreeCParameter(struct parameter *p) {
if (p == nullptr) {
return;
}
free(p);
}
/* Allocates a vector of doubles for storing target values
* outside of Go's garbage collection */
int AllocateLabelsForProblem (struct problem *p, int numValues) {
p->y = reinterpret_cast<double *>(malloc(sizeof(double) * numValues));
return p->y == nullptr;
}
/* Utility method used to set the target value for a particular
* input row */
void AssignLabelForProblem(struct problem *p, int i, double d) {
p->y[i] = d;
}
/* Allocates a buffer of input rows and inserts the per-row values */
int RiffleFeatures(struct problem *p, int num_offsets, int* row_offsets, struct feature_node *features) {
// Allocate space for the feature node buffer.
p->x = reinterpret_cast<struct feature_node**>(
calloc(num_offsets, sizeof(struct feature_node *))
);
if (p->x == nullptr) {
return -1;
}
for (int i = 0; i < num_offsets; i++) {
int offset = row_offsets[i];
p->x[i] = features + offset;
}
return 0;
}
} /* extern "C" */
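RiffleFeatures is the easiest helper to misread: the Go side builds one flat buffer of feature_node values plus a list of per-row start offsets, and the C side turns those offsets into the array of row pointers that the library expects. The following pure-Go sketch illustrates the same idea; the featureNode type and the riffle function are hypothetical stand-ins (no cgo), not part of this commit.

package main

import "fmt"

// featureNode mirrors struct feature_node for illustration only.
type featureNode struct {
	Index int     // 1-based column, 0 for the bias term, -1 terminates a row
	Value float64
}

// riffle is a pure-Go analogue of RiffleFeatures: per-row start offsets into
// one flat buffer become per-row views (raw pointers, on the C side).
func riffle(rowOffsets []int, features []featureNode) [][]featureNode {
	rows := make([][]featureNode, len(rowOffsets))
	for i, off := range rowOffsets {
		rows[i] = features[off:] // row i runs from features[off] to its -1 terminator
	}
	return rows
}

func main() {
	// Two sparse rows, [0, 2.5] and [1.0, 0], with no bias node.
	features := []featureNode{
		{2, 2.5}, {-1, 0}, // row 0
		{1, 1.0}, {-1, 0}, // row 1
	}
	fmt.Println(riffle([]int{0, 2}, features))
}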

View File

@@ -0,0 +1,19 @@
#ifndef _H_INTEGRATION_
#define _H_INTEGRATION_
#include "linear.h"
struct problem *CreateCProblem();
void FreeCProblem(struct problem*);
struct model *CreateCModel();
void FreeCModel(struct model*);
struct parameter *CreateCParameter();
void FreeCParameter(struct parameter*);
// Allocates memory outside of golang for describing feature
// vectors.
int RiffleFeatures(struct problem *p, int num_offsets, int* row_offsets, struct feature_node *features);
int AllocateLabelsForProblem(struct problem *, int);
void AssignLabelForProblem(struct problem *, int, double);
struct feature_node *GetFeatureNodeForIndex(struct problem *, int, int);
#endif

View File

@@ -1,22 +1,47 @@
package linear_models
/*
#include "linear.h"
#include "integration.h"
#cgo CFLAGS: -O3
#cgo CXXFLAGS: -std=c++11 -O3
*/
import "C"
import "fmt"
import "unsafe"
import (
"fmt"
"runtime"
)
// Problem wraps a libsvm problem struct which describes a classification/
// regression problem. No externally-accessible fields.
type Problem struct {
c_prob C.struct_problem
c_prob *C.struct_problem
featureNodes []C.struct_feature_node
}
// Free releases resources associated with a libsvm problem.
func (p *Problem) Free() {
C.FreeCProblem(p.c_prob)
}
// Parameter encapsulates all the possible libsvm training options.
// TODO: make user control of these more extensive.
type Parameter struct {
c_param C.struct_parameter
c_param *C.struct_parameter
}
// Free releases resources associated with a Parameter.
func (p *Parameter) Free() {
C.FreeCParameter(p.c_param)
}
// Model encapsulates a trained libsvm model.
type Model struct {
c_model unsafe.Pointer
c_model *C.struct_model
}
// Free releases resources associated with a trained libsvm model.
func (m *Model) Free() {
C.FreeCModel(m.c_model)
}
const (
@@ -30,8 +55,14 @@ const (
L2R_LR_DUAL = C.L2R_LR_DUAL
)
// NewParameter creates a libsvm parameter structure, which controls
// various aspects of libsvm training.
// For more information on what these parameters do, consult the
// "`train` usage" section of
// https://github.com/cjlin1/liblinear/blob/master/README
func NewParameter(solver_type int, C float64, eps float64) *Parameter {
param := Parameter{}
param := &Parameter{C.CreateCParameter()}
runtime.SetFinalizer(param, (*Parameter).Free)
param.c_param.solver_type = C.int(solver_type)
param.c_param.eps = C.double(eps)
param.c_param.C = C.double(C)
@@ -39,30 +70,37 @@ func NewParameter(solver_type int, C float64, eps float64) *Parameter {
param.c_param.weight_label = nil
param.c_param.weight = nil
return &param
return param
}
// NewProblem creates input to libsvm which describes a particular
// regression/classification problem. It takes a 2D slice of feature values,
// a slice of target (y) values, and a bias term.
func NewProblem(X [][]float64, y []float64, bias float64) *Problem {
prob := Problem{}
prob := &Problem{C.CreateCProblem(), nil}
runtime.SetFinalizer(prob, (*Problem).Free)
prob.c_prob.l = C.int(len(X))
prob.c_prob.n = C.int(len(X[0]) + 1)
prob.c_prob.x = convert_features(X, bias)
c_y := make([]C.double, len(y))
convert_features(prob, X, bias)
C.AllocateLabelsForProblem(prob.c_prob, C.int(len(y)))
for i := 0; i < len(y); i++ {
c_y[i] = C.double(y[i])
C.AssignLabelForProblem(prob.c_prob, C.int(i), C.double(y[i]))
}
prob.c_prob.y = &c_y[0]
// Should not go out of scope until the Problem struct
// is cleaned up.
prob.c_prob.bias = C.double(-1)
return &prob
return prob
}
// Train invokes libsvm and returns a trained model.
func Train(prob *Problem, param *Parameter) *Model {
libLinearHookPrintFunc() // Sets up logging
tmpCProb := &prob.c_prob
tmpCParam := &param.c_param
return &Model{unsafe.Pointer(C.train(tmpCProb, tmpCParam))}
out := C.train(prob.c_prob, param.c_param)
m := &Model{out}
runtime.SetFinalizer(m, (*Model).Free)
return m
}
func Export(model *Model, filePath string) error {
@@ -74,19 +112,25 @@ func Export(model *Model, filePath string) error {
}
func Load(model *Model, filePath string) error {
model.c_model = unsafe.Pointer(C.load_model(C.CString(filePath)))
model.c_model = C.load_model(C.CString(filePath))
if model.c_model == nil {
return fmt.Errorf("Something went wrong")
}
return nil
}
// Predict takes a row of float values corresponding to a particular
// input and returns the predicted value (class label or regression target).
func Predict(model *Model, x []float64) float64 {
c_x := convert_vector(x, 0)
c_y := C.predict((*C.struct_model)(model.c_model), c_x)
y := float64(c_y)
return y
}
// convert_vector is an internal function used for converting
// dense float64 vectors into the sparse input that libsvm accepts.
func convert_vector(x []float64, bias float64) *C.struct_feature_node {
n_ele := 0
for i := 0; i < len(x); i++ {
@@ -113,43 +157,48 @@ func convert_vector(x []float64, bias float64) *C.struct_feature_node {
c_x[j].index = C.int(-1)
return &c_x[0]
}
func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
n_samples := len(X)
n_elements := 0
for i := 0; i < n_samples; i++ {
// convert_features is an internal function used for converting
// dense 2D arrays of float values into the sparse format libsvm accepts.
func convert_features(prob *Problem, X [][]float64, bias float64) {
rowCount := len(X)
// rowOffsets records where each row starts in the flat featureNodes slice.
// Every row's sparse entries are appended to featureNodes, and RiffleFeatures
// (on the C side) uses those offsets to build the **C.struct_feature_node
// input. Go retains ownership of the feature_node buffer; C owns the
// enclosing **C.struct_feature_node array.
rowOffsets := make([]C.int, 0)
featureNodes := make([]C.struct_feature_node, 0)
// Walk each row, recording its starting offset and appending its sparse feature nodes.
for i := 0; i < rowCount; i++ {
rowOffsets = append(rowOffsets, C.int(len(featureNodes))) // Push back the starting element of this row
if bias != 0.0 { // Allocate space for a bias node
featureNodes = append(featureNodes, C.struct_feature_node{
C.int(0), C.double(bias),
})
}
for j := 0; j < len(X[i]); j++ {
if X[i][j] != 0.0 {
n_elements++
}
n_elements++ //for bias
}
}
x_space := make([]C.struct_feature_node, n_elements+n_samples)
cursor := 0
x := make([]*C.struct_feature_node, n_samples)
var c_x **C.struct_feature_node
for i := 0; i < n_samples; i++ {
x[i] = &x_space[cursor]
for j := 0; j < len(X[i]); j++ {
if X[i][j] != 0.0 {
x_space[cursor].index = C.int(j + 1)
x_space[cursor].value = C.double(X[i][j])
cursor++
}
if bias > 0 {
x_space[cursor].index = C.int(0)
x_space[cursor].value = C.double(bias)
cursor++
// For every non-zero value in the data grid, allocate a feature node.
featureNodes = append(featureNodes, C.struct_feature_node{
C.int(j + 1), C.double(X[i][j]),
})
}
}
x_space[cursor].index = C.int(-1)
cursor++
// Finally, add a terminating element which tells libsvm that there's nothing
// left on this row
featureNodes = append(featureNodes, C.struct_feature_node{
C.int(-1), C.double(0),
})
}
// Transform [feature_node, feature_node, feature_node, ...] list into
// [*feature_node(1), *feature_node(m), ...] through the C integration bridge.
// C owns that particular memory.
// int RiffleFeatures(struct problem *p, int num_offsets, int* row_offsets, struct feature_node *features)
if C.RiffleFeatures(prob.c_prob, C.int(len(featureNodes)), &rowOffsets[0], &featureNodes[0]) != 0 {
panic("RiffledFeatures could not allocate memory")
}
c_x = &x[0]
return c_x
}
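Taken together, the wrappers above form a small end-to-end API: NewParameter, NewProblem, Train and Predict. A minimal usage sketch follows; the toy dataset is assumed, L2R_LR_DUAL is one of the solver constants exported in the const block above, and cleanup is left to the finalizers registered in NewParameter, NewProblem and Train.

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/linear_models"
)

func main() {
	// Toy dense dataset (assumed): two rows, two columns.
	X := [][]float64{{0, 1.5}, {2.0, 0}}
	y := []float64{0, 1}

	param := linear_models.NewParameter(linear_models.L2R_LR_DUAL, 1.0, 1e-4) // solver, C, eps
	prob := linear_models.NewProblem(X, y, 0)                                 // bias disabled

	// Cleanup happens via the finalizers set in the constructors and Train.
	model := linear_models.Train(prob, param)
	fmt.Println(linear_models.Predict(model, []float64{2.0, 0})) // predicted label
}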

View File

@@ -5,10 +5,10 @@ import "C"
import (
"encoding/json"
"fmt"
"github.com/sjwhitworth/golearn/base"
"io/ioutil"
"os"
"unsafe"
"github.com/sjwhitworth/golearn/base"
)
// LinearSVCParams represents all available LinearSVC options.
@@ -153,6 +153,7 @@ func (lr *LinearSVC) Fit(X base.FixedDataGrid) error {
var weightClasses []C.int
// Creates the class weighting
fmt.Println("Generating class weights...")
if lr.Param.ClassWeights == nil {
if lr.Param.WeightClassesAutomatically {
weightVec = generateClassWeightVectorFromDist(X)
@@ -169,17 +170,20 @@ func (lr *LinearSVC) Fit(X base.FixedDataGrid) error {
}
// Convert the problem
fmt.Println("Converting instances...")
problemVec := convertInstancesToProblemVec(X)
labelVec := convertInstancesToLabelVec(X)
// Train
fmt.Println("Training...")
prob := NewProblem(problemVec, labelVec, 0)
lr.param.c_param.nr_weight = C.int(len(weightVec))
lr.param.c_param.weight_label = &(weightClasses[0])
lr.param.c_param.weight = (*C.double)(unsafe.Pointer(&weightVec[0]))
lr.param.c_param.weight = (*C.double)(&weightVec[0])
// lr.param.weights = (*C.double)unsafe.Pointer(&(weightVec[0]));
lr.model = Train(prob, lr.param)
fmt.Println("Training completed")
return nil
}
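One detail worth keeping in mind when reading the weight assignment above: liblinear pairs weight_label[i] with weight[i], so the two slices must be index-aligned and nr_weight must match their length. A short sketch with hypothetical values (it would live in the same cgo file as Fit):

// Hypothetical weighting: class 0 keeps weight 1.0, class 1 is up-weighted to 2.5.
weightClasses := []C.int{0, 1}
weightVec := []C.double{1.0, 2.5}

lr.param.c_param.nr_weight = C.int(len(weightVec))
lr.param.c_param.weight_label = &weightClasses[0]
lr.param.c_param.weight = (*C.double)(&weightVec[0])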

Binary file not shown.

View File

@@ -2,6 +2,7 @@ package meta
import (
"fmt"
"github.com/sjwhitworth/golearn/base"
)
@@ -46,7 +47,6 @@ func (m *OneVsAllModel) Fit(using base.FixedDataGrid) {
}
}
attrs := m.generateAttributes(using)
// Find the highest stored value
val := uint64(0)
classVals := classAttr.GetValues()
@@ -60,6 +60,7 @@ func (m *OneVsAllModel) Fit(using base.FixedDataGrid) {
panic("Must have more than one class!")
}
m.maxClassVal = val
fmt.Println("Found maximum rows")
// If we're reloading, we may just be fitting to the structure
_, srcRows := using.Size()
@@ -152,9 +153,6 @@ func (m *OneVsAllModel) LoadWithPrefix(reader *base.ClassifierDeserializer, pref
return base.DescribeError("Can't load INSTANCE_STRUCTURE", err)
}
m.Fit(fitOn)
/*if err != nil {
base.DescribeError("Could not fit reloaded classifier to the structure", err)
}*/
// Reload the filters
numFiltersU64, err := reader.GetU64ForKey(reader.Prefix(prefix, "FILTER_COUNT"))
@@ -229,7 +227,7 @@ func (m *OneVsAllModel) LoadWithPrefix(reader *base.ClassifierDeserializer, pref
for i, c := range classVals {
cls := m.NewClassifierFunction(c)
clsPrefix := pI("CLASSIFIERS", i)
fmt.Println("Loading classifier...")
err = cls.LoadWithPrefix(reader, clsPrefix)
if err != nil {
return base.FormatError(err, "Could not reload classifier at: %s", clsPrefix)
@@ -264,7 +262,7 @@ func (m *OneVsAllModel) SaveWithPrefix(writer *base.ClassifierSerializer, prefix
}
// Save the instances
err := writer.WriteInstancesForKey(writer.Prefix(prefix, "INSTANCE_STRUCTURE"), m.fitOn, false)
err := writer.WriteInstancesForKey(writer.Prefix(prefix, "INSTANCE_STRUCTURE"), base.NewStructuralCopy(m.fitOn), false)
if err != nil {
return base.DescribeError("Unable to write INSTANCE_STRUCTURE", err)
}