1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-26 13:49:14 +08:00

linear_models: fixed an issue with cgo pointers

This commit is contained in:
Richard Townsend 2017-04-10 00:30:41 +01:00
parent 3e43e74895
commit 8ba2c56945
3 changed files with 205 additions and 36 deletions

View File

@ -0,0 +1,114 @@
/*
* This file contains functions related to creating + freeing
* objects on behalf of the go runtime
*/
#include "linear.h"
#include <stdlib.h>
extern "C" {
/* NOTE: the Golang versions of the structures must call the corresponding
* Free functions via runtime.SetFinalize */
/* CreateCProblem allocates a new struct problem on the C++ heap,
 * outside of Golang's garbage collection, with every field zeroed.
 * The caller must eventually release it with FreeCProblem. */
struct problem *CreateCProblem() {
    struct problem *p = new problem;
    *p = {}; // Zero every field before handing the struct to Go
    return p;
}
/* CreateCModel allocates a new struct model on the C++ heap,
 * outside of Golang's garbage collection, with every field zeroed.
 * The caller must eventually release it with FreeCModel. */
struct model *CreateCModel() {
    struct model *m = new model;
    *m = {}; // Zero every field before handing the struct to Go
    return m;
}
/* CreateCParameter allocates a new struct parameter on the C++ heap,
 * outside of Golang's garbage collection, with every field zeroed.
 * The caller must eventually release it with FreeCParameter. */
struct parameter *CreateCParameter() {
    struct parameter *param = new parameter;
    *param = {}; // Zero every field before handing the struct to Go
    return param;
}
/* FreeCProblem releases a problem previously created with
 * CreateCProblem, together with the label vector (p->y) and every
 * feature-node row (p->x[0..l-1]) attached to it by the Allocate*
 * helpers. Safe to call with a null pointer. */
void FreeCProblem(struct problem *p) {
    if (p == nullptr) {
        return; // Nothing to release
    }
    // Labels were malloc'd by AllocateLabelsForProblem.
    if (p->y != nullptr) {
        free(p->y);
        p->y = nullptr;
    }
    if (p->x != nullptr) {
        // p->l is the total count of rows in the problem; each row
        // was calloc'd individually, so free every row before the
        // row-pointer table itself. Rows may be null if allocation
        // failed part-way through.
        for (int i = 0; i < p->l; i++) {
            if (p->x[i] != nullptr) {
                free(p->x[i]);
                p->x[i] = nullptr;
            }
        }
        free(p->x);
        p->x = nullptr;
    }
    // The struct itself came from operator new, so use delete.
    delete p;
}
/* FreeCModel releases a model's contents via liblinear's own
 * free_model_content routine, then deletes the wrapper allocation.
 * Safe to call with a null pointer. */
void FreeCModel(struct model *m) {
    if (m == nullptr) {
        return; // Nothing to release
    }
    free_model_content(m);
    delete m;
}
/* FreeCParameter releases a parameter's contents via liblinear's
 * destroy_param routine, then deletes the wrapper allocation.
 * Safe to call with a null pointer. */
void FreeCParameter(struct parameter *p) {
    if (p == nullptr) {
        return; // Nothing to release
    }
    destroy_param(p);
    delete p;
}
/* AllocateLabelsForProblem reserves space in p->y for numValues
 * target (label) values outside of Go's garbage-collected heap.
 * Returns non-zero if the allocation failed. */
int AllocateLabelsForProblem (struct problem *p, int numValues) {
    void *labels = malloc(sizeof(double) * numValues);
    p->y = static_cast<double *>(labels);
    return p->y == nullptr ? 1 : 0;
}
/* AssignLabelForProblem stores target value d for input row i.
 * Assumes p->y was already sized by AllocateLabelsForProblem. */
void AssignLabelForProblem(struct problem *p, int i, double d) {
    double *labels = p->y;
    labels[i] = d;
}
/* Returns a feature node for a particular row and column. */
struct feature_node *GetFeatureNodeForIndex(struct problem *p, int i, int j) {
return &(p->x[i][j]);
}
/* Allocates a buffer of input rows and the values to fill them. */
int AllocateFeatureNodesForProblem(struct problem *p,
int numSamples, int numValues) {
numValues++; // Extend for terminating element
p->x = reinterpret_cast<struct feature_node **>(
calloc(numSamples, sizeof(struct feature_node *))
);
if (p->x == nullptr) {
return -1;
}
for (int i = 0; i < numSamples; i++) {
p->x[i] = reinterpret_cast<struct feature_node *>(
calloc(numValues, sizeof(struct feature_node))
);
if (p->x[i] == nullptr) {
return -1;
}
// Write the special terminating element, which signals
// to libsvm that there's no more data available on this row.
p->x[i][numValues-1].index = -1;
}
return 0;
}
} /* extern "C" */

View File

@ -0,0 +1,19 @@
/* integration.h: helpers for creating and freeing liblinear
 * structures outside of Go's garbage-collected heap.
 * Definitions live in the companion .cpp file with C linkage and are
 * consumed from Go via cgo. */
#ifndef GOLEARN_INTEGRATION_H
#define GOLEARN_INTEGRATION_H
#include "linear.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Create*/Free* pairs: every structure handed to Go should
 * eventually be released with the matching Free function. */
struct problem *CreateCProblem();
void FreeCProblem(struct problem*);
struct model *CreateCModel();
void FreeCModel(struct model*);
struct parameter *CreateCParameter();
void FreeCParameter(struct parameter*);

// Allocates memory outside of golang for describing feature
// vectors.
int AllocateFeatureNodesForProblem(struct problem*, int, int);
int AllocateLabelsForProblem(struct problem *, int);
void AssignLabelForProblem(struct problem *, int, double);
struct feature_node *GetFeatureNodeForIndex(struct problem *, int, int);

#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* GOLEARN_INTEGRATION_H */

View File

@ -1,24 +1,50 @@
package linear_models
/*
#include "linear.h"
#include "integration.h"
#cgo CFLAGS:
#cgo CXXFLAGS: -std=c++11 -g -O0
#cgo LDFLAGS: -lc++ -g
*/
import "C"
import "fmt"
import "unsafe"
import (
"fmt"
"unsafe"
"runtime"
)
// Problem wraps a libsvm problem struct which describes a classification/
// regression problem. No externally-accessible fields.
type Problem struct {
c_prob C.struct_problem
c_prob *C.struct_problem
}
// Free releases resources associated with a libsvm problem.
func (p *Problem) Free() {
C.FreeCProblem(p.c_prob)
}
// Parameter encapsulates all the possible libsvm training options.
// TODO: make user control of these more extensive.
type Parameter struct {
c_param C.struct_parameter
c_param *C.struct_parameter
}
// Free releases resources associated with a Parameter.
func (p *Parameter) Free() {
C.FreeCParameter(p.c_param)
}
// Model encapsulates a trained libsvm model.
type Model struct {
c_model unsafe.Pointer
}
// Free releases resources associated with a trained libsvm model.
func (m *Model) Free() {
C.FreeCModel(m.c_model)
}
const (
L2R_LR = C.L2R_LR
L2R_L2LOSS_SVC_DUAL = C.L2R_L2LOSS_SVC_DUAL
@ -30,8 +56,14 @@ const (
L2R_LR_DUAL = C.L2R_LR_DUAL
)
// NewParameter creates a libsvm parameter structure, which controls
// various aspects of libsvm training.
// For more information on what these parameters do, consult the
// "`train` usage" section of
// https://github.com/cjlin1/liblinear/blob/master/README
func NewParameter(solver_type int, C float64, eps float64) *Parameter {
param := Parameter{}
param := &Parameter{C.CreateCParameter()}
runtime.SetFinalizer(param, (*Parameter).Free)
param.c_param.solver_type = C.int(solver_type)
param.c_param.eps = C.double(eps)
param.c_param.C = C.double(C)
@ -39,30 +71,37 @@ func NewParameter(solver_type int, C float64, eps float64) *Parameter {
param.c_param.weight_label = nil
param.c_param.weight = nil
return &param
return param
}
// NewProblem creates input to libsvm which describes a particular
// regression/classification problem. It requires an array of float values
// and an array of y values.
func NewProblem(X [][]float64, y []float64, bias float64) *Problem {
prob := Problem{}
prob := &Problem{C.CreateCProblem()}
runtime.SetFinalizer(prob, (*Problem).Free)
prob.c_prob.l = C.int(len(X))
prob.c_prob.n = C.int(len(X[0]) + 1)
prob.c_prob.x = convert_features(X, bias)
c_y := make([]C.double, len(y))
convert_features(prob, X, bias)
C.AllocateLabelsForProblem(prob.c_prob, C.int(len(y)))
for i := 0; i < len(y); i++ {
c_y[i] = C.double(y[i])
C.AssignLabelForProblem(prob.c_prob, C.int(i), C.double(y[i]))
}
prob.c_prob.y = &c_y[0]
// Should not go out of scope until the Problem struct
// is cleaned up.
prob.c_prob.bias = C.double(-1)
return &prob
return prob
}
// Train invokes libsvm and returns a trained model.
func Train(prob *Problem, param *Parameter) *Model {
libLinearHookPrintFunc() // Sets up logging
tmpCProb := &prob.c_prob
tmpCParam := &param.c_param
return &Model{unsafe.Pointer(C.train(tmpCProb, tmpCParam))}
out := C.train(prob.c_prob, param.c_param)
m := &Model{out}
runtime.SetFinalizer(m, (*Model).Free)
return m
}
func Export(model *Model, filePath string) error {
@ -79,14 +118,20 @@ func Load(model *Model, filePath string) error {
return fmt.Errorf("Something went wrong")
}
return nil
}
// Predict takes a row of float values corresponding to a particular
// input and returns the regression result.
func Predict(model *Model, x []float64) float64 {
c_x := convert_vector(x, 0)
c_y := C.predict((*C.struct_model)(model.c_model), c_x)
y := float64(c_y)
return y
}
// convert_vector is an internal function used for converting
// dense float64 vectors into the sparse input that libsvm accepts.
func convert_vector(x []float64, bias float64) *C.struct_feature_node {
n_ele := 0
for i := 0; i < len(x); i++ {
@ -113,7 +158,10 @@ func convert_vector(x []float64, bias float64) *C.struct_feature_node {
c_x[j].index = C.int(-1)
return &c_x[0]
}
func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
// convert_features is an internal function used for converting
// dense 2D arrays of float values into the sparse format libsvm accepts.
func convert_features(prob *Problem, X [][]float64, bias float64) {
n_samples := len(X)
n_elements := 0
@ -122,34 +170,22 @@ func convert_features(X [][]float64, bias float64) **C.struct_feature_node {
if X[i][j] != 0.0 {
n_elements++
}
n_elements++ //for bias
n_elements++ // For bias
}
}
x_space := make([]C.struct_feature_node, n_elements+n_samples)
cursor := 0
x := make([]*C.struct_feature_node, n_samples)
var c_x **C.struct_feature_node
C.AllocateFeatureNodesForProblem(prob.c_prob, C.int(n_elements), C.int(n_samples))
for i := 0; i < n_samples; i++ {
x[i] = &x_space[cursor]
for j := 0; j < len(X[i]); j++ {
x_space := C.GetFeatureNodeForIndex(prob.c_prob, C.int(i), C.int(j))
if X[i][j] != 0.0 {
x_space[cursor].index = C.int(j + 1)
x_space[cursor].value = C.double(X[i][j])
cursor++
x_space.index = C.int(j + 1)
x_space.value = C.double(X[i][j])
}
if bias > 0 {
x_space[cursor].index = C.int(0)
x_space[cursor].value = C.double(bias)
cursor++
x_space.index = C.int(0)
x_space.value = C.double(bias)
}
}
x_space[cursor].index = C.int(-1)
cursor++
}
c_x = &x[0]
return c_x
}