1
0
mirror of https://github.com/sjwhitworth/golearn.git synced 2025-04-28 13:48:56 +08:00
golearn/base/dataframe_go.go

67 lines
1.5 KiB
Go
Raw Normal View History

2020-11-04 17:07:37 +05:30
package base
import (
"fmt"
"reflect"
"strconv"
"github.com/rocketlaunchr/dataframe-go"
2020-11-04 17:07:37 +05:30
)
// ConvertDataFrameToInstances converts a DataFrame-go dataframe object to Golearn Fixed Data Grid. Allows for compabitibility between dataframe and golearn's ML models.
// df is the dataframe Object. classAttrIndex is the index of the class Attribute in the data.i
func ConvertDataFrameToInstances(df *dataframe.DataFrame, classAttrIndex int) FixedDataGrid {
2020-11-04 17:07:37 +05:30
// Creating Attributes based on Dataframe
2020-11-04 17:07:37 +05:30
names := df.Names()
attrs := make([]Attribute, len(names))
2020-11-04 17:07:37 +05:30
newInst := NewDenseInstances()
2020-11-04 17:07:37 +05:30
for i := range names {
col := df.Series[i]
if reflect.TypeOf(col.Value(0)).Kind() == reflect.String {
attrs[i] = new(CategoricalAttribute)
2020-11-04 17:07:37 +05:30
attrs[i].SetName(names[i])
} else {
attrs[i] = NewFloatAttribute(names[i])
2020-11-04 17:07:37 +05:30
}
}
// Add the attributes
newSpecs := make([]AttributeSpec, len(attrs))
2020-11-04 17:07:37 +05:30
for i, a := range attrs {
newSpecs[i] = newInst.AddAttribute(a)
}
// Adding the class attribute
newInst.AddClassAttribute(attrs[classAttrIndex])
// Allocate space
nRows := df.NRows()
newInst.Extend(df.NRows())
// Write the data based on DataType
for i := 0; i < nRows; i++ {
for j := range names {
col := df.Series[j]
var val string
switch v := col.Value(i).(type) {
case string:
val = v
case int64:
val = strconv.FormatInt(v, 10)
case float64:
val = fmt.Sprintf("%f", v)
case float32:
val = fmt.Sprintf("%f", v)
}
newInst.Set(newSpecs[j], i, newSpecs[j].GetAttribute().GetSysValFromString(val))
}
}
return newInst
}