From 590d7a8091c0f2aed42cbdac508133a9ca6ccfd2 Mon Sep 17 00:00:00 2001 From: Richard Townsend Date: Sat, 14 May 2016 22:57:36 +0100 Subject: [PATCH 1/3] base: add a new instances type for mat64 --- base/mat.go | 117 +++++++++++++++++++++++++++++++++++++++++++++++ base/mat_test.go | 39 ++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 base/mat.go create mode 100644 base/mat_test.go diff --git a/base/mat.go b/base/mat.go new file mode 100644 index 0000000..8258b07 --- /dev/null +++ b/base/mat.go @@ -0,0 +1,117 @@ +package base + +import ( + "fmt" + "github.com/gonum/matrix/mat64" +) + +type Mat64Instances struct { + attributes []Attribute + classAttrs map[int]bool + Data *mat64.Dense + rows int +} + +// InstancesFromMat64 returns a new Mat64Instances from a literal provided. +func InstancesFromMat64(rows, cols int, data *mat64.Dense) *Mat64Instances { + + var ret Mat64Instances + for i := 0; i < cols; i++ { + ret.attributes = append(ret.attributes, NewFloatAttribute(fmt.Sprintf("%d", + i))) + } + + ret.Data = data + ret.rows = rows + return &ret +} + +// GetAttribute returns an AttributeSpec from an Attribute field. +func (m *Mat64Instances) GetAttribute(a Attribute) (AttributeSpec, error) { + for i, at := range m.attributes { + if at.Equals(a) { + return AttributeSpec{0, i, at}, nil + } + } + return AttributeSpec{}, fmt.Errorf("Couldn't find a matching attribute") +} + +// AllAttributes returns every defined Attribute. +func (m *Mat64Instances) AllAttributes() []Attribute { + ret := make([]Attribute, len(m.attributes)) + for i, a := range m.attributes { + ret[i] = a + } + return ret +} + +// AddClassAttribute adds an attribute to the class set. +func (m *Mat64Instances) AddClassAttribute(a Attribute) error { + as, err := m.GetAttribute(a) + if err != nil { + return err + } + + m.classAttrs[as.position] = true + return nil +} + +// RemoveClassAttribute removes an attribute to the class set. +func (m *Mat64Instances) RemoveClassAttribute(a Attribute) error { + as, err := m.GetAttribute(a) + if err != nil { + return err + } + + m.classAttrs[as.position] = false + return nil +} + +// AllClassAttributes returns every class attribute. +func (m *Mat64Instances) AllClassAttributes() []Attribute { + ret := make([]Attribute, 0) + for i := range m.classAttrs { + if m.classAttrs[i] { + ret = append(ret, m.attributes[i]) + } + } + + return ret +} + +// Get returns the bytes at a given position +func (m *Mat64Instances) Get(as AttributeSpec, row int) []byte { + val := m.Data.At(row, as.position) + return PackFloatToBytes(val) +} + +// MapOverRows is a convenience function for iteration +func (m *Mat64Instances) MapOverRows(as []AttributeSpec, f func([][]byte, int) (bool, error)) error { + + rowData := make([][]byte, len(as)) + for j, _ := range as { + rowData[j] = make([]byte, 8) + } + for i := 0; i < m.rows; i++ { + for j, as := range as { + PackFloatToBytesInline(m.Data.At(i, as.position), rowData[j]) + } + stat, err := f(rowData, i) + if !stat { + return err + } + } + return nil + +} + +// RowString: should print the values of a row +// TODO: make this less half-assed +func (m *Mat64Instances) RowString(row int) string { + return fmt.Sprintf("%d", row) +} + +// Size returns the number of Attributes, then the number of rows +func (m *Mat64Instances) Size() (int, int) { + return len(m.attributes), m.rows +} diff --git a/base/mat_test.go b/base/mat_test.go new file mode 100644 index 0000000..f4b8de2 --- /dev/null +++ b/base/mat_test.go @@ -0,0 +1,39 @@ +package base + +import ( + "github.com/gonum/matrix/mat64" + . "github.com/smartystreets/goconvey/convey" + "testing" +) + +func TestInlineMat64Creation(t *testing.T) { + + Convey("Given a literal array...", t, func() { + mat := mat64.NewDense(4, 3, []float64{ + 1, 0, 1, + 0, 1, 1, + 0, 0, 0, + 1, 1, 0, + }) + inst := InstancesFromMat64(4, 3, mat) + attrs := inst.AllAttributes() + Convey("Attributes should be well-defined...", func() { + So(len(attrs), ShouldEqual, 3) + }) + + Convey("No class variables set by default...", func() { + classAttrs := inst.AllClassAttributes() + So(len(classAttrs), ShouldEqual, 0) + }) + + Convey("Getting values should work...", func() { + as, err := inst.GetAttribute(attrs[0]) + So(err, ShouldBeNil) + valBytes := inst.Get(as, 3) + val := UnpackBytesToFloat(valBytes) + So(val, ShouldAlmostEqual, 1.0) + }) + + }) + +} From 6f7326b6ff5fe1682de5fee5c36b89da652939c6 Mon Sep 17 00:00:00 2001 From: Richard Townsend Date: Sat, 14 May 2016 23:07:04 +0100 Subject: [PATCH 2/3] neural: check that the new dense instances type works... --- base/mat.go | 1 + neural/layered_test.go | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/base/mat.go b/base/mat.go index 8258b07..c608558 100644 --- a/base/mat.go +++ b/base/mat.go @@ -21,6 +21,7 @@ func InstancesFromMat64(rows, cols int, data *mat64.Dense) *Mat64Instances { i))) } + ret.classAttrs = make(map[int]bool) ret.Data = data ret.rows = rows return &ret diff --git a/neural/layered_test.go b/neural/layered_test.go index 6081e17..0a4c496 100644 --- a/neural/layered_test.go +++ b/neural/layered_test.go @@ -155,3 +155,60 @@ func TestLayeredXOR(t *testing.T) { }) } + +func TestLayeredXORInline(t *testing.T) { + + Convey("Given an inline XOR dataset...", t, func() { + + data := mat64.NewDense(4, 3, []float64{ + 1, 0, 1, + 0, 1, 1, + 0, 0, 0, + 1, 1, 0, + }) + + XORData := base.InstancesFromMat64(4, 3, data) + classAttr := base.GetAttributeByName(XORData, "2") + XORData.AddClassAttribute(classAttr) + + net := NewMultiLayerNet([]int{3}) + net.MaxIterations = 20000 + net.Fit(XORData) + + Convey("After running for 20000 iterations, should have some predictive power...", func() { + + Convey("The right nodes should be connected in the network...", func() { + So(net.network.GetWeight(1, 1), ShouldAlmostEqual, 1.000) + So(net.network.GetWeight(2, 2), ShouldAlmostEqual, 1.000) + + for i := 1; i <= 6; i++ { + So(net.network.GetWeight(6, i), ShouldAlmostEqual, 0.000) + } + + }) + out := mat64.NewDense(6, 1, []float64{1.0, 0.0, 0.0, 0.0, 0.0, 0.0}) + net.network.Activate(out, 2) + So(out.At(5, 0), ShouldAlmostEqual, 1.0, 0.1) + + Convey("And Predict() should do OK too...", func() { + + pred := net.Predict(XORData) + + for _, a := range pred.AllAttributes() { + af, ok := a.(*base.FloatAttribute) + So(ok, ShouldBeTrue) + + af.Precision = 1 + } + + So(base.GetClass(pred, 0), ShouldEqual, "1.0") + So(base.GetClass(pred, 1), ShouldEqual, "1.0") + So(base.GetClass(pred, 2), ShouldEqual, "0.0") + So(base.GetClass(pred, 3), ShouldEqual, "0.0") + + }) + }) + + }) + +} From de9a6246fdd8eaab85f7878ef1917be8c29c1b5b Mon Sep 17 00:00:00 2001 From: Thatcher Peskens Date: Wed, 6 Jul 2016 18:00:30 -0700 Subject: [PATCH 3/3] added String() function to sentimentIron's inline-training-data function --- base/mat.go | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/base/mat.go b/base/mat.go index c608558..413e7fe 100644 --- a/base/mat.go +++ b/base/mat.go @@ -3,6 +3,7 @@ package base import ( "fmt" "github.com/gonum/matrix/mat64" + "bytes" ) type Mat64Instances struct { @@ -17,13 +18,15 @@ func InstancesFromMat64(rows, cols int, data *mat64.Dense) *Mat64Instances { var ret Mat64Instances for i := 0; i < cols; i++ { - ret.attributes = append(ret.attributes, NewFloatAttribute(fmt.Sprintf("%d", - i))) + ret.attributes = append(ret.attributes, NewFloatAttribute(fmt.Sprintf("%d", i))) } ret.classAttrs = make(map[int]bool) ret.Data = data ret.rows = rows + + ret.AddClassAttribute(ret.attributes[len(ret.attributes)-1]) + return &ret } @@ -103,7 +106,6 @@ func (m *Mat64Instances) MapOverRows(as []AttributeSpec, f func([][]byte, int) ( } } return nil - } // RowString: should print the values of a row @@ -116,3 +118,53 @@ func (m *Mat64Instances) RowString(row int) string { func (m *Mat64Instances) Size() (int, int) { return len(m.attributes), m.rows } + +// String returns a human-readable summary of this dataset. +func (m *Mat64Instances) String() string { + var buffer bytes.Buffer + + // Get all Attribute information + as := ResolveAllAttributes(m) + + // Print header + cols, rows := m.Size() + buffer.WriteString("Instances with ") + buffer.WriteString(fmt.Sprintf("%d row(s) ", rows)) + buffer.WriteString(fmt.Sprintf("%d attribute(s)\n", cols)) + buffer.WriteString(fmt.Sprintf("Attributes: \n")) + + cnt := 0 + for _, a := range as { + prefix := "\t" + if m.classAttrs[cnt] { + prefix = "*\t" + } + cnt++ + buffer.WriteString(fmt.Sprintf("%s%s\n", prefix, a.attr)) + } + + buffer.WriteString("\nData:\n") + maxRows := 30 + if rows < maxRows { + maxRows = rows + } + + for i := 0; i < maxRows; i++ { + buffer.WriteString("\t") + for _, a := range as { + val := m.Get(a, i) + buffer.WriteString(fmt.Sprintf("%s ", a.attr.GetStringFromSysVal(val))) + } + buffer.WriteString("\n") + } + + missingRows := rows - maxRows + if missingRows != 0 { + buffer.WriteString(fmt.Sprintf("\t...\n%d row(s) undisplayed", missingRows)) + } else { + buffer.WriteString("All rows displayed") + } + + return buffer.String() +} +