From 1e888d2a9785571fed74b75f3d0e8a2340c5a263 Mon Sep 17 00:00:00 2001 From: Richard Townsend Date: Thu, 30 Oct 2014 22:02:38 +0000 Subject: [PATCH 1/2] base: More general version of equality --- base/dense.go | 29 ----------------------------- base/lazy_sort_test.go | 4 ++-- base/sort_test.go | 4 ++-- base/util_instances.go | 31 +++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/base/dense.go b/base/dense.go index e99e308..a4cedd7 100644 --- a/base/dense.go +++ b/base/dense.go @@ -443,35 +443,6 @@ func (inst *DenseInstances) swapRows(i, j int) { } } -// Equal checks whether a given Instance set is exactly the same -// as another: same size and same values (as determined by the Attributes) -// -// IMPORTANT: does not explicitly check if the Attributes are considered equal. -func (inst *DenseInstances) Equal(other DataGrid) bool { - - _, rows := inst.Size() - - for _, a := range inst.AllAttributes() { - as1, err := inst.GetAttribute(a) - if err != nil { - panic(err) // That indicates some kind of error - } - as2, err := inst.GetAttribute(a) - if err != nil { - return false // Obviously has different Attributes - } - for i := 0; i < rows; i++ { - b1 := inst.Get(as1, i) - b2 := inst.Get(as2, i) - if !byteSeqEqual(b1, b2) { - return false - } - } - } - - return true -} - // String returns a human-readable summary of this dataset. 
func (inst *DenseInstances) String() string { var buffer bytes.Buffer diff --git a/base/lazy_sort_test.go b/base/lazy_sort_test.go index 2c88aa8..a43b5e3 100644 --- a/base/lazy_sort_test.go +++ b/base/lazy_sort_test.go @@ -29,7 +29,7 @@ func TestLazySortDesc(t *testing.T) { }) Convey("Result should match the reference", func() { - So(sortedDescending.Equal(result), ShouldBeTrue) + So(InstancesAreEqual(sortedDescending, result), ShouldBeTrue) }) }) }) @@ -60,7 +60,7 @@ func TestLazySortAsc(t *testing.T) { }) Convey("Result should match the reference", func() { - So(sortedAscending.Equal(result), ShouldBeTrue) + So(InstancesAreEqual(sortedAscending, result), ShouldBeTrue) }) Convey("First element of Result should equal known value", func() { diff --git a/base/sort_test.go b/base/sort_test.go index 55b92b6..700339e 100644 --- a/base/sort_test.go +++ b/base/sort_test.go @@ -59,7 +59,7 @@ func TestSortDesc(t *testing.T) { }) Convey("Result should match the reference", func() { - So(sortedDescending.Equal(result), ShouldBeTrue) + So(InstancesAreEqual(sortedDescending, result), ShouldBeTrue) }) }) }) @@ -90,7 +90,7 @@ func TestSortAsc(t *testing.T) { }) Convey("Result should match the reference", func() { - So(sortedAscending.Equal(result), ShouldBeTrue) + So(InstancesAreEqual(sortedAscending, result), ShouldBeTrue) }) Convey("First element of Result should equal known value", func() { diff --git a/base/util_instances.go b/base/util_instances.go index b11e6a4..37b2388 100644 --- a/base/util_instances.go +++ b/base/util_instances.go @@ -407,3 +407,34 @@ func CheckStrictlyCompatible(s1 FixedDataGrid, s2 FixedDataGrid) bool { return true } + +// InstancesAreEqual checks whether a given Instance set is exactly +// the same as another (i.e. has the same size and values). 
+func InstancesAreEqual(inst, other FixedDataGrid) bool { + _, rows := inst.Size() + + for _, a := range inst.AllAttributes() { + as1, err := inst.GetAttribute(a) + if err != nil { + panic(err) // inst failed to resolve one of its own Attributes + } + as2, err := other.GetAttribute(a) + if err != nil { + return false // other does not have this Attribute + } + + if !as1.GetAttribute().Equals(as2.GetAttribute()) { + return false + } + + for i := 0; i < rows; i++ { + b1 := inst.Get(as1, i) + b2 := other.Get(as2, i) + if !byteSeqEqual(b1, b2) { + return false + } + } + } + + return true +} From 6929052af033a1ada88e04dd7667f42ff97d628b Mon Sep 17 00:00:00 2001 From: Richard Townsend Date: Thu, 30 Oct 2014 22:10:39 +0000 Subject: [PATCH 2/2] base: conversion to DenseInstances via DenseCopyOf --- base/dense.go | 34 +++++ base/view_test.go | 9 ++ examples/datasets/iris_headers_subset.csv | 151 ++++++++++++++++++++++ 3 files changed, 194 insertions(+) create mode 100644 examples/datasets/iris_headers_subset.csv diff --git a/base/dense.go b/base/dense.go index a4cedd7..97bdeb8 100644 --- a/base/dense.go +++ b/base/dense.go @@ -44,6 +44,40 @@ func NewDenseInstances() *DenseInstances { } } +// NewDenseCopy generates a new DenseInstances set +// from an existing FixedDataGrid. 
+func NewDenseCopy(of FixedDataGrid) *DenseInstances { + + ret := NewDenseInstances() // Empty grid that receives the copy + // Mirror each source Attribute, recording the spec on both sides + attrs := of.AllAttributes() + specs1 := make([]AttributeSpec, len(attrs)) + specs2 := make([]AttributeSpec, len(attrs)) + for i, a := range attrs { + // Source-side spec; a lookup failure panics + s, err := of.GetAttribute(a) + if err != nil { + panic(err) + } + specs1[i] = s + // Copy-side spec returned by AddAttribute + specs2[i] = ret.AddAttribute(a) + } + // Extend the copy by the source's row count (allocates storage) + _, rows := of.Size() + ret.Extend(rows) + + // Store the i-th value of each source row under the i-th copy-side spec + of.MapOverRows(specs1, func(v [][]byte, r int) (bool, error) { + for i, c := range v { + ret.Set(specs2[i], r, c) + } + return true, nil + }) + + return ret +} + // // AttributeGroup functions // diff --git a/base/view_test.go b/base/view_test.go index 3d9860b..8f6e5a5 100644 --- a/base/view_test.go +++ b/base/view_test.go @@ -114,6 +114,15 @@ func TestInstancesViewAttrs(t *testing.T) { So(ok3, ShouldEqual, true) So(ok4, ShouldEqual, true) }) + Convey("The InstancesView should match one prepared earlier...", func() { + instRef, err := ParseCSVToInstances("../examples/datasets/iris_headers_subset.csv", true) + So(err, ShouldBeNil) + So(InstancesAreEqual(instRef, instView), ShouldBeTrue) + Convey("And a DenseInstances conversion should too...", func() { + instView2 := NewDenseCopy(instRef) + So(InstancesAreEqual(instRef, instView2), ShouldBeTrue) + }) + }) }) }) } diff --git a/examples/datasets/iris_headers_subset.csv b/examples/datasets/iris_headers_subset.csv new file mode 100644 index 0000000..84135c5 --- /dev/null +++ b/examples/datasets/iris_headers_subset.csv @@ -0,0 +1,151 @@ + Sepal width,Petal length, Petal width, Species +3.5,1.4,0.2,Iris-setosa +3,1.4,0.2,Iris-setosa +3.2,1.3,0.2,Iris-setosa +3.1,1.5,0.2,Iris-setosa +3.6,1.4,0.2,Iris-setosa +3.9,1.7,0.4,Iris-setosa +3.4,1.4,0.3,Iris-setosa +3.4,1.5,0.2,Iris-setosa +2.9,1.4,0.2,Iris-setosa +3.1,1.5,0.1,Iris-setosa 
+3.7,1.5,0.2,Iris-setosa +3.4,1.6,0.2,Iris-setosa +3,1.4,0.1,Iris-setosa +3,1.1,0.1,Iris-setosa +4,1.2,0.2,Iris-setosa +4.4,1.5,0.4,Iris-setosa +3.9,1.3,0.4,Iris-setosa +3.5,1.4,0.3,Iris-setosa +3.8,1.7,0.3,Iris-setosa +3.8,1.5,0.3,Iris-setosa +3.4,1.7,0.2,Iris-setosa +3.7,1.5,0.4,Iris-setosa +3.6,1,0.2,Iris-setosa +3.3,1.7,0.5,Iris-setosa +3.4,1.9,0.2,Iris-setosa +3,1.6,0.2,Iris-setosa +3.4,1.6,0.4,Iris-setosa +3.5,1.5,0.2,Iris-setosa +3.4,1.4,0.2,Iris-setosa +3.2,1.6,0.2,Iris-setosa +3.1,1.6,0.2,Iris-setosa +3.4,1.5,0.4,Iris-setosa +4.1,1.5,0.1,Iris-setosa +4.2,1.4,0.2,Iris-setosa +3.1,1.5,0.1,Iris-setosa +3.2,1.2,0.2,Iris-setosa +3.5,1.3,0.2,Iris-setosa +3.1,1.5,0.1,Iris-setosa +3,1.3,0.2,Iris-setosa +3.4,1.5,0.2,Iris-setosa +3.5,1.3,0.3,Iris-setosa +2.3,1.3,0.3,Iris-setosa +3.2,1.3,0.2,Iris-setosa +3.5,1.6,0.6,Iris-setosa +3.8,1.9,0.4,Iris-setosa +3,1.4,0.3,Iris-setosa +3.8,1.6,0.2,Iris-setosa +3.2,1.4,0.2,Iris-setosa +3.7,1.5,0.2,Iris-setosa +3.3,1.4,0.2,Iris-setosa +3.2,4.7,1.4,Iris-versicolor +3.2,4.5,1.5,Iris-versicolor +3.1,4.9,1.5,Iris-versicolor +2.3,4,1.3,Iris-versicolor +2.8,4.6,1.5,Iris-versicolor +2.8,4.5,1.3,Iris-versicolor +3.3,4.7,1.6,Iris-versicolor +2.4,3.3,1,Iris-versicolor +2.9,4.6,1.3,Iris-versicolor +2.7,3.9,1.4,Iris-versicolor +2,3.5,1,Iris-versicolor +3,4.2,1.5,Iris-versicolor +2.2,4,1,Iris-versicolor +2.9,4.7,1.4,Iris-versicolor +2.9,3.6,1.3,Iris-versicolor +3.1,4.4,1.4,Iris-versicolor +3,4.5,1.5,Iris-versicolor +2.7,4.1,1,Iris-versicolor +2.2,4.5,1.5,Iris-versicolor +2.5,3.9,1.1,Iris-versicolor +3.2,4.8,1.8,Iris-versicolor +2.8,4,1.3,Iris-versicolor +2.5,4.9,1.5,Iris-versicolor +2.8,4.7,1.2,Iris-versicolor +2.9,4.3,1.3,Iris-versicolor +3,4.4,1.4,Iris-versicolor +2.8,4.8,1.4,Iris-versicolor +3,5,1.7,Iris-versicolor +2.9,4.5,1.5,Iris-versicolor +2.6,3.5,1,Iris-versicolor +2.4,3.8,1.1,Iris-versicolor +2.4,3.7,1,Iris-versicolor +2.7,3.9,1.2,Iris-versicolor +2.7,5.1,1.6,Iris-versicolor +3,4.5,1.5,Iris-versicolor +3.4,4.5,1.6,Iris-versicolor 
+3.1,4.7,1.5,Iris-versicolor +2.3,4.4,1.3,Iris-versicolor +3,4.1,1.3,Iris-versicolor +2.5,4,1.3,Iris-versicolor +2.6,4.4,1.2,Iris-versicolor +3,4.6,1.4,Iris-versicolor +2.6,4,1.2,Iris-versicolor +2.3,3.3,1,Iris-versicolor +2.7,4.2,1.3,Iris-versicolor +3,4.2,1.2,Iris-versicolor +2.9,4.2,1.3,Iris-versicolor +2.9,4.3,1.3,Iris-versicolor +2.5,3,1.1,Iris-versicolor +2.8,4.1,1.3,Iris-versicolor +3.3,6,2.5,Iris-virginica +2.7,5.1,1.9,Iris-virginica +3,5.9,2.1,Iris-virginica +2.9,5.6,1.8,Iris-virginica +3,5.8,2.2,Iris-virginica +3,6.6,2.1,Iris-virginica +2.5,4.5,1.7,Iris-virginica +2.9,6.3,1.8,Iris-virginica +2.5,5.8,1.8,Iris-virginica +3.6,6.1,2.5,Iris-virginica +3.2,5.1,2,Iris-virginica +2.7,5.3,1.9,Iris-virginica +3,5.5,2.1,Iris-virginica +2.5,5,2,Iris-virginica +2.8,5.1,2.4,Iris-virginica +3.2,5.3,2.3,Iris-virginica +3,5.5,1.8,Iris-virginica +3.8,6.7,2.2,Iris-virginica +2.6,6.9,2.3,Iris-virginica +2.2,5,1.5,Iris-virginica +3.2,5.7,2.3,Iris-virginica +2.8,4.9,2,Iris-virginica +2.8,6.7,2,Iris-virginica +2.7,4.9,1.8,Iris-virginica +3.3,5.7,2.1,Iris-virginica +3.2,6,1.8,Iris-virginica +2.8,4.8,1.8,Iris-virginica +3,4.9,1.8,Iris-virginica +2.8,5.6,2.1,Iris-virginica +3,5.8,1.6,Iris-virginica +2.8,6.1,1.9,Iris-virginica +3.8,6.4,2,Iris-virginica +2.8,5.6,2.2,Iris-virginica +2.8,5.1,1.5,Iris-virginica +2.6,5.6,1.4,Iris-virginica +3,6.1,2.3,Iris-virginica +3.4,5.6,2.4,Iris-virginica +3.1,5.5,1.8,Iris-virginica +3,4.8,1.8,Iris-virginica +3.1,5.4,2.1,Iris-virginica +3.1,5.6,2.4,Iris-virginica +3.1,5.1,2.3,Iris-virginica +2.7,5.1,1.9,Iris-virginica +3.2,5.9,2.3,Iris-virginica +3.3,5.7,2.5,Iris-virginica +3,5.2,2.3,Iris-virginica +2.5,5,1.9,Iris-virginica +3,5.2,2,Iris-virginica +3.4,5.4,2.3,Iris-virginica +3,5.1,1.8,Iris-virginica