From 1c22641a3be83a0582cc02bbb947ef94cc0dcdef Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Sun, 4 May 2014 22:58:32 +0200 Subject: [PATCH 1/5] Fix manhattan distance calculation --- metrics/pairwise/manhattan.go | 27 ++++++++++----------------- metrics/pairwise/manhattan_test.go | 2 +- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/metrics/pairwise/manhattan.go b/metrics/pairwise/manhattan.go index 20b319a..39fc7b2 100644 --- a/metrics/pairwise/manhattan.go +++ b/metrics/pairwise/manhattan.go @@ -15,26 +15,19 @@ func NewManhattan() *Manhattan { // Manhattan distance, also known as L1 distance. // Compute sum of absolute values of elements. func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 { - var length int - subVector := mat64.NewDense(0, 0, nil) - subVector.Sub(vectorX, vectorY) - - r, c := subVector.Dims() - - if r == 1 { - // Force transpose to column vector - subVector.TCopy(subVector) - length = c - } else if c == 1 { - length = r - } else { + r1, c1 := vectorX.Dims() + r2, c2 := vectorY.Dims() + if r1 != r2 || c1 != c2 { panic(mat64.ErrShape) } + // TODO: Add panic() whenever dimensions are not same result := .0 - for i := 0; i < length; i++ { - result += math.Abs(subVector.At(i, 0)) - } - return result + for i := 0; i < r1; i++ { + for j := 0; j < c1; j++ { + result += math.Abs(vectorX.At(i, j) - vectorY.At(i, j)) + } + } + return result } diff --git a/metrics/pairwise/manhattan_test.go b/metrics/pairwise/manhattan_test.go index e6361e4..2f54b1e 100644 --- a/metrics/pairwise/manhattan_test.go +++ b/metrics/pairwise/manhattan_test.go @@ -33,7 +33,7 @@ func TestManhattan(t *testing.T) { }) }) - Convey("When calculating distance with row and column vectors", func() { + Convey("When calculating distance with different dimention matrices", func() { vectorX.TCopy(vectorX) So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape) }) From ad77c0dc7ed9ff420adc379bffa113f670f12ba8 Mon 
Sep 17 00:00:00 2001 From: Alex Petrov Date: Sun, 4 May 2014 23:11:37 +0200 Subject: [PATCH 2/5] Add Chebyshev and Cranberra distances, improve Manhattan distance Previously, the Manhattan distance measure was only able to calculate vector distances. Now it's possible to calculate distances on matrices, too, without the unnecessary overhead of copying vectors, by doing in-place lookups. --- metrics/pairwise/chebyshev.go | 34 ++++++++++++++++ metrics/pairwise/chebyshev_test.go | 42 ++++++++++++++++++++ metrics/pairwise/cranberra.go | 48 +++++++++++++++++++++++ metrics/pairwise/cranberra_test.go | 52 ++++++++++++++++++++++++ metrics/pairwise/manhattan.go | 1 - metrics/pairwise/manhattan_test.go | 63 +++++++++++++++++------------- 6 files changed, 212 insertions(+), 28 deletions(-) create mode 100644 metrics/pairwise/chebyshev.go create mode 100644 metrics/pairwise/chebyshev_test.go create mode 100644 metrics/pairwise/cranberra.go create mode 100644 metrics/pairwise/cranberra_test.go diff --git a/metrics/pairwise/chebyshev.go b/metrics/pairwise/chebyshev.go new file mode 100644 index 0000000..a89d992 --- /dev/null +++ b/metrics/pairwise/chebyshev.go @@ -0,0 +1,34 @@ +package pairwise + +import ( + "math" + + "github.com/gonum/matrix/mat64" +) + +type Chebyshev struct{} + +func NewChebyshev() *Chebyshev { + return &Chebyshev{} +} + +func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 { + r1, c1 := vectorX.Dims() + r2, c2 := vectorY.Dims() + if r1 != r2 || c1 != c2 { + panic(mat64.ErrShape) + } + + subVector := mat64.NewDense(0, 0, nil) + subVector.Sub(vectorX, vectorY) + + max := float64(0) + + for i := 0; i < r1; i++ { + for j := 0; j < c1; j++ { + max = math.Max(max, math.Abs(vectorX.At(i, j) - vectorY.At(i, j))) + } + } + + return max +} diff --git a/metrics/pairwise/chebyshev_test.go b/metrics/pairwise/chebyshev_test.go new file mode 100644 index 0000000..2bdf3a4 --- /dev/null +++ b/metrics/pairwise/chebyshev_test.go @@ -0,0 +1,42 @@ 
+package pairwise + +import ( + "testing" + + "github.com/gonum/matrix/mat64" + . "github.com/smartystreets/goconvey/convey" +) + +func TestChebyshev(t *testing.T) { + var vectorX, vectorY *mat64.Dense + chebyshev := NewChebyshev() + + Convey("Given two vectors", t, func() { + vectorX = mat64.NewDense(4, 1, []float64{ 1, 2, 3, 4 }) + vectorY = mat64.NewDense(4, 1, []float64{ -5, -6, 7, 8 }) + + Convey("When calculating distance with two vectors", func() { + result := chebyshev.Distance(vectorX, vectorY) + + Convey("The result should be 8", func() { + So(result, ShouldEqual, 8) + }) + }) + + Convey("When calculating distance with row vectors", func() { + vectorX.TCopy(vectorX) + vectorY.TCopy(vectorY) + result := chebyshev.Distance(vectorX, vectorY) + + Convey("The result should be 8", func() { + So(result, ShouldEqual, 8) + }) + }) + + Convey("When calculating distance with different dimention matrices", func() { + vectorX.TCopy(vectorX) + So(func() { chebyshev.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape) + }) + + }) +} diff --git a/metrics/pairwise/cranberra.go b/metrics/pairwise/cranberra.go new file mode 100644 index 0000000..bd1d2cd --- /dev/null +++ b/metrics/pairwise/cranberra.go @@ -0,0 +1,48 @@ +package pairwise + +import ( + "math" + + "github.com/gonum/matrix/mat64" +) + +type Cranberra struct{} + +func NewCranberra() *Cranberra { + return &Cranberra{} +} + +func cranberraDistanceStep(num float64, denom float64) float64 { + if num == .0 && denom == .0 { + return .0 + } else { + return num/denom + } +} + +func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 { + r1, c1 := vectorX.Dims() + r2, c2 := vectorY.Dims() + if r1 != r2 || c1 != c2 { + panic(mat64.ErrShape) + } + + subVector := mat64.NewDense(0, 0, nil) + subVector.Sub(vectorX, vectorY) + + sum := .0 + + for i := 0; i < r1; i++ { + for j := 0; j < c1; j++ { + p1 := vectorX.At(i, j) + p2 := vectorY.At(i, j) + + num := math.Abs(p1 - p2) + denom := 
math.Abs(p1) + math.Abs(p2) + + sum += cranberraDistanceStep(num, denom) + } + } + + return sum +} diff --git a/metrics/pairwise/cranberra_test.go b/metrics/pairwise/cranberra_test.go new file mode 100644 index 0000000..cc2d0cb --- /dev/null +++ b/metrics/pairwise/cranberra_test.go @@ -0,0 +1,52 @@ +package pairwise + +import ( + "testing" + + "github.com/gonum/matrix/mat64" + . "github.com/smartystreets/goconvey/convey" +) + +func TestCranberrra(t *testing.T) { + var vectorX, vectorY *mat64.Dense + cranberra := NewCranberra() + + Convey("Given two vectors that are same", t, func() { + vec := mat64.NewDense(7, 1, []float64 { 0, 1, -2, 3.4, 5, -6.7, 89 }) + distance := cranberra.Distance(vec, vec) + + Convey("The result should be 0", func() { + So(distance, ShouldEqual, 0) + }) + }) + + + Convey("Given two vectors", t, func() { + vectorX = mat64.NewDense(5, 1, []float64{ 1, 2, 3, 4, 9 }) + vectorY = mat64.NewDense(5, 1, []float64{ -5, -6, 7, 4, 3 }) + + Convey("When calculating distance with two vectors", func() { + result := cranberra.Distance(vectorX, vectorY) + + Convey("The result should be 2.9", func() { + So(result, ShouldEqual, 2.9) + }) + }) + + Convey("When calculating distance with row vectors", func() { + vectorX.TCopy(vectorX) + vectorY.TCopy(vectorY) + result := cranberra.Distance(vectorX, vectorY) + + Convey("The result should be 2.9", func() { + So(result, ShouldEqual, 2.9) + }) + }) + + Convey("When calculating distance with different dimention matrices", func() { + vectorX.TCopy(vectorX) + So(func() { cranberra.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape) + }) + + }) +} diff --git a/metrics/pairwise/manhattan.go b/metrics/pairwise/manhattan.go index 39fc7b2..8afbadd 100644 --- a/metrics/pairwise/manhattan.go +++ b/metrics/pairwise/manhattan.go @@ -20,7 +20,6 @@ func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) floa if r1 != r2 || c1 != c2 { panic(mat64.ErrShape) } - // TODO: Add panic() whenever dimensions 
are not same result := .0 diff --git a/metrics/pairwise/manhattan_test.go b/metrics/pairwise/manhattan_test.go index 2f54b1e..5f065ae 100644 --- a/metrics/pairwise/manhattan_test.go +++ b/metrics/pairwise/manhattan_test.go @@ -1,42 +1,51 @@ package pairwise import ( - "testing" + "testing" - "github.com/gonum/matrix/mat64" - . "github.com/smartystreets/goconvey/convey" + "github.com/gonum/matrix/mat64" + . "github.com/smartystreets/goconvey/convey" ) func TestManhattan(t *testing.T) { - var vectorX, vectorY *mat64.Dense - manhattan := NewManhattan() + var vectorX, vectorY *mat64.Dense + manhattan := NewManhattan() - Convey("Given two vectors", t, func() { - vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3}) - vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5}) + Convey("Given two vectors that are same", t, func() { + vec := mat64.NewDense(7, 1, []float64 { 0, 1, -2, 3.4, 5, -6.7, 89 }) + distance := manhattan.Distance(vec, vec) - Convey("When calculating distance with column vectors", func() { - result := manhattan.Distance(vectorX, vectorY) + Convey("The result should be 0", func() { + So(distance, ShouldEqual, 0) + }) + }) - Convey("The result should be 5", func() { - So(result, ShouldEqual, 5) - }) - }) + Convey("Given two vectors", t, func() { + vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3}) + vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5}) - Convey("When calculating distance with row vectors", func() { - vectorX.TCopy(vectorX) - vectorY.TCopy(vectorY) - result := manhattan.Distance(vectorX, vectorY) + Convey("When calculating distance with column vectors", func() { + result := manhattan.Distance(vectorX, vectorY) - Convey("The result should be 5", func() { - So(result, ShouldEqual, 5) - }) - }) + Convey("The result should be 5", func() { + So(result, ShouldEqual, 5) + }) + }) - Convey("When calculating distance with different dimention matrices", func() { - vectorX.TCopy(vectorX) - So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, 
mat64.ErrShape) - }) + Convey("When calculating distance with row vectors", func() { + vectorX.TCopy(vectorX) + vectorY.TCopy(vectorY) + result := manhattan.Distance(vectorX, vectorY) - }) + Convey("The result should be 5", func() { + So(result, ShouldEqual, 5) + }) + }) + + Convey("When calculating distance with different dimention matrices", func() { + vectorX.TCopy(vectorX) + So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape) + }) + + }) } From 13327e925008d6f3db9474710236a820980ca792 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 5 May 2014 08:32:38 +0200 Subject: [PATCH 3/5] Run gofmt on all files to fix indentation --- metrics/pairwise/chebyshev.go | 16 +++---- metrics/pairwise/chebyshev_test.go | 4 +- metrics/pairwise/cranberra.go | 24 +++++------ metrics/pairwise/cranberra_test.go | 7 ++- metrics/pairwise/manhattan.go | 2 +- metrics/pairwise/manhattan_test.go | 68 +++++++++++++++--------------- 6 files changed, 60 insertions(+), 61 deletions(-) diff --git a/metrics/pairwise/chebyshev.go b/metrics/pairwise/chebyshev.go index a89d992..7333910 100644 --- a/metrics/pairwise/chebyshev.go +++ b/metrics/pairwise/chebyshev.go @@ -1,15 +1,15 @@ package pairwise import ( - "math" + "math" - "github.com/gonum/matrix/mat64" + "github.com/gonum/matrix/mat64" ) type Chebyshev struct{} func NewChebyshev() *Chebyshev { - return &Chebyshev{} + return &Chebyshev{} } func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 { @@ -19,16 +19,16 @@ func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) floa panic(mat64.ErrShape) } - subVector := mat64.NewDense(0, 0, nil) - subVector.Sub(vectorX, vectorY) + subVector := mat64.NewDense(0, 0, nil) + subVector.Sub(vectorX, vectorY) - max := float64(0) + max := float64(0) for i := 0; i < r1; i++ { for j := 0; j < c1; j++ { - max = math.Max(max, math.Abs(vectorX.At(i, j) - vectorY.At(i, j))) + max = math.Max(max, math.Abs(vectorX.At(i, 
j)-vectorY.At(i, j))) } } - return max + return max } diff --git a/metrics/pairwise/chebyshev_test.go b/metrics/pairwise/chebyshev_test.go index 2bdf3a4..592d227 100644 --- a/metrics/pairwise/chebyshev_test.go +++ b/metrics/pairwise/chebyshev_test.go @@ -12,8 +12,8 @@ func TestChebyshev(t *testing.T) { chebyshev := NewChebyshev() Convey("Given two vectors", t, func() { - vectorX = mat64.NewDense(4, 1, []float64{ 1, 2, 3, 4 }) - vectorY = mat64.NewDense(4, 1, []float64{ -5, -6, 7, 8 }) + vectorX = mat64.NewDense(4, 1, []float64{1, 2, 3, 4}) + vectorY = mat64.NewDense(4, 1, []float64{-5, -6, 7, 8}) Convey("When calculating distance with two vectors", func() { result := chebyshev.Distance(vectorX, vectorY) diff --git a/metrics/pairwise/cranberra.go b/metrics/pairwise/cranberra.go index bd1d2cd..68e35ee 100644 --- a/metrics/pairwise/cranberra.go +++ b/metrics/pairwise/cranberra.go @@ -1,23 +1,23 @@ package pairwise import ( - "math" + "math" - "github.com/gonum/matrix/mat64" + "github.com/gonum/matrix/mat64" ) type Cranberra struct{} func NewCranberra() *Cranberra { - return &Cranberra{} + return &Cranberra{} } func cranberraDistanceStep(num float64, denom float64) float64 { - if num == .0 && denom == .0 { - return .0 - } else { - return num/denom - } + if num == .0 && denom == .0 { + return .0 + } else { + return num / denom + } } func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 { @@ -27,10 +27,10 @@ func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) floa panic(mat64.ErrShape) } - subVector := mat64.NewDense(0, 0, nil) - subVector.Sub(vectorX, vectorY) + subVector := mat64.NewDense(0, 0, nil) + subVector.Sub(vectorX, vectorY) - sum := .0 + sum := .0 for i := 0; i < r1; i++ { for j := 0; j < c1; j++ { @@ -44,5 +44,5 @@ func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) floa } } - return sum + return sum } diff --git a/metrics/pairwise/cranberra_test.go 
b/metrics/pairwise/cranberra_test.go index cc2d0cb..a4da1f3 100644 --- a/metrics/pairwise/cranberra_test.go +++ b/metrics/pairwise/cranberra_test.go @@ -12,7 +12,7 @@ func TestCranberrra(t *testing.T) { cranberra := NewCranberra() Convey("Given two vectors that are same", t, func() { - vec := mat64.NewDense(7, 1, []float64 { 0, 1, -2, 3.4, 5, -6.7, 89 }) + vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89}) distance := cranberra.Distance(vec, vec) Convey("The result should be 0", func() { @@ -20,10 +20,9 @@ func TestCranberrra(t *testing.T) { }) }) - Convey("Given two vectors", t, func() { - vectorX = mat64.NewDense(5, 1, []float64{ 1, 2, 3, 4, 9 }) - vectorY = mat64.NewDense(5, 1, []float64{ -5, -6, 7, 4, 3 }) + vectorX = mat64.NewDense(5, 1, []float64{1, 2, 3, 4, 9}) + vectorY = mat64.NewDense(5, 1, []float64{-5, -6, 7, 4, 3}) Convey("When calculating distance with two vectors", func() { result := cranberra.Distance(vectorX, vectorY) diff --git a/metrics/pairwise/manhattan.go b/metrics/pairwise/manhattan.go index 8afbadd..6da0a6e 100644 --- a/metrics/pairwise/manhattan.go +++ b/metrics/pairwise/manhattan.go @@ -28,5 +28,5 @@ func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) floa result += math.Abs(vectorX.At(i, j) - vectorY.At(i, j)) } } - return result + return result } diff --git a/metrics/pairwise/manhattan_test.go b/metrics/pairwise/manhattan_test.go index 5f065ae..f5774e2 100644 --- a/metrics/pairwise/manhattan_test.go +++ b/metrics/pairwise/manhattan_test.go @@ -1,51 +1,51 @@ package pairwise import ( - "testing" + "testing" - "github.com/gonum/matrix/mat64" - . "github.com/smartystreets/goconvey/convey" + "github.com/gonum/matrix/mat64" + . 
"github.com/smartystreets/goconvey/convey" ) func TestManhattan(t *testing.T) { - var vectorX, vectorY *mat64.Dense - manhattan := NewManhattan() + var vectorX, vectorY *mat64.Dense + manhattan := NewManhattan() - Convey("Given two vectors that are same", t, func() { - vec := mat64.NewDense(7, 1, []float64 { 0, 1, -2, 3.4, 5, -6.7, 89 }) - distance := manhattan.Distance(vec, vec) + Convey("Given two vectors that are same", t, func() { + vec := mat64.NewDense(7, 1, []float64{0, 1, -2, 3.4, 5, -6.7, 89}) + distance := manhattan.Distance(vec, vec) - Convey("The result should be 0", func() { - So(distance, ShouldEqual, 0) - }) - }) + Convey("The result should be 0", func() { + So(distance, ShouldEqual, 0) + }) + }) - Convey("Given two vectors", t, func() { - vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3}) - vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5}) + Convey("Given two vectors", t, func() { + vectorX = mat64.NewDense(3, 1, []float64{2, 2, 3}) + vectorY = mat64.NewDense(3, 1, []float64{1, 4, 5}) - Convey("When calculating distance with column vectors", func() { - result := manhattan.Distance(vectorX, vectorY) + Convey("When calculating distance with column vectors", func() { + result := manhattan.Distance(vectorX, vectorY) - Convey("The result should be 5", func() { - So(result, ShouldEqual, 5) - }) - }) + Convey("The result should be 5", func() { + So(result, ShouldEqual, 5) + }) + }) - Convey("When calculating distance with row vectors", func() { - vectorX.TCopy(vectorX) - vectorY.TCopy(vectorY) - result := manhattan.Distance(vectorX, vectorY) + Convey("When calculating distance with row vectors", func() { + vectorX.TCopy(vectorX) + vectorY.TCopy(vectorY) + result := manhattan.Distance(vectorX, vectorY) - Convey("The result should be 5", func() { - So(result, ShouldEqual, 5) - }) - }) + Convey("The result should be 5", func() { + So(result, ShouldEqual, 5) + }) + }) - Convey("When calculating distance with different dimention matrices", func() { - 
vectorX.TCopy(vectorX) - So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape) - }) + Convey("When calculating distance with different dimention matrices", func() { + vectorX.TCopy(vectorX) + So(func() { manhattan.Distance(vectorX, vectorY) }, ShouldPanicWith, mat64.ErrShape) + }) - }) + }) } From 1d250aab6b7491858ea10d28b63cec7238f3fefb Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 5 May 2014 08:45:29 +0200 Subject: [PATCH 4/5] Remove unused subVector calls --- metrics/pairwise/chebyshev.go | 3 --- metrics/pairwise/cranberra.go | 3 --- 2 files changed, 6 deletions(-) diff --git a/metrics/pairwise/chebyshev.go b/metrics/pairwise/chebyshev.go index 7333910..0068433 100644 --- a/metrics/pairwise/chebyshev.go +++ b/metrics/pairwise/chebyshev.go @@ -19,9 +19,6 @@ func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) floa panic(mat64.ErrShape) } - subVector := mat64.NewDense(0, 0, nil) - subVector.Sub(vectorX, vectorY) - max := float64(0) for i := 0; i < r1; i++ { diff --git a/metrics/pairwise/cranberra.go b/metrics/pairwise/cranberra.go index 68e35ee..8a972cd 100644 --- a/metrics/pairwise/cranberra.go +++ b/metrics/pairwise/cranberra.go @@ -27,9 +27,6 @@ func (self *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) floa panic(mat64.ErrShape) } - subVector := mat64.NewDense(0, 0, nil) - subVector.Sub(vectorX, vectorY) - sum := .0 for i := 0; i < r1; i++ { From 93894245adce72dc9668e5465c771321436e5bd3 Mon Sep 17 00:00:00 2001 From: Alex Petrov Date: Mon, 5 May 2014 22:30:04 +0200 Subject: [PATCH 5/5] Change wording for Euclidean Distance comments --- metrics/pairwise/euclidean.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/metrics/pairwise/euclidean.go b/metrics/pairwise/euclidean.go index e61eae6..882ff3b 100644 --- a/metrics/pairwise/euclidean.go +++ b/metrics/pairwise/euclidean.go @@ -12,15 +12,14 @@ func NewEuclidean() *Euclidean { return &Euclidean{} } -// Compute 
usual inner product in the sense of euclidean. +// Compute Euclidean inner product. func (self *Euclidean) InnerProduct(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 { result := vectorX.Dot(vectorY) return result } -// Compute usual distance in the sense of euclidean. -// Also known as L2 distance. +// Compute Euclidean distance (also known as L2 distance). func (self *Euclidean) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 { subVector := mat64.NewDense(0, 0, nil) subVector.Sub(vectorX, vectorY)