From 4e81045015b5e70af2dd247b25ca0d3de4e6c53b Mon Sep 17 00:00:00 2001 From: Stephen Whitworth Date: Sun, 29 Dec 2013 00:03:42 +0000 Subject: [PATCH] Tidying up --- base.go | 53 -------------- base/base.go | 6 +- knn.go | 129 ----------------------------------- knn/knn.go | 2 +- testknn.go => knn/testknn.go | 0 5 files changed, 4 insertions(+), 186 deletions(-) delete mode 100644 base.go delete mode 100644 knn.go rename testknn.go => knn/testknn.go (100%) diff --git a/base.go b/base.go deleted file mode 100644 index 3a5d7b2..0000000 --- a/base.go +++ /dev/null @@ -1,53 +0,0 @@ -package base - -import ( - "os" - "encoding/csv" - "fmt" - "io" - "strconv" - ) - -func ParseCsv(filepath string, text int, columns []int) (int, int,[]string, []string, []float64) { - labels := make([]string, 10) - data := make([]float64, 10) - headers := make([]string, 2) - rows := 0 - - file, err := os.Open(filepath) - if err != nil { - fmt.Println("Error:", err) - } - defer file.Close() - - reader := csv.NewReader(file) - - headerrow, _ := reader.Read() - - for _, col := range columns { - entry := headerrow[col] - headers = append(headers, entry) - } - - for { - record, err := reader.Read() - if err == io.EOF { - break - } else if err != nil { - fmt.Println("Error:", err) - } - - // - labels = append(labels, record[text]) - - //Iterate over our rows and append the values to a slice - for _, col := range columns { - entry := record[col] - number, _ := strconv.ParseFloat(entry, 64) - data = append(data, number) - } - rows += 1 - } - cols := len(columns) - return cols, rows, headers, labels, data -} \ No newline at end of file diff --git a/base/base.go b/base/base.go index 142e576..382e909 100644 --- a/base/base.go +++ b/base/base.go @@ -1,4 +1,4 @@ -package base +package golearn import ( "os" @@ -8,8 +8,8 @@ import ( "strconv" ) -//Need to implement base functions here, like parsing CSV etc. - +//Parses a CSV file, returning the number of columns and rows, the headers, the labels associated with +//classification, and the data that will be used for training. func ParseCsv(filepath string, text int, columns []int) (int, int,[]string, []string, []float64) { labels := make([]string, 10) data := make([]float64, 10) diff --git a/knn.go b/knn.go deleted file mode 100644 index ff53e66..0000000 --- a/knn.go +++ /dev/null @@ -1,129 +0,0 @@ -package main - -import ( - mat "github.com/skelterjohn/go.matrix" - rand "math/rand" - "math" - "fmt" - "sort" - "/base/" - // "errors" - ) - -//Sorts a map by value size in .s property -type sortedMap struct { - m map[int]float64 - s []int -} - -func (sm *sortedMap) Len() int { - return len(sm.m) -} - -func (sm *sortedMap) Less(i, j int) bool { - return sm.m[sm.s[i]] < sm.m[sm.s[j]] -} - -func (sm *sortedMap) Swap(i, j int) { - sm.s[i], sm.s[j] = sm.s[j], sm.s[i] -} - -func sortMap(m map[int]float64) []int { - sm := new(sortedMap) - sm.m = m - sm.s = make([]int, len(m)) - i := 0 - for key, _ := range m { - sm.s[i] = key - i++ - } - sort.Sort(sm) - return sm.s -} - -type KNNClassifier struct { - Data mat.DenseMatrix - Name string - Labels []string -} - -func RandomArray(n int) []float64 { - ReturnedArray := make([]float64, n) - for i := 0; i < n; i++ { - ReturnedArray[i] = rand.Float64() - } - return ReturnedArray -} - -//Mints a new classifier -func (KNN *KNNClassifier) New(name string, labels []string, numbers []float64, x int, y int) { - - // if x != len(KNN.Labels) { - // return errors.New("KNN: There must be a label for each row") - // } - - KNN.Data = *mat.MakeDenseMatrix(numbers, x, y) - KNN.Name = name - KNN.Labels = labels -} - -//Computes a variety of distance metrics between two vectors -//Only returns Euclidean distance at the moment -func (KNN *KNNClassifier) ComputeDistance(vector *mat.DenseMatrix, testrow *mat.DenseMatrix) float64 { - var sum float64 - - difference, err := testrow.MinusDense(vector) - flat := difference.Array() - - if err != nil { - fmt.Println(err) - } - - for _, i := range flat { - squared := math.Pow(i, 2) - sum += squared - } - - eucdistance := math.Sqrt(sum) - return eucdistance -} - -//Returns a classification based on a vector input -//Just need to build the max voting function -func (KNN *KNNClassifier) Predict(vector *mat.DenseMatrix, K int) ([]string, []int) { - - rows := KNN.Data.Rows() - rownumbers := make(map[int]float64) - labels := make([]string, K) - - for i := 0; i < rows; i++{ - row := KNN.Data.GetRowVector(i) - eucdistance := KNN.ComputeDistance(row, vector) - rownumbers[i] = eucdistance - } - - sorted := sortMap(rownumbers) - values := sorted[:K] - - for _, elem := range values { - labels = append(labels, KNN.Labels[elem]) - } - - return labels, values -} - -//Returns a label, given an index -func (KNN *KNNClassifier) GetLabel(index int) string { - return KNN.Labels[index] -} - -func main(){ - for { - cols, rows, _, labels, data := base.ParseCsv("/Users/stephenwhitworth/Desktop/model.csv", 1, []int{2,3}) - knn := KNNClassifier{} - random := mat.MakeDenseMatrix([]float64{410,433,400,400},1,2) - knn.New("Testing", labels, data, rows, cols) - labels, indexes := knn.Predict(random, 1) - fmt.Println(labels, indexes) - } -} \ No newline at end of file diff --git a/knn/knn.go b/knn/knn.go index cda2b89..48b1ffa 100644 --- a/knn/knn.go +++ b/knn/knn.go @@ -1,4 +1,4 @@ -package main +package golearn import ( mat "github.com/skelterjohn/go.matrix" diff --git a/testknn.go b/knn/testknn.go similarity index 100% rename from testknn.go rename to knn/testknn.go