mirror of
https://github.com/sjwhitworth/golearn.git
synced 2025-04-26 13:49:14 +08:00
Made versions of CSV-readers w/ io.ReadSeeker
Each method now has a -FromReader counterpart, which allows those helper methods to be used even when the data is not in a physical file. The original methods make use of those -FromReader methods. The reader is reset (Seek(0, 0)) before every method-specific read, to ensure it is read from the start. Test cases are not yet touched, and I'm not sure they should be.
This commit is contained in:
parent
b7ad1fe499
commit
db086a864e
129
base/csv.go
129
base/csv.go
@ -5,21 +5,15 @@ import (
|
|||||||
"encoding/csv"
|
"encoding/csv"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
|
||||||
"regexp"
|
"regexp"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseCSVGetRows returns the number of rows in a given file.
|
// ParseCSVGetRowsFromReader returns the number of rows in a given reader.
|
||||||
func ParseCSVGetRows(filepath string) (int, error) {
|
func ParseCSVGetRowsFromReader(r io.ReadSeeker) (int, error) {
|
||||||
file, err := os.Open(filepath)
|
r.Seek(0, 0)
|
||||||
if err != nil {
|
reader := csv.NewReader(r)
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
defer file.Close()
|
|
||||||
|
|
||||||
reader := csv.NewReader(file)
|
|
||||||
counter := 0
|
counter := 0
|
||||||
for {
|
for {
|
||||||
_, err := reader.Read()
|
_, err := reader.Read()
|
||||||
@ -33,22 +27,17 @@ func ParseCSVGetRows(filepath string) (int, error) {
|
|||||||
return counter, nil
|
return counter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVEstimateFilePrecision determines what the maximum number of
|
// ParseCSVEstimateFilePrecisionFromReader determines what the maximum number of
|
||||||
// digits occuring anywhere after the decimal point within the file.
|
// digits occuring anywhere after the decimal point within the reader.
|
||||||
func ParseCSVEstimateFilePrecision(filepath string) (int, error) {
|
func ParseCSVEstimateFilePrecisionFromReader(r io.ReadSeeker) (int, error) {
|
||||||
// Creat a basic regexp
|
// Creat a basic regexp
|
||||||
rexp := regexp.MustCompile("[0-9]+(.[0-9]+)?")
|
rexp := regexp.MustCompile("[0-9]+(.[0-9]+)?")
|
||||||
|
|
||||||
// Open the source file
|
|
||||||
f, err := os.Open(filepath)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
// Scan through the file line-by-line
|
// Scan through the file line-by-line
|
||||||
maxL := 0
|
maxL := 0
|
||||||
scanner := bufio.NewScanner(f)
|
|
||||||
|
r.Seek(0, 0)
|
||||||
|
scanner := bufio.NewScanner(r)
|
||||||
lineCount := 0
|
lineCount := 0
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
if lineCount > 5 {
|
if lineCount > 5 {
|
||||||
@ -79,27 +68,23 @@ func ParseCSVEstimateFilePrecision(filepath string) (int, error) {
|
|||||||
return maxL, nil
|
return maxL, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVGetAttributes returns an ordered slice of appropriate-ly typed
|
// ParseCSVGetAttributesFromReader returns an ordered slice of appropriate-ly typed
|
||||||
// and named Attributes.
|
// and named Attributes.
|
||||||
func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
|
func ParseCSVGetAttributesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute {
|
||||||
attrs := ParseCSVSniffAttributeTypes(filepath, hasHeaders)
|
attrs := ParseCSVSniffAttributeTypesFromReader(r, hasHeaders)
|
||||||
names := ParseCSVSniffAttributeNames(filepath, hasHeaders)
|
names := ParseCSVSniffAttributeNamesFromReader(r, hasHeaders)
|
||||||
for i, attr := range attrs {
|
for i, attr := range attrs {
|
||||||
attr.SetName(names[i])
|
attr.SetName(names[i])
|
||||||
}
|
}
|
||||||
return attrs
|
return attrs
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVSniffAttributeNames returns a slice containing the top row
|
// ParseCSVSniffAttributeNamesFromReader returns a slice containing the top row
|
||||||
// of a given CSV file, or placeholders if hasHeaders is false.
|
// of a given reader with CSV-contents, or placeholders if hasHeaders is false.
|
||||||
func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
|
func ParseCSVSniffAttributeNamesFromReader(r io.ReadSeeker, hasHeaders bool) []string {
|
||||||
file, err := os.Open(filepath)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
defer file.Close()
|
|
||||||
|
|
||||||
reader := csv.NewReader(file)
|
r.Seek(0, 0)
|
||||||
|
reader := csv.NewReader(r)
|
||||||
headers, err := reader.Read()
|
headers, err := reader.Read()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
@ -119,20 +104,16 @@ func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVSniffAttributeTypes returns a slice of appropriately-typed Attributes.
|
// ParseCSVSniffAttributeTypesFromReader returns a slice of appropriately-typed Attributes.
|
||||||
//
|
//
|
||||||
// The type of a given attribute is determined by looking at the first data row
|
// The type of a given attribute is determined by looking at the first data row
|
||||||
// of the CSV.
|
// of the CSV.
|
||||||
func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute {
|
func ParseCSVSniffAttributeTypesFromReader(r io.ReadSeeker, hasHeaders bool) []Attribute {
|
||||||
var attrs []Attribute
|
var attrs []Attribute
|
||||||
// Open file
|
|
||||||
file, err := os.Open(filepath)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
defer file.Close()
|
|
||||||
// Create the CSV reader
|
// Create the CSV reader
|
||||||
reader := csv.NewReader(file)
|
r.Seek(0, 0)
|
||||||
|
reader := csv.NewReader(r)
|
||||||
if hasHeaders {
|
if hasHeaders {
|
||||||
// Skip the headers
|
// Skip the headers
|
||||||
_, err := reader.Read()
|
_, err := reader.Read()
|
||||||
@ -161,7 +142,7 @@ func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Estimate file precision
|
// Estimate file precision
|
||||||
maxP, err := ParseCSVEstimateFilePrecision(filepath)
|
maxP, err := ParseCSVEstimateFilePrecisionFromReader(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -175,7 +156,7 @@ func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVBuildInstancesFromReader updates an [[#UpdatableDataGrid]] from a io.Reader
|
// ParseCSVBuildInstancesFromReader updates an [[#UpdatableDataGrid]] from a io.Reader
|
||||||
func ParseCSVBuildInstancesFromReader(r io.Reader, attrs []Attribute, hasHeader bool, u UpdatableDataGrid) (err error) {
|
func ParseCSVBuildInstancesFromReader(r io.ReadSeeker, attrs []Attribute, hasHeader bool, u UpdatableDataGrid) (err error) {
|
||||||
var rowCounter int
|
var rowCounter int
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -183,11 +164,13 @@ func ParseCSVBuildInstancesFromReader(r io.Reader, attrs []Attribute, hasHeader
|
|||||||
if _, ok := r.(runtime.Error); ok {
|
if _, ok := r.(runtime.Error); ok {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
err = fmt.Errorf("Error at line %d (error %s)", rowCounter, r.(error))
|
err = fmt.Errorf("error at line %d (error %s)", rowCounter, r.(error))
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
specs := ResolveAttributes(u, attrs)
|
specs := ResolveAttributes(u, attrs)
|
||||||
|
|
||||||
|
r.Seek(0, 0)
|
||||||
reader := csv.NewReader(r)
|
reader := csv.NewReader(r)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
@ -212,19 +195,11 @@ func ParseCSVBuildInstancesFromReader(r io.Reader, attrs []Attribute, hasHeader
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVToInstances reads the CSV file given by filepath and returns
|
// ParseCSVToInstancesFromReader reads the reader containing CSV and returns
|
||||||
// the read Instances.
|
// the read Instances.
|
||||||
func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInstances, err error) {
|
func ParseCSVToInstancesFromReader(r io.ReadSeeker, hasHeaders bool) (instances *DenseInstances, err error) {
|
||||||
|
|
||||||
// Open the file
|
|
||||||
f, err := os.Open(filepath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
// Read the number of rows in the file
|
// Read the number of rows in the file
|
||||||
rowCount, err := ParseCSVGetRows(filepath)
|
rowCount, err := ParseCSVGetRowsFromReader(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -234,7 +209,7 @@ func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInst
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Read the row headers
|
// Read the row headers
|
||||||
attrs := ParseCSVGetAttributes(filepath, hasHeaders)
|
attrs := ParseCSVGetAttributesFromReader(r, hasHeaders)
|
||||||
specs := make([]AttributeSpec, len(attrs))
|
specs := make([]AttributeSpec, len(attrs))
|
||||||
// Allocate the Instances to return
|
// Allocate the Instances to return
|
||||||
instances = NewDenseInstances()
|
instances = NewDenseInstances()
|
||||||
@ -244,7 +219,7 @@ func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInst
|
|||||||
}
|
}
|
||||||
instances.Extend(rowCount)
|
instances.Extend(rowCount)
|
||||||
|
|
||||||
err = ParseCSVBuildInstancesFromReader(f, attrs, hasHeaders, instances)
|
err = ParseCSVBuildInstancesFromReader(r, attrs, hasHeaders, instances)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -268,19 +243,11 @@ func ParseMatchAttributes(attrs, templateAttrs []Attribute) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVToInstancesTemplated reads the CSV file given by filepath and returns
|
// ParseCSVToTemplatedInstancesFromReader reads the reader containing CSV and returns
|
||||||
// the read Instances, using another already read DenseInstances as a template.
|
// the read Instances, using another already read DenseInstances as a template.
|
||||||
func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error) {
|
func ParseCSVToTemplatedInstancesFromReader(r io.ReadSeeker, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error) {
|
||||||
|
|
||||||
// Open the file
|
|
||||||
f, err := os.Open(filepath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
// Read the number of rows in the file
|
// Read the number of rows in the file
|
||||||
rowCount, err := ParseCSVGetRows(filepath)
|
rowCount, err := ParseCSVGetRowsFromReader(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -290,7 +257,7 @@ func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *De
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Read the row headers
|
// Read the row headers
|
||||||
attrs := ParseCSVGetAttributes(filepath, hasHeaders)
|
attrs := ParseCSVGetAttributesFromReader(r, hasHeaders)
|
||||||
templateAttrs := template.AllAttributes()
|
templateAttrs := template.AllAttributes()
|
||||||
ParseMatchAttributes(attrs, templateAttrs)
|
ParseMatchAttributes(attrs, templateAttrs)
|
||||||
|
|
||||||
@ -298,7 +265,7 @@ func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *De
|
|||||||
instances = CopyDenseInstances(template, templateAttrs)
|
instances = CopyDenseInstances(template, templateAttrs)
|
||||||
instances.Extend(rowCount)
|
instances.Extend(rowCount)
|
||||||
|
|
||||||
err = ParseCSVBuildInstancesFromReader(f, attrs, hasHeaders, instances)
|
err = ParseCSVBuildInstancesFromReader(r, attrs, hasHeaders, instances)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -313,26 +280,18 @@ func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *De
|
|||||||
return instances, nil
|
return instances, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseCSVToInstancesWithAttributeGroups reads the CSV file given by filepath,
|
// ParseCSVToInstancesWithAttributeGroupsFromReader reads the CSV file given by filepath,
|
||||||
// and returns the read DenseInstances, but also makes sure to group any Attributes
|
// and returns the read DenseInstances, but also makes sure to group any Attributes
|
||||||
// specified in the first argument and also any class Attributes specified in the second
|
// specified in the first argument and also any class Attributes specified in the second
|
||||||
func ParseCSVToInstancesWithAttributeGroups(filepath string, attrGroups, classAttrGroups map[string]string, attrOverrides map[int]Attribute, hasHeaders bool) (instances *DenseInstances, err error) {
|
func ParseCSVToInstancesWithAttributeGroupsFromReader(r io.ReadSeeker, attrGroups, classAttrGroups map[string]string, attrOverrides map[int]Attribute, hasHeaders bool) (instances *DenseInstances, err error) {
|
||||||
|
|
||||||
// Open file
|
|
||||||
f, err := os.Open(filepath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
// Read row count
|
// Read row count
|
||||||
rowCount, err := ParseCSVGetRows(filepath)
|
rowCount, err := ParseCSVGetRowsFromReader(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the row headers
|
// Read the row headers
|
||||||
attrs := ParseCSVGetAttributes(filepath, hasHeaders)
|
attrs := ParseCSVGetAttributesFromReader(r, hasHeaders)
|
||||||
for i := range attrs {
|
for i := range attrs {
|
||||||
if a, ok := attrOverrides[i]; ok {
|
if a, ok := attrOverrides[i]; ok {
|
||||||
attrs[i] = a
|
attrs[i] = a
|
||||||
@ -399,7 +358,7 @@ func ParseCSVToInstancesWithAttributeGroups(filepath string, attrGroups, classAt
|
|||||||
// Allocate
|
// Allocate
|
||||||
instances.Extend(rowCount)
|
instances.Extend(rowCount)
|
||||||
|
|
||||||
err = ParseCSVBuildInstancesFromReader(f, attrs, hasHeaders, instances)
|
err = ParseCSVBuildInstancesFromReader(r, attrs, hasHeaders, instances)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
108
base/filewrapper.go
Normal file
108
base/filewrapper.go
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
package base
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ParseCSVGetRows returns the number of rows in a given file.
|
||||||
|
func ParseCSVGetRows(filepath string) (int, error) {
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVGetRowsFromReader(f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCSVEstimateFilePrecision determines what the maximum number of
|
||||||
|
// digits occuring anywhere after the decimal point within the file.
|
||||||
|
func ParseCSVEstimateFilePrecision(filepath string) (int, error) {
|
||||||
|
// Open the source file
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVEstimateFilePrecisionFromReader(f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCSVGetAttributes returns an ordered slice of appropriate-ly typed
|
||||||
|
// and named Attributes.
|
||||||
|
func ParseCSVGetAttributes(filepath string, hasHeaders bool) []Attribute {
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVGetAttributesFromReader(f, hasHeaders)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCSVSniffAttributeNames returns a slice containing the top row
|
||||||
|
// of a given CSV file, or placeholders if hasHeaders is false.
|
||||||
|
func ParseCSVSniffAttributeNames(filepath string, hasHeaders bool) []string {
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVSniffAttributeNamesFromReader(f, hasHeaders)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCSVSniffAttributeTypes returns a slice of appropriately-typed Attributes.
|
||||||
|
//
|
||||||
|
// The type of a given attribute is determined by looking at the first data row
|
||||||
|
// of the CSV.
|
||||||
|
func ParseCSVSniffAttributeTypes(filepath string, hasHeaders bool) []Attribute {
|
||||||
|
// Open file
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVSniffAttributeTypesFromReader(f, hasHeaders)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCSVToInstances reads the CSV file given by filepath and returns
|
||||||
|
// the read Instances.
|
||||||
|
func ParseCSVToInstances(filepath string, hasHeaders bool) (instances *DenseInstances, err error) {
|
||||||
|
// Open the file
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVToInstancesFromReader(f, hasHeaders)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCSVToInstancesTemplated reads the CSV file given by filepath and returns
|
||||||
|
// the read Instances, using another already read DenseInstances as a template.
|
||||||
|
func ParseCSVToTemplatedInstances(filepath string, hasHeaders bool, template *DenseInstances) (instances *DenseInstances, err error) {
|
||||||
|
// Open the file
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVToTemplatedInstancesFromReader(f, hasHeaders, template)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCSVToInstancesWithAttributeGroups reads the CSV file given by filepath,
|
||||||
|
// and returns the read DenseInstances, but also makes sure to group any Attributes
|
||||||
|
// specified in the first argument and also any class Attributes specified in the second
|
||||||
|
func ParseCSVToInstancesWithAttributeGroups(filepath string, attrGroups, classAttrGroups map[string]string, attrOverrides map[int]Attribute, hasHeaders bool) (instances *DenseInstances, err error) {
|
||||||
|
// Open file
|
||||||
|
f, err := os.Open(filepath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return ParseCSVToInstancesWithAttributeGroupsFromReader(f, attrGroups, classAttrGroups, attrOverrides, hasHeaders)
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user