2017-08-29 17:27:02 -05:00
|
|
|
package gooxml
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/xml"
|
|
|
|
"log"
|
|
|
|
"strings"
|
2017-08-29 21:28:26 -05:00
|
|
|
"unicode"
|
2017-08-29 17:27:02 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
// XSDAny is used to marshal/unmarshal xsd:any types in the OOXML schema. It
// stores the element as a raw token stream rather than as typed fields.
type XSDAny struct {
	// Attrs holds XML attributes. None of the methods in this file read or
	// populate it.
	Attrs []xml.Attr
	// Tokens is the recorded token stream: UnmarshalXML appends the opening
	// start element followed by every token up to the closing end element,
	// and MarshalXML replays the tokens in the same order.
	Tokens []xml.Token
}
|
|
|
|
|
2017-08-29 21:28:26 -05:00
|
|
|
// wellKnownSchemas maps conventional namespace prefixes to the namespace
// URIs they stand for.
var wellKnownSchemas = map[string]string{
	"dc":      "http://purl.org/dc/elements/1.1/",
	"dcterms": "http://purl.org/dc/terms/",
	"xsi":     "http://www.w3.org/2001/XMLSchema-instance",
}
|
|
|
|
var wellKnownSchemasInv = func() map[string]string {
|
|
|
|
r := map[string]string{}
|
|
|
|
for pfx, ns := range wellKnownSchemas {
|
|
|
|
r[ns] = pfx
|
|
|
|
}
|
|
|
|
return r
|
|
|
|
}()
|
|
|
|
|
2017-08-29 17:27:02 -05:00
|
|
|
func cloneToken(tok xml.Token) xml.Token {
|
|
|
|
switch el := tok.(type) {
|
|
|
|
case xml.CharData:
|
|
|
|
cd := xml.CharData{}
|
|
|
|
cd = append(cd, el...)
|
|
|
|
return cd
|
2017-08-29 21:28:26 -05:00
|
|
|
case xml.StartElement:
|
|
|
|
for i, attr := range el.Attr {
|
|
|
|
if ns, ok := wellKnownSchemas[attr.Name.Space]; ok {
|
|
|
|
el.Attr[i].Name.Space = ns
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return tok
|
|
|
|
case xml.EndElement:
|
2017-08-29 17:27:02 -05:00
|
|
|
return tok
|
|
|
|
default:
|
2017-08-29 21:28:26 -05:00
|
|
|
log.Fatalf("need to support %T", el)
|
2017-08-29 17:27:02 -05:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// UnmarshalXML implements the xml.Unmarshaler interface.
|
|
|
|
func (x *XSDAny) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
|
|
|
x.Tokens = append(x.Tokens, cloneToken(start))
|
|
|
|
lfor:
|
|
|
|
for {
|
|
|
|
tok, err := d.Token()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
switch el := tok.(type) {
|
|
|
|
default:
|
|
|
|
x.Tokens = append(x.Tokens, cloneToken(tok))
|
|
|
|
case xml.EndElement:
|
|
|
|
x.Tokens = append(x.Tokens, cloneToken(tok))
|
|
|
|
if el.Name == start.Name {
|
|
|
|
break lfor
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// nsSet records which prefix has been assigned to each XML namespace while an
// element tree is being written out.
type nsSet struct {
	// urlToPrefix looks up the prefix assigned to a namespace.
	urlToPrefix map[string]string
	// prefixToURL looks up the namespace a prefix was assigned to.
	prefixToURL map[string]string
	// prefixes lists the assigned prefixes in assignment order. Iterating it
	// rather than the maps is required for deterministic output.
	prefixes []string
}
|
|
|
|
|
|
|
|
func (n *nsSet) getPrefix(ns string) string {
|
2017-08-29 21:28:26 -05:00
|
|
|
// Common namespaces are used in these 'any' elements and some versions
|
|
|
|
// of Word really want to the prefix to match what they write out. This
|
|
|
|
// occurred primarily with docProps/core.xml
|
|
|
|
if pfx, ok := wellKnownSchemasInv[ns]; ok {
|
|
|
|
if _, ok := n.prefixToURL[pfx]; !ok {
|
|
|
|
n.prefixToURL[pfx] = ns
|
|
|
|
n.urlToPrefix[ns] = pfx
|
|
|
|
n.prefixes = append(n.prefixes, pfx)
|
|
|
|
}
|
|
|
|
return pfx
|
|
|
|
}
|
|
|
|
|
|
|
|
// trying to construct a decent looking valid prefix
|
|
|
|
ns = strings.TrimFunc(ns, func(r rune) bool {
|
|
|
|
return !unicode.IsLetter(r)
|
|
|
|
})
|
|
|
|
|
2017-08-29 17:27:02 -05:00
|
|
|
// do we have a prefix for this ns?
|
|
|
|
if sc, ok := n.urlToPrefix[ns]; ok {
|
|
|
|
return sc
|
|
|
|
}
|
|
|
|
|
|
|
|
// determine the last path portion of the namespace
|
|
|
|
// "urn:schemas-microsoft-com:office:office" = "office"
|
|
|
|
// "http://schemas.microsoft.com/office/word/2012/wordml" = "wordml"
|
|
|
|
split := strings.Split(ns, "/")
|
|
|
|
split = strings.Split(split[len(split)-1], ":")
|
|
|
|
// last segment of the namesapce
|
|
|
|
last := split[len(split)-1]
|
|
|
|
lng := 0
|
|
|
|
pfx := []byte{}
|
|
|
|
for {
|
|
|
|
if lng < len(last) {
|
|
|
|
pfx = append(pfx, last[lng])
|
|
|
|
} else {
|
|
|
|
pfx = append(pfx, '_')
|
|
|
|
}
|
|
|
|
lng++
|
|
|
|
// is this prefix unused?
|
|
|
|
if _, ok := n.prefixToURL[string(pfx)]; !ok {
|
|
|
|
n.prefixToURL[string(pfx)] = ns
|
|
|
|
n.urlToPrefix[ns] = string(pfx)
|
|
|
|
n.prefixes = append(n.prefixes, string(pfx))
|
|
|
|
return string(pfx)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n nsSet) applyToSE(se *xml.StartElement) {
|
|
|
|
if se.Name.Space == "" {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
pfx := n.getPrefix(se.Name.Space)
|
|
|
|
se.Name.Space = ""
|
|
|
|
se.Name.Local = pfx + ":" + se.Name.Local
|
|
|
|
tmpAttr := se.Attr
|
|
|
|
se.Attr = nil
|
|
|
|
for _, attr := range tmpAttr {
|
|
|
|
// skip these as we create them later
|
|
|
|
if attr.Name.Space == "xmlns" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if attr.Name.Space != "" {
|
|
|
|
pfx := n.getPrefix(attr.Name.Space)
|
|
|
|
attr.Name.Space = ""
|
|
|
|
attr.Name.Local = pfx + ":" + attr.Name.Local
|
|
|
|
}
|
|
|
|
se.Attr = append(se.Attr, attr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n nsSet) applyToEE(ee *xml.EndElement) {
|
|
|
|
if ee.Name.Space == "" {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
pfx := n.getPrefix(ee.Name.Space)
|
|
|
|
ee.Name.Space = ""
|
|
|
|
ee.Name.Local = pfx + ":" + ee.Name.Local
|
|
|
|
}
|
|
|
|
|
|
|
|
// MarshalXML implements the xml.Marshaler interface.
|
|
|
|
func (x *XSDAny) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
|
|
|
|
if len(x.Tokens) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
ns := nsSet{urlToPrefix: make(map[string]string),
|
|
|
|
prefixToURL: make(map[string]string)}
|
|
|
|
|
|
|
|
// collect the namespaces
|
|
|
|
for _, tok := range x.Tokens {
|
|
|
|
if se, ok := tok.(xml.StartElement); ok {
|
|
|
|
if se.Name.Space != "" {
|
|
|
|
ns.getPrefix(se.Name.Space)
|
|
|
|
}
|
|
|
|
for _, attr := range se.Attr {
|
|
|
|
if attr.Name.Space != "" && attr.Name.Space != "xmlns" {
|
|
|
|
ns.getPrefix(attr.Name.Space)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// iniital element must be a StartElement
|
|
|
|
se := x.Tokens[0].(xml.StartElement)
|
|
|
|
ns.applyToSE(&se)
|
|
|
|
// add namespaces to first element
|
|
|
|
for _, pfx := range ns.prefixes {
|
|
|
|
ns := ns.prefixToURL[pfx]
|
|
|
|
se.Attr = append(se.Attr, xml.Attr{
|
|
|
|
Name: xml.Name{Local: "xmlns:" + pfx},
|
|
|
|
Value: ns,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
e.EncodeToken(se)
|
|
|
|
|
|
|
|
for _, tok := range x.Tokens[1:] {
|
|
|
|
if se, ok := tok.(xml.StartElement); ok {
|
|
|
|
ns.applyToSE(&se)
|
|
|
|
e.EncodeToken(se)
|
|
|
|
} else if ee, ok := tok.(xml.EndElement); ok {
|
|
|
|
ns.applyToEE(&ee)
|
|
|
|
e.EncodeToken(ee)
|
|
|
|
} else if err := e.EncodeToken(tok); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|