// unioffice/xsdany.go (207 lines, 4.8 KiB, Go)

package gooxml
import (
"encoding/xml"
"log"
"strings"
// 2017-08-29 21:28:26 -05:00 (history-view timestamp left over from page extraction)
"unicode"
)
// XSDAny carries the raw XML of an xsd:any element from the OOXML schema so
// that it can be unmarshaled and marshaled again without a typed model.
type XSDAny struct {
	// Attrs holds XML attributes.
	// NOTE(review): nothing visible in this file reads or writes Attrs;
	// confirm whether other code relies on it.
	Attrs []xml.Attr
	// Tokens is the recorded token stream of the element, beginning with its
	// start element.
	Tokens []xml.Token
}
// 2017-08-29 21:28:26 -05:00 (history-view timestamp left over from page extraction)
// wellKnownSchemas maps conventional namespace prefixes to the namespace URLs
// they stand for.
var wellKnownSchemas = map[string]string{
	"dc":      "http://purl.org/dc/elements/1.1/",
	"dcterms": "http://purl.org/dc/terms/",
	"xsi":     "http://www.w3.org/2001/XMLSchema-instance",
}
// wellKnownSchemasInv is wellKnownSchemas with keys and values swapped: it
// maps a namespace URL back to its conventional prefix. It is built once at
// package initialization.
var wellKnownSchemasInv = func() map[string]string {
	inverse := make(map[string]string, len(wellKnownSchemas))
	for prefix, url := range wellKnownSchemas {
		inverse[url] = prefix
	}
	return inverse
}()
func cloneToken(tok xml.Token) xml.Token {
switch el := tok.(type) {
case xml.CharData:
cd := xml.CharData{}
cd = append(cd, el...)
return cd
2017-08-29 21:28:26 -05:00
case xml.StartElement:
for i, attr := range el.Attr {
if ns, ok := wellKnownSchemas[attr.Name.Space]; ok {
el.Attr[i].Name.Space = ns
}
}
return tok
case xml.EndElement:
return tok
default:
2017-08-29 21:28:26 -05:00
log.Fatalf("need to support %T", el)
}
return nil
}
// UnmarshalXML implements the xml.Unmarshaler interface.
//
// It records start and every following token, up to and including the end
// element that closes start, by appending clones of them to x.Tokens. Nesting
// depth is tracked so that a descendant sharing the name of start (for
// example <a><a/></a>) does not end the capture early.
//
// Any error returned by the decoder is returned unchanged.
func (x *XSDAny) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	x.Tokens = append(x.Tokens, cloneToken(start))
	// depth counts the currently open elements, starting with start itself.
	depth := 1
	for depth > 0 {
		tok, err := d.Token()
		if err != nil {
			return err
		}
		switch tok.(type) {
		case xml.StartElement:
			depth++
		case xml.EndElement:
			depth--
		}
		x.Tokens = append(x.Tokens, cloneToken(tok))
	}
	return nil
}
// nsSet records which prefix has been assigned to which namespace.
type nsSet struct {
	// urlToPrefix looks up the prefix assigned to a namespace.
	urlToPrefix map[string]string
	// prefixToURL looks up the namespace a prefix has been assigned to.
	prefixToURL map[string]string
	// prefixes lists the assigned prefixes in the order they were added;
	// required for deterministic output, since map iteration order is random.
	prefixes []string
}
func (n *nsSet) getPrefix(ns string) string {
2017-08-29 21:28:26 -05:00
// Common namespaces are used in these 'any' elements and some versions
// of Word really want to the prefix to match what they write out. This
// occurred primarily with docProps/core.xml
if pfx, ok := wellKnownSchemasInv[ns]; ok {
if _, ok := n.prefixToURL[pfx]; !ok {
n.prefixToURL[pfx] = ns
n.urlToPrefix[ns] = pfx
n.prefixes = append(n.prefixes, pfx)
}
return pfx
}
// trying to construct a decent looking valid prefix
ns = strings.TrimFunc(ns, func(r rune) bool {
return !unicode.IsLetter(r)
})
// do we have a prefix for this ns?
if sc, ok := n.urlToPrefix[ns]; ok {
return sc
}
// determine the last path portion of the namespace
// "urn:schemas-microsoft-com:office:office" = "office"
// "http://schemas.microsoft.com/office/word/2012/wordml" = "wordml"
split := strings.Split(ns, "/")
split = strings.Split(split[len(split)-1], ":")
// last segment of the namesapce
last := split[len(split)-1]
lng := 0
pfx := []byte{}
for {
if lng < len(last) {
pfx = append(pfx, last[lng])
} else {
pfx = append(pfx, '_')
}
lng++
// is this prefix unused?
if _, ok := n.prefixToURL[string(pfx)]; !ok {
n.prefixToURL[string(pfx)] = ns
n.urlToPrefix[ns] = string(pfx)
n.prefixes = append(n.prefixes, string(pfx))
return string(pfx)
}
}
}
// applyToSE rewrites se in place so that namespaces are expressed as
// prefixes inside the local names: a namespaced element name or attribute
// name becomes "prefix:local" with an empty Space.
//
// Attributes are processed whether or not the element itself has a
// namespace. Attributes in the "xmlns" space (prefix declarations) are
// dropped. The attribute list is rebuilt in a new slice, so the slice se
// started with is not modified.
//
// The receiver is a pointer so that any prefix getPrefix has to assign here
// is kept in the set.
func (n *nsSet) applyToSE(se *xml.StartElement) {
	if se.Name.Space != "" {
		se.Name.Local = n.getPrefix(se.Name.Space) + ":" + se.Name.Local
		se.Name.Space = ""
	}

	var attrs []xml.Attr
	for _, attr := range se.Attr {
		// skip these as we create them later
		if attr.Name.Space == "xmlns" {
			continue
		}
		if attr.Name.Space != "" {
			attr.Name.Local = n.getPrefix(attr.Name.Space) + ":" + attr.Name.Local
			attr.Name.Space = ""
		}
		attrs = append(attrs, attr)
	}
	se.Attr = attrs
}
// applyToEE rewrites ee in place so that its namespace is expressed as a
// prefix inside the local name ("prefix:local" with an empty Space). End
// elements without a namespace are left unchanged.
//
// The receiver is a pointer so that any prefix getPrefix has to assign here
// is kept in the set.
func (n *nsSet) applyToEE(ee *xml.EndElement) {
	if ee.Name.Space == "" {
		return
	}
	ee.Name.Local = n.getPrefix(ee.Name.Space) + ":" + ee.Name.Local
	ee.Name.Space = ""
}
// MarshalXML implements the xml.Marshaler interface.
//
// The recorded tokens are written to e with every namespace expressed as a
// prefix. All prefixes are declared through xmlns attributes on the first
// start element of the stream. The start argument is ignored, since the
// recorded tokens already contain their own start element. Nothing is
// written when there are no tokens.
//
// The first error returned by the encoder is returned to the caller.
func (x *XSDAny) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
	if x == nil || len(x.Tokens) == 0 {
		return nil
	}

	ns := nsSet{
		urlToPrefix: make(map[string]string),
		prefixToURL: make(map[string]string),
	}

	// First pass: collect the namespaces so that all of them can be declared
	// on the first start element.
	for _, tok := range x.Tokens {
		se, ok := tok.(xml.StartElement)
		if !ok {
			continue
		}
		if se.Name.Space != "" {
			ns.getPrefix(se.Name.Space)
		}
		for _, attr := range se.Attr {
			if attr.Name.Space != "" && attr.Name.Space != "xmlns" {
				ns.getPrefix(attr.Name.Space)
			}
		}
	}

	// Second pass: rewrite names to their prefixed form and encode.
	declared := false
	for _, tok := range x.Tokens {
		switch el := tok.(type) {
		case xml.StartElement:
			ns.applyToSE(&el)
			if !declared {
				declared = true
				// Build a fresh slice so the recorded token's attribute
				// storage is never written to.
				attrs := make([]xml.Attr, 0, len(el.Attr)+len(ns.prefixes))
				attrs = append(attrs, el.Attr...)
				for _, pfx := range ns.prefixes {
					attrs = append(attrs, xml.Attr{
						Name:  xml.Name{Local: "xmlns:" + pfx},
						Value: ns.prefixToURL[pfx],
					})
				}
				el.Attr = attrs
			}
			tok = el
		case xml.EndElement:
			ns.applyToEE(&el)
			tok = el
		}
		if err := e.EncodeToken(tok); err != nil {
			return err
		}
	}
	return nil
}