mirror of
https://github.com/unidoc/unioffice.git
synced 2025-04-25 13:48:53 +08:00
234 lines
6.2 KiB
Go
234 lines
6.2 KiB
Go
// Copyright 2017 Baliance. All rights reserved.
|
|
//
|
|
// Use of this source code is governed by the terms of the Affero GNU General
|
|
// Public License version 3.0 as published by the Free Software Foundation and
|
|
// appearing in the file LICENSE included in the packaging of this file. A
|
|
// commercial license can be purchased by contacting sales@baliance.com.
|
|
|
|
package gooxml
|
|
|
|
import (
|
|
"encoding/xml"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// XSDAny is used to marshal/unmarshal xsd:any types in the OOXML schema.
|
|
type XSDAny struct {
|
|
XMLName xml.Name
|
|
Attrs []xml.Attr
|
|
Data []byte
|
|
Nodes []*XSDAny
|
|
}
|
|
|
|
var wellKnownSchemas = map[string]string{
|
|
"a": "http://schemas.openxmlformats.org/drawingml/2006/main",
|
|
"dc": "http://purl.org/dc/elements/1.1/",
|
|
"dcterms": "http://purl.org/dc/terms/",
|
|
"mc": "http://schemas.openxmlformats.org/markup-compatibility/2006",
|
|
"mo": "http://schemas.microsoft.com/office/mac/office/2008/main",
|
|
"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
|
|
"w10": "urn:schemas-microsoft-com:office:word",
|
|
"w14": "http://schemas.microsoft.com/office/word/2010/wordml",
|
|
"w15": "http://schemas.microsoft.com/office/word/2012/wordml",
|
|
"wne": "http://schemas.microsoft.com/office/word/2006/wordml",
|
|
"wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
|
|
"wp14": "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing",
|
|
"wpc": "http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas",
|
|
"wpg": "http://schemas.microsoft.com/office/word/2010/wordprocessingGroup",
|
|
"wpi": "http://schemas.microsoft.com/office/word/2010/wordprocessingInk",
|
|
"wps": "http://schemas.microsoft.com/office/word/2010/wordprocessingShape",
|
|
"xsi": "http://www.w3.org/2001/XMLSchema-instance",
|
|
"x15ac": "http://schemas.microsoft.com/office/spreadsheetml/2010/11/ac",
|
|
}
|
|
|
|
var wellKnownSchemasInv = func() map[string]string {
|
|
r := map[string]string{}
|
|
for pfx, ns := range wellKnownSchemas {
|
|
r[ns] = pfx
|
|
}
|
|
return r
|
|
}()
|
|
|
|
type any struct {
|
|
XMLName xml.Name
|
|
Attrs []xml.Attr `xml:",any,attr"`
|
|
Nodes []*any `xml:",any"`
|
|
Data []byte `xml:",chardata"`
|
|
}
|
|
|
|
func dd(a *any) {
|
|
for _, n := range a.Nodes {
|
|
dd(n)
|
|
}
|
|
}
|
|
|
|
// UnmarshalXML implements the xml.Unmarshaler interface.
|
|
func (x *XSDAny) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
|
a := any{}
|
|
if err := d.DecodeElement(&a, &start); err != nil {
|
|
return err
|
|
}
|
|
dd(&a)
|
|
x.XMLName = a.XMLName
|
|
x.Attrs = a.Attrs
|
|
x.Data = a.Data
|
|
x.Nodes = convertToXNodes(a.Nodes)
|
|
return nil
|
|
}
|
|
|
|
type nsSet struct {
|
|
urlToPrefix map[string]string
|
|
prefixToURL map[string]string
|
|
prefixes []string //required for deterministic output
|
|
}
|
|
|
|
func (n *nsSet) getPrefix(ns string) string {
|
|
// Common namespaces are used in these 'any' elements and some versions
|
|
// of Word really want to the prefix to match what they write out. This
|
|
// occurred primarily with docProps/core.xml
|
|
if pfx, ok := wellKnownSchemasInv[ns]; ok {
|
|
if _, ok := n.prefixToURL[pfx]; !ok {
|
|
n.prefixToURL[pfx] = ns
|
|
n.urlToPrefix[ns] = pfx
|
|
n.prefixes = append(n.prefixes, pfx)
|
|
}
|
|
return pfx
|
|
}
|
|
|
|
// trying to construct a decent looking valid prefix
|
|
ns = strings.TrimFunc(ns, func(r rune) bool {
|
|
return !unicode.IsLetter(r)
|
|
})
|
|
|
|
// do we have a prefix for this ns?
|
|
if sc, ok := n.urlToPrefix[ns]; ok {
|
|
return sc
|
|
}
|
|
|
|
// determine the last path portion of the namespace
|
|
// "urn:schemas-microsoft-com:office:office" = "office"
|
|
// "http://schemas.microsoft.com/office/word/2012/wordml" = "wordml"
|
|
split := strings.Split(ns, "/")
|
|
split = strings.Split(split[len(split)-1], ":")
|
|
// last segment of the namesapce
|
|
last := split[len(split)-1]
|
|
lng := 0
|
|
pfx := []byte{}
|
|
for {
|
|
if lng < len(last) {
|
|
pfx = append(pfx, last[lng])
|
|
} else {
|
|
pfx = append(pfx, '_')
|
|
}
|
|
lng++
|
|
// is this prefix unused?
|
|
if _, ok := n.prefixToURL[string(pfx)]; !ok {
|
|
n.prefixToURL[string(pfx)] = ns
|
|
n.urlToPrefix[ns] = string(pfx)
|
|
n.prefixes = append(n.prefixes, string(pfx))
|
|
return string(pfx)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (n nsSet) applyToNode(a *any) {
|
|
if a.XMLName.Space == "" {
|
|
return
|
|
}
|
|
pfx := n.getPrefix(a.XMLName.Space)
|
|
a.XMLName.Space = ""
|
|
a.XMLName.Local = pfx + ":" + a.XMLName.Local
|
|
tmpAttr := a.Attrs
|
|
a.Attrs = nil
|
|
for _, attr := range tmpAttr {
|
|
// skip namespace prefix declaration atributes as we create them later
|
|
if attr.Name.Space == "xmlns" {
|
|
continue
|
|
}
|
|
if attr.Name.Space != "" {
|
|
pfx := n.getPrefix(attr.Name.Space)
|
|
attr.Name.Space = ""
|
|
attr.Name.Local = pfx + ":" + attr.Name.Local
|
|
}
|
|
a.Attrs = append(a.Attrs, attr)
|
|
}
|
|
for _, cn := range a.Nodes {
|
|
n.applyToNode(cn)
|
|
}
|
|
}
|
|
|
|
// collectNS walks a tree of nodes finding any non-default namespace being used
|
|
func (x *XSDAny) collectNS(ns *nsSet) {
|
|
if x.XMLName.Space != "" {
|
|
ns.getPrefix(x.XMLName.Space)
|
|
}
|
|
for _, attr := range x.Attrs {
|
|
if attr.Name.Space != "" && attr.Name.Space != "xmlns" {
|
|
ns.getPrefix(attr.Name.Space)
|
|
}
|
|
}
|
|
for _, n := range x.Nodes {
|
|
n.collectNS(ns)
|
|
}
|
|
}
|
|
|
|
func convertToXNodes(an []*any) []*XSDAny {
|
|
ret := []*XSDAny{}
|
|
for _, a := range an {
|
|
x := &XSDAny{}
|
|
x.XMLName = a.XMLName
|
|
x.Attrs = a.Attrs
|
|
x.Data = a.Data
|
|
x.Nodes = convertToXNodes(a.Nodes)
|
|
ret = append(ret, x)
|
|
}
|
|
return ret
|
|
}
|
|
func convertToNodes(xn []*XSDAny) []*any {
|
|
ret := []*any{}
|
|
for _, x := range xn {
|
|
a := &any{}
|
|
a.XMLName = x.XMLName
|
|
a.Attrs = x.Attrs
|
|
a.Data = x.Data
|
|
a.Nodes = convertToNodes(x.Nodes)
|
|
ret = append(ret, a)
|
|
}
|
|
return ret
|
|
}
|
|
|
|
// MarshalXML implements the xml.Marshaler interface.
|
|
func (x *XSDAny) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
|
|
start.Name = x.XMLName
|
|
start.Attr = x.Attrs
|
|
a := any{}
|
|
a.XMLName = x.XMLName
|
|
a.Attrs = x.Attrs
|
|
a.Data = x.Data
|
|
a.Nodes = convertToNodes(x.Nodes)
|
|
|
|
ns := nsSet{
|
|
urlToPrefix: map[string]string{},
|
|
prefixToURL: map[string]string{},
|
|
}
|
|
|
|
// collect any namespaces in use in the node tree
|
|
x.collectNS(&ns)
|
|
|
|
// apply our new namespaces to the node and its children
|
|
ns.applyToNode(&a)
|
|
|
|
// add our prefixes and namespaces to root element
|
|
for _, pfx := range ns.prefixes {
|
|
ns := ns.prefixToURL[pfx]
|
|
a.Attrs = append(a.Attrs, xml.Attr{
|
|
Name: xml.Name{Local: "xmlns:" + pfx},
|
|
Value: ns,
|
|
})
|
|
}
|
|
|
|
// finally write out our new element
|
|
return e.Encode(&a)
|
|
}
|