mirror of
https://github.com/unidoc/unioffice.git
synced 2025-04-25 13:48:53 +08:00

* update license and terms * Fixes * Create ACKNOWLEDGEMENTS.md * Update ACKNOWLEDGEMENTS.md * Revert go.mod changes and remove go1.11 tests
270 lines
7.1 KiB
Go
270 lines
7.1 KiB
Go
// Copyright 2017 FoxyUtils ehf. All rights reserved.
|
|
//
|
|
// Use of this software package and source code is governed by the terms of the
|
|
// UniDoc End User License Agreement (EULA) that is available at:
|
|
// https://unidoc.io/eula/
|
|
// A trial license code for evaluation can be obtained at https://unidoc.io.
|
|
|
|
package unioffice
|
|
|
|
import (
|
|
"encoding/xml"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// XSDAny is a generic XML element used to marshal/unmarshal xsd:any types in
// the OOXML schema.
type XSDAny struct {
	// XMLName is the element name (namespace and local part).
	XMLName xml.Name
	// Attrs holds the attributes found on the element.
	Attrs []xml.Attr
	// Data holds the character data of the element.
	Data []byte
	// Nodes holds the child elements.
	Nodes []*XSDAny
}
|
|
|
|
// wellKnownSchemas maps a conventional namespace prefix to its namespace URL.
var wellKnownSchemas = map[string]string{
	// schemas.openxmlformats.org
	"a":  "http://schemas.openxmlformats.org/drawingml/2006/main",
	"mc": "http://schemas.openxmlformats.org/markup-compatibility/2006",
	"w":  "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
	"wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",

	// schemas.microsoft.com / urn:schemas-microsoft-com
	"mo":     "http://schemas.microsoft.com/office/mac/office/2008/main",
	"w10":    "urn:schemas-microsoft-com:office:word",
	"w14":    "http://schemas.microsoft.com/office/word/2010/wordml",
	"w15":    "http://schemas.microsoft.com/office/word/2012/wordml",
	"w16":    "http://schemas.microsoft.com/office/word/2018/wordml",
	"w16cex": "http://schemas.microsoft.com/office/word/2018/wordml/cex",
	"w16cid": "http://schemas.microsoft.com/office/word/2016/wordml/cid",
	"w16se":  "http://schemas.microsoft.com/office/word/2015/wordml/symex",
	"wne":    "http://schemas.microsoft.com/office/word/2006/wordml",
	"wp14":   "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing",
	"wpc":    "http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas",
	"wpg":    "http://schemas.microsoft.com/office/word/2010/wordprocessingGroup",
	"wpi":    "http://schemas.microsoft.com/office/word/2010/wordprocessingInk",
	"wps":    "http://schemas.microsoft.com/office/word/2010/wordprocessingShape",
	"x15ac":  "http://schemas.microsoft.com/office/spreadsheetml/2010/11/ac",

	// purl.org and w3.org
	"dc":      "http://purl.org/dc/elements/1.1/",
	"dcterms": "http://purl.org/dc/terms/",
	"xsi":     "http://www.w3.org/2001/XMLSchema-instance",
}
|
|
|
|
var wellKnownSchemasInv = func() map[string]string {
|
|
r := map[string]string{}
|
|
for pfx, ns := range wellKnownSchemas {
|
|
r[ns] = pfx
|
|
}
|
|
return r
|
|
}()
|
|
|
|
// any is the internal element representation handed to encoding/xml. The
// struct tags make it capture (and emit) every attribute, every child element
// and the character data of an element. Field order matters when encoding:
// child elements are declared before the character data.
type any struct {
	XMLName xml.Name
	Attrs   []xml.Attr `xml:",any,attr"`
	Nodes   []*any     `xml:",any"`
	Data    []byte     `xml:",chardata"`
}
|
|
|
|
func dd(a *any) {
|
|
for _, n := range a.Nodes {
|
|
dd(n)
|
|
}
|
|
}
|
|
|
|
// UnmarshalXML implements the xml.Unmarshaler interface.
|
|
func (x *XSDAny) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
|
a := any{}
|
|
if err := d.DecodeElement(&a, &start); err != nil {
|
|
return err
|
|
}
|
|
dd(&a)
|
|
x.XMLName = a.XMLName
|
|
x.Attrs = a.Attrs
|
|
x.Data = a.Data
|
|
x.Nodes = convertToXNodes(a.Nodes)
|
|
return nil
|
|
}
|
|
|
|
// nsSet tracks the namespace prefixes handed out by getPrefix while an element
// tree is being marshaled.
type nsSet struct {
	// urlToPrefix maps a namespace, as keyed by getPrefix, to its prefix.
	urlToPrefix map[string]string
	// prefixToURL maps an assigned prefix to the namespace it stands for.
	prefixToURL map[string]string
	// prefixes lists the assigned prefixes in assignment order; it is
	// required for deterministic output.
	prefixes []string
}
|
|
|
|
func (n *nsSet) getPrefix(ns string) string {
|
|
// Common namespaces are used in these 'any' elements and some versions
|
|
// of Word really want to the prefix to match what they write out. This
|
|
// occurred primarily with docProps/core.xml
|
|
if pfx, ok := wellKnownSchemasInv[ns]; ok {
|
|
if _, ok := n.prefixToURL[pfx]; !ok {
|
|
n.prefixToURL[pfx] = ns
|
|
n.urlToPrefix[ns] = pfx
|
|
n.prefixes = append(n.prefixes, pfx)
|
|
}
|
|
return pfx
|
|
}
|
|
|
|
// trying to construct a decent looking valid prefix
|
|
ns = strings.TrimFunc(ns, func(r rune) bool {
|
|
return !unicode.IsLetter(r)
|
|
})
|
|
|
|
// do we have a prefix for this ns?
|
|
if sc, ok := n.urlToPrefix[ns]; ok {
|
|
return sc
|
|
}
|
|
|
|
// determine the last path portion of the namespace
|
|
// "urn:schemas-microsoft-com:office:office" = "office"
|
|
// "http://schemas.microsoft.com/office/word/2012/wordml" = "wordml"
|
|
split := strings.Split(ns, "/")
|
|
split = strings.Split(split[len(split)-1], ":")
|
|
// last segment of the namesapce
|
|
last := split[len(split)-1]
|
|
lng := 0
|
|
pfx := []byte{}
|
|
for {
|
|
if lng < len(last) {
|
|
pfx = append(pfx, last[lng])
|
|
} else {
|
|
pfx = append(pfx, '_')
|
|
}
|
|
lng++
|
|
// is this prefix unused?
|
|
if _, ok := n.prefixToURL[string(pfx)]; !ok {
|
|
n.prefixToURL[string(pfx)] = ns
|
|
n.urlToPrefix[ns] = string(pfx)
|
|
n.prefixes = append(n.prefixes, string(pfx))
|
|
return string(pfx)
|
|
}
|
|
}
|
|
}
|
|
|
|
// ignorables is the set of namespace prefixes that MarshalXML lists in the
// mc:Ignorable attribute when they are in use.
var ignorables = map[string]bool{
	"w10":    true,
	"w14":    true,
	"w15":    true,
	"w16":    true,
	"w16cex": true,
	"w16cid": true,
	"w16se":  true,
	"wp14":   true,
	"x15ac":  true,
}
|
|
|
|
func (n nsSet) applyToNode(a *any) {
|
|
if a.XMLName.Space == "" {
|
|
return
|
|
}
|
|
pfx := n.getPrefix(a.XMLName.Space)
|
|
a.XMLName.Space = ""
|
|
a.XMLName.Local = pfx + ":" + a.XMLName.Local
|
|
tmpAttr := a.Attrs
|
|
a.Attrs = nil
|
|
for _, attr := range tmpAttr {
|
|
// skip namespace prefix declaration atributes as we create them later
|
|
if attr.Name.Space == "xmlns" {
|
|
continue
|
|
}
|
|
if attr.Name.Space != "" {
|
|
pfx := n.getPrefix(attr.Name.Space)
|
|
attr.Name.Space = ""
|
|
attr.Name.Local = pfx + ":" + attr.Name.Local
|
|
}
|
|
a.Attrs = append(a.Attrs, attr)
|
|
}
|
|
for _, cn := range a.Nodes {
|
|
n.applyToNode(cn)
|
|
}
|
|
}
|
|
|
|
// collectNS walks a tree of nodes finding any non-default namespace being used
|
|
func (x *XSDAny) collectNS(ns *nsSet) {
|
|
if x.XMLName.Space != "" {
|
|
ns.getPrefix(x.XMLName.Space)
|
|
}
|
|
for _, attr := range x.Attrs {
|
|
if attr.Name.Space != "" && attr.Name.Space != "xmlns" {
|
|
ns.getPrefix(attr.Name.Space)
|
|
}
|
|
}
|
|
for _, n := range x.Nodes {
|
|
n.collectNS(ns)
|
|
}
|
|
}
|
|
|
|
func convertToXNodes(an []*any) []*XSDAny {
|
|
ret := []*XSDAny{}
|
|
for _, a := range an {
|
|
x := &XSDAny{}
|
|
x.XMLName = a.XMLName
|
|
x.Attrs = a.Attrs
|
|
x.Data = a.Data
|
|
x.Nodes = convertToXNodes(a.Nodes)
|
|
ret = append(ret, x)
|
|
}
|
|
return ret
|
|
}
|
|
func convertToNodes(xn []*XSDAny) []*any {
|
|
ret := []*any{}
|
|
for _, x := range xn {
|
|
a := &any{}
|
|
a.XMLName = x.XMLName
|
|
attrs := []xml.Attr{}
|
|
for _, attr := range x.Attrs {
|
|
if attr.Name.Local != "xmlns" {
|
|
attrs = append(attrs, attr)
|
|
}
|
|
}
|
|
a.Attrs = attrs
|
|
a.Data = x.Data
|
|
a.Nodes = convertToNodes(x.Nodes)
|
|
ret = append(ret, a)
|
|
}
|
|
return ret
|
|
}
|
|
|
|
// MarshalXML implements the xml.Marshaler interface.
|
|
func (x *XSDAny) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
|
|
start.Name = x.XMLName
|
|
start.Attr = x.Attrs
|
|
a := any{}
|
|
a.XMLName = x.XMLName
|
|
a.Attrs = x.Attrs
|
|
a.Data = x.Data
|
|
a.Nodes = convertToNodes(x.Nodes)
|
|
attrsToIgnore := []string{}
|
|
includeIgnorable := false
|
|
|
|
ns := nsSet{
|
|
urlToPrefix: map[string]string{},
|
|
prefixToURL: map[string]string{},
|
|
}
|
|
|
|
// collect any namespaces in use in the node tree
|
|
x.collectNS(&ns)
|
|
|
|
// apply our new namespaces to the node and its children
|
|
ns.applyToNode(&a)
|
|
|
|
// add our prefixes and namespaces to root element
|
|
for _, pfx := range ns.prefixes {
|
|
if _, ok := ignorables[pfx]; ok {
|
|
attrsToIgnore = append(attrsToIgnore, pfx)
|
|
}
|
|
ns := ns.prefixToURL[pfx]
|
|
a.Attrs = append(a.Attrs, xml.Attr{
|
|
Name: xml.Name{Local: "xmlns:" + pfx},
|
|
Value: ns,
|
|
})
|
|
if pfx == "mc" {
|
|
includeIgnorable = true
|
|
}
|
|
}
|
|
if includeIgnorable && len(attrsToIgnore) > 0 {
|
|
a.Attrs = append(a.Attrs, xml.Attr{
|
|
Name: xml.Name{Local: "mc:Ignorable"},
|
|
Value: strings.Join(attrsToIgnore, " "),
|
|
})
|
|
}
|
|
|
|
// finally write out our new element
|
|
return e.Encode(&a)
|
|
}
|