document: read/restore all contents of a Word 2016 file

Adds support for serializing/deserializing the following document parts:

- webSettings
- fontTable
- endnotes
- footnotes
- themes
- optional numbering
This commit is contained in:
Todd 2017-08-29 17:27:02 -05:00
parent 13f1b33002
commit 105ef6916e
23 changed files with 296 additions and 107 deletions

Binary file not shown.

Binary file not shown.

View File

@ -27,10 +27,13 @@ const (
SharedStingsType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"
// WML
HeaderType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
FooterType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
NumberingType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering"
FontTableType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable"
HeaderType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
FooterType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
NumberingType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering"
FontTableType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable"
WebSettingsType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings"
FootNotesType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes"
EndNotesType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes"
// PML
SlideType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"

File diff suppressed because one or more lines are too long

View File

@ -12,7 +12,6 @@ import (
"errors"
"fmt"
"reflect"
"sort"
)
// Any is the interface used for marshaling/unmarshaling xsd:any
@ -21,73 +20,6 @@ type Any interface {
UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
}
// Raw captures an XML element that has no registered constructor: its name,
// its attributes and its inner XML bytes are stored as decoded.
type Raw struct {
	XMLName xml.Name
	Attrs   []xml.Attr `xml:",any,attr"`
	Value   []byte     `xml:",innerxml"`
}

// MarshalXML lets Raw satisfy the Any interface. The start element supplied
// by the encoder is not used; the element is rebuilt from the stored name and
// attributes and the stored inner XML is written as its content.
func (r *Raw) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
	elem := xml.StartElement{Name: r.XMLName}

	for _, a := range r.Attrs {
		switch {
		case a.Name.Space == "xmlns" && a.Value == r.XMLName.Space:
			// An xmlns:foo="http://foo.com" declaration that matches the
			// namespace of this element: write the declaration explicitly and
			// turn <bar xmlns="http://foo.com"/> into
			// <foo:bar xmlns:foo="http://foo.com"/>.
			decl := xml.Attr{
				Name:  xml.Name{Local: "xmlns:" + a.Name.Local},
				Value: elem.Name.Space,
			}
			elem.Attr = append(elem.Attr, decl)
			elem.Name.Local = a.Name.Local + ":" + elem.Name.Local
			// Clear the namespace so it is not also written as a default
			// xmlns="http://foo.com" declaration.
			elem.Name.Space = ""
		case a.Name.Space == "xmlns", a.Name.Local == "xmlns":
			// Any other namespace declaration is left out of the output.
		default:
			elem.Attr = append(elem.Attr, a)
		}
	}

	// Order attributes by namespace and then local name for consistent output.
	sort.Slice(elem.Attr, func(i, j int) bool {
		a, b := elem.Attr[i].Name, elem.Attr[j].Name
		if a.Space == b.Space {
			return a.Local < b.Local
		}
		return a.Space < b.Space
	})

	body := struct {
		Attrs []xml.Attr `xml:",any,attr"`
		Value []byte     `xml:",innerxml"`
	}{Value: r.Value}
	return e.EncodeElement(body, elem)
}

// UnmarshalXML lets Raw satisfy the Any interface. The element is decoded
// into a temporary value and r is only updated when decoding succeeds.
func (r *Raw) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	var decoded struct {
		XMLName xml.Name
		Attrs   []xml.Attr `xml:",any,attr"`
		Value   []byte     `xml:",innerxml"`
	}
	if err := d.DecodeElement(&decoded, &start); err != nil {
		return err
	}
	r.XMLName, r.Attrs, r.Value = decoded.XMLName, decoded.Attrs, decoded.Value
	return nil
}
var creatorFns = map[string]interface{}{}
// RegisterConstructor registers a constructor function used for unmarshaling
@ -100,7 +32,7 @@ func RegisterConstructor(ns, name string, fn interface{}) {
func CreateElement(start xml.StartElement) (Any, error) {
fn, ok := creatorFns[start.Name.Space+"/"+start.Name.Local]
if !ok {
r := &Raw{}
r := &XSDAny{}
return r, nil
}

View File

@ -26,7 +26,10 @@ func TestRawEncode(t *testing.T) {
}
dec := xml.NewDecoder(f)
var got *bytes.Buffer
for i := 0; i < 2; i++ {
// should round trip multiple times with no changes after
// the first encoding
for i := 0; i < 5; i++ {
stng := wml.NewSettings()
if err := dec.Decode(stng); err != nil {
t.Errorf("error decoding settings: %s", err)
@ -41,12 +44,14 @@ func TestRawEncode(t *testing.T) {
dec = xml.NewDecoder(bytes.NewReader(got.Bytes()))
}
xmlStr := got.String()
beg := strings.Index(xmlStr, "<w:hdrShapeDefaults>")
end := strings.Index(xmlStr, "</w:hdrShapeDefaults>")
beg := strings.LastIndex(xmlStr, "<w:hdrShapeDefaults>")
end := strings.LastIndex(xmlStr, "</w:hdrShapeDefaults>")
gotRaw := xmlStr[beg+20 : end]
exp := `<shapedefaults xmlns="urn:schemas-microsoft-com:office:office" spidmax="2049" xmlns:_="urn:schemas-microsoft-com:vml" _:ext="edit"/>`
exp := "<o:shapedefaults v:ext=\"edit\" spidmax=\"2049\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:v=\"urn:schemas-microsoft-com:vml\"><o:idmap v:ext=\"edit\" data=\"1\"/></o:shapedefaults>"
if gotRaw != exp {
t.Errorf("expected\n%q\ngot\n%q\n", exp, gotRaw)
}
}

View File

@ -17,9 +17,9 @@ import (
"os"
"path/filepath"
"baliance.com/gooxml/schema/schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
"baliance.com/gooxml/common"
dml "baliance.com/gooxml/schema/schemas.openxmlformats.org/drawingml"
st "baliance.com/gooxml/schema/schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
wml "baliance.com/gooxml/schema/schemas.openxmlformats.org/wordprocessingml"
"baliance.com/gooxml/zippkg"
@ -28,14 +28,19 @@ import (
// Document is a text document that can be written out in the OOXML .docx format.
type Document struct {
common.DocBase
x *wml.Document
Settings Settings
Numbering Numbering
Styles Styles
headers []*wml.Hdr
footers []*wml.Ftr
docRels common.Relationships
images []*iref
x *wml.Document
Settings Settings
Numbering Numbering
Styles Styles
headers []*wml.Hdr
footers []*wml.Ftr
docRels common.Relationships
images []*iref
themes []*dml.Theme
webSettings *wml.WebSettings
fontTable *wml.Fonts
endNotes *wml.Endnotes
footNotes *wml.Footnotes
}
// New constructs an empty document that content can be added to.
@ -43,7 +48,7 @@ func New() *Document {
d := &Document{x: wml.NewDocument()}
d.ContentTypes = common.NewContentTypes()
d.x.Body = wml.NewCT_Body()
d.x.ConformanceAttr = sharedTypes.ST_ConformanceClassTransitional
d.x.ConformanceAttr = st.ST_ConformanceClassTransitional
d.docRels = common.NewRelationships()
d.AppProperties = common.NewAppProperties()
@ -138,8 +143,10 @@ func (d *Document) Save(w io.Writer) error {
if err := zippkg.MarshalXML(z, "word/document.xml", d.x); err != nil {
return err
}
if err := zippkg.MarshalXML(z, "word/numbering.xml", d.Numbering.X()); err != nil {
return err
if d.Numbering.X() != nil {
if err := zippkg.MarshalXML(z, "word/numbering.xml", d.Numbering.X()); err != nil {
return err
}
}
if err := zippkg.MarshalXML(z, "word/styles.xml", d.Styles.X()); err != nil {
return err
@ -147,6 +154,32 @@ func (d *Document) Save(w io.Writer) error {
if err := zippkg.MarshalXML(z, "word/_rels/document.xml.rels", d.docRels.X()); err != nil {
return err
}
if d.webSettings != nil {
if err := zippkg.MarshalXML(z, "word/webSettings.xml", d.webSettings); err != nil {
return err
}
}
if d.fontTable != nil {
if err := zippkg.MarshalXML(z, "word/fontTable.xml", d.fontTable); err != nil {
return err
}
}
if d.endNotes != nil {
if err := zippkg.MarshalXML(z, "word/endnotes.xml", d.endNotes); err != nil {
return err
}
}
if d.footNotes != nil {
if err := zippkg.MarshalXML(z, "word/footnotes.xml", d.footNotes); err != nil {
return err
}
}
for i, thm := range d.themes {
if err := zippkg.MarshalXML(z, fmt.Sprintf("word/theme/theme%d.xml", i+1), thm); err != nil {
return err
}
}
for i, hdr := range d.headers {
fn := fmt.Sprintf("word/header%d.xml", i+1)
if err := zippkg.MarshalXML(z, fn, hdr); err != nil {
@ -242,6 +275,9 @@ func Open(filename string) (*Document, error) {
// Read reads a document from an io.Reader.
func Read(r io.ReaderAt, size int64) (*Document, error) {
doc := New()
// numbering is not required
doc.Numbering.x = nil
td, err := ioutil.TempDir("", "gooxml-docx")
if err != nil {
return nil, err
@ -301,7 +337,7 @@ func Read(r io.ReaderAt, size int64) (*Document, error) {
case common.SettingsType:
decMap[basePaths[doc.docRels]+r.Target()] = doc.Settings.X()
case common.NumberingType:
doc.Numbering.Clear()
doc.Numbering = NewNumbering()
decMap[basePaths[doc.docRels]+r.Target()] = doc.Numbering.X()
case common.StylesType:
doc.Styles.Clear()
@ -314,6 +350,22 @@ func Read(r io.ReaderAt, size int64) (*Document, error) {
ftr := wml.NewFtr()
doc.footers = append(doc.footers, ftr)
decMap[basePaths[doc.docRels]+r.Target()] = ftr
case common.ThemeType:
thm := dml.NewTheme()
doc.themes = append(doc.themes, thm)
decMap[basePaths[doc.docRels]+r.Target()] = thm
case common.WebSettingsType:
doc.webSettings = wml.NewWebSettings()
decMap[basePaths[doc.docRels]+r.Target()] = doc.webSettings
case common.FontTableType:
doc.fontTable = wml.NewFonts()
decMap[basePaths[doc.docRels]+r.Target()] = doc.fontTable
case common.EndNotesType:
doc.endNotes = wml.NewEndnotes()
decMap[basePaths[doc.docRels]+r.Target()] = doc.endNotes
case common.FootNotesType:
doc.footNotes = wml.NewFootnotes()
decMap[basePaths[doc.docRels]+r.Target()] = doc.footNotes
case common.ImageType:
imgPath := basePaths[doc.docRels] + r.Target()
for i, f := range files {

View File

@ -39,7 +39,7 @@ func TestOpen(t *testing.T) {
t.Errorf("created an invalid document: %s", err)
}
wb.Save(&got)
testhelper.CompareZip(t, "simple-1.docx", got.Bytes())
testhelper.CompareZip(t, "simple-1.docx", got.Bytes(), true)
}
func TestOpenHeaderFooter(t *testing.T) {
@ -70,3 +70,16 @@ func TestAddParagraph(t *testing.T) {
t.Errorf("expected 2 paragraphs, got %d", len(doc.Paragraphs()))
}
}
// TestOpenWord2016 opens a document written by Word 2016 for Windows, saves it
// back into memory and checks the resulting archive against the original with
// CompareGoldenZipFilesOnly.
func TestOpenWord2016(t *testing.T) {
	doc, err := document.Open("../testdata/Office2016/Word-Windows.docx")
	if err != nil {
		// doc cannot be used after a failed open, so stop here rather than
		// continuing into a nil dereference.
		t.Fatalf("error opening Windows Word 2016 document: %s", err)
	}

	got := bytes.Buffer{}
	if err := doc.Save(&got); err != nil {
		// a partially written archive is not worth comparing
		t.Fatalf("error saving W216 file: %s", err)
	}
	testhelper.CompareGoldenZipFilesOnly(t, "../../testdata/Office2016/Word-Windows.docx", got.Bytes())
}

Binary file not shown.

View File

@ -1,2 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:settings xmlns="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:ma="http://schemas.openxmlformats.org/schemaLibrary/2006/main" xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:xml="http://www.w3.org/XML/1998/namespace"><w:zoom w:percent="100"/><w:removePersonalInformation/><w:removeDateAndTime/><w:defaultTabStop w:val="720"/><w:characterSpacingControl w:val="doNotCompress"/><w:hdrShapeDefaults><shapedefaults xmlns="urn:schemas-microsoft-com:office:office" spidmax="2049" xmlns:_="urn:schemas-microsoft-com:vml" _:ext="edit"/></w:hdrShapeDefaults><w:footnotePr><w:footnote w:id="-1"/><w:footnote w:id="0"/></w:footnotePr><w:endnotePr><w:endnote w:id="-1"/><w:endnote w:id="0"/></w:endnotePr><w:compat><w:compatSetting w:name="compatibilityMode" w:uri="http://schemas.microsoft.com/office/word" w:val="15"/><w:compatSetting w:name="overrideTableStyleFontSizeAndJustification" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="enableOpenTypeFeatures" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="doNotFlipMirrorIndents" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="differentiateMultirowTableHeaders" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/></w:compat><w:rsids><w:rsidRoot w:val="003C2D97"/><w:rsid w:val="002939C2"/><w:rsid w:val="003C2D97"/><w:rsid w:val="00A45D78"/></w:rsids><m:mathPr xmlns="http://schemas.openxmlformats.org/officeDocument/2006/math" 
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:xml="http://www.w3.org/XML/1998/namespace"><m:mathFont m:val="Cambria Math"/><m:brkBin m:val="before"/><m:brkBinSub m:val="--"/><m:smallFrac m:val=""/><m:dispDef/><m:lMargin m:val="0"/><m:rMargin m:val="0"/><m:defJc m:val="centerGroup"/><m:wrapIndent m:val="1440"/><m:intLim m:val="subSup"/><m:naryLim m:val="undOvr"/></m:mathPr><w:themeFontLang w:val="en-US"/><w:clrSchemeMapping w:bg1="light1" w:t1="dark1" w:bg2="light2" w:t2="dark2" w:accent1="accent1" w:accent2="accent2" w:accent3="accent3" w:accent4="accent4" w:accent5="accent5" w:accent6="accent6" w:hyperlink="hyperlink" w:followedHyperlink="followedHyperlink"/><w:shapeDefaults><shapedefaults xmlns="urn:schemas-microsoft-com:office:office" spidmax="2049" xmlns:_="urn:schemas-microsoft-com:vml" _:ext="edit"/><shapelayout xmlns="urn:schemas-microsoft-com:office:office" xmlns:_="urn:schemas-microsoft-com:vml" _:ext="edit"><o:idmap v:ext="edit" data="1"/></shapelayout></w:shapeDefaults><w:decimalSymbol w:val="."/><w:listSeparator w:val=","/></w:settings>
<w:settings xmlns="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:ma="http://schemas.openxmlformats.org/schemaLibrary/2006/main" xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:xml="http://www.w3.org/XML/1998/namespace"><w:zoom w:percent="100"/><w:removePersonalInformation/><w:removeDateAndTime/><w:defaultTabStop w:val="720"/><w:characterSpacingControl w:val="doNotCompress"/><w:hdrShapeDefaults><o:shapedefaults v:ext="edit" spidmax="2049" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:v="urn:schemas-microsoft-com:vml"/></w:hdrShapeDefaults><w:footnotePr><w:footnote w:id="-1"/><w:footnote w:id="0"/></w:footnotePr><w:endnotePr><w:endnote w:id="-1"/><w:endnote w:id="0"/></w:endnotePr><w:compat><w:compatSetting w:name="compatibilityMode" w:uri="http://schemas.microsoft.com/office/word" w:val="15"/><w:compatSetting w:name="overrideTableStyleFontSizeAndJustification" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="enableOpenTypeFeatures" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="doNotFlipMirrorIndents" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="differentiateMultirowTableHeaders" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/></w:compat><w:rsids><w:rsidRoot w:val="003C2D97"/><w:rsid w:val="002939C2"/><w:rsid w:val="003C2D97"/><w:rsid w:val="00A45D78"/></w:rsids><m:mathPr xmlns="http://schemas.openxmlformats.org/officeDocument/2006/math" 
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:xml="http://www.w3.org/XML/1998/namespace"><m:mathFont m:val="Cambria Math"/><m:brkBin m:val="before"/><m:brkBinSub m:val="--"/><m:smallFrac m:val="false"/><m:dispDef/><m:lMargin m:val="0"/><m:rMargin m:val="0"/><m:defJc m:val="centerGroup"/><m:wrapIndent m:val="1440"/><m:intLim m:val="subSup"/><m:naryLim m:val="undOvr"/></m:mathPr><w:themeFontLang w:val="en-US"/><w:clrSchemeMapping w:bg1="light1" w:t1="dark1" w:bg2="light2" w:t2="dark2" w:accent1="accent1" w:accent2="accent2" w:accent3="accent3" w:accent4="accent4" w:accent5="accent5" w:accent6="accent6" w:hyperlink="hyperlink" w:followedHyperlink="followedHyperlink"/><w:shapeDefaults><o:shapedefaults v:ext="edit" spidmax="2049" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:v="urn:schemas-microsoft-com:vml"/><o:shapelayout v:ext="edit" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:v="urn:schemas-microsoft-com:vml"><o:idmap v:ext="edit" data="1"/></o:shapelayout></w:shapeDefaults><w:decimalSymbol w:val="."/><w:listSeparator w:val=","/></w:settings>

Binary file not shown.

View File

@ -17,7 +17,7 @@ import (
)
func ParseUnionST_OnOff(s string) (sharedTypes.ST_OnOff, error) {
return sharedTypes.ST_OnOff{}, nil
return sharedTypes.ParseUnionST_OnOff(s)
}
func ParseUnionST_TwipsMeasure(s string) (sharedTypes.ST_TwipsMeasure, error) {
ret := sharedTypes.ST_TwipsMeasure{}

View File

@ -43,11 +43,13 @@ var ST_PositiveFixedPercentagePatternRe = regexp.MustCompile(ST_PositiveFixedPer
func ParseUnionST_OnOff(s string) (ST_OnOff, error) {
r := ST_OnOff{}
tru := true
switch s {
case "true", "1", "on":
tru := true
r.Bool = &tru
default:
fals := false
r.Bool = &fals
}
return r, nil
}

View File

@ -89,7 +89,7 @@ func TestOpen(t *testing.T) {
t.Errorf("created an invalid spreadsheet: %s", err)
}
wb.Save(&got)
testhelper.CompareZip(t, "simple-1.xlsx", got.Bytes())
testhelper.CompareZip(t, "simple-1.xlsx", got.Bytes(), true)
}
func TestOpenExcel2016(t *testing.T) {

View File

@ -1,2 +1,3 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:settings xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main" mc:Ignorable="w14 w15 w16se"><w:zoom w:percent="100"/><w:removePersonalInformation/><w:removeDateAndTime/><w:defaultTabStop w:val="720"/><w:characterSpacingControl w:val="doNotCompress"/><w:hdrShapeDefaults><o:shapedefaults v:ext="edit" spidmax="2049"/></w:hdrShapeDefaults><w:footnotePr><w:footnote w:id="-1"/><w:footnote w:id="0"/></w:footnotePr><w:endnotePr><w:endnote w:id="-1"/><w:endnote w:id="0"/></w:endnotePr><w:compat><w:compatSetting w:name="compatibilityMode" w:uri="http://schemas.microsoft.com/office/word" w:val="15"/><w:compatSetting w:name="overrideTableStyleFontSizeAndJustification" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="enableOpenTypeFeatures" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="doNotFlipMirrorIndents" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="differentiateMultirowTableHeaders" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/></w:compat><w:rsids><w:rsidRoot w:val="003C2D97"/><w:rsid w:val="002939C2"/><w:rsid w:val="003C2D97"/><w:rsid w:val="00A45D78"/></w:rsids><m:mathPr><m:mathFont m:val="Cambria Math"/><m:brkBin m:val="before"/><m:brkBinSub m:val="--"/><m:smallFrac m:val="0"/><m:dispDef/><m:lMargin 
m:val="0"/><m:rMargin m:val="0"/><m:defJc m:val="centerGroup"/><m:wrapIndent m:val="1440"/><m:intLim m:val="subSup"/><m:naryLim m:val="undOvr"/></m:mathPr><w:themeFontLang w:val="en-US"/><w:clrSchemeMapping w:bg1="light1" w:t1="dark1" w:bg2="light2" w:t2="dark2" w:accent1="accent1" w:accent2="accent2" w:accent3="accent3" w:accent4="accent4" w:accent5="accent5" w:accent6="accent6" w:hyperlink="hyperlink" w:followedHyperlink="followedHyperlink"/><w:shapeDefaults><o:shapedefaults v:ext="edit" spidmax="2049"/><o:shapelayout v:ext="edit"><o:idmap v:ext="edit" data="1"/></o:shapelayout></w:shapeDefaults><w:decimalSymbol w:val="."/><w:listSeparator w:val=","/><w15:chartTrackingRefBased/></w:settings>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:settings xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main" mc:Ignorable="w14 w15 w16se">
<w:hdrShapeDefaults><o:shapedefaults v:ext="edit" spidmax="2049"><o:idmap v:ext="edit" data="1"/></o:shapedefaults></w:hdrShapeDefaults></w:settings>

View File

@ -36,7 +36,7 @@ func CompareGoldenXML(t *testing.T, expectedFn string, got []byte) {
dumpXmlDiff(t, exp, got)
}
func CompareZip(t *testing.T, expectedFn string, got []byte) {
func CompareZip(t *testing.T, expectedFn string, got []byte, cmpFileContents bool) {
golden := filepath.Join("testdata", expectedFn)
zgot, err := zip.NewReader(bytes.NewReader(got), int64(len(got)))
if err != nil {
@ -57,8 +57,19 @@ func CompareZip(t *testing.T, expectedFn string, got []byte) {
if err != nil {
t.Errorf("unable to read file: %s", err)
}
t.Run(expectedFn, compareZipContents(zexp, zgot))
t.Run(expectedFn, compareZipContents(zexp, zgot, cmpFileContents))
}
// CompareGoldenZipFilesOnly compares got against the golden archive named by
// expectedFn (relative to the testdata directory) by calling CompareZip with
// file content comparison turned off. When the update flag is set, got is
// first written over the golden file.
func CompareGoldenZipFilesOnly(t *testing.T, expectedFn string, got []byte) {
	if *update {
		goldenPath := filepath.Join("testdata", expectedFn)
		err := ioutil.WriteFile(goldenPath, got, 0644)
		if err != nil {
			t.Fatal(err)
		}
	}
	const cmpFileContents = false
	CompareZip(t, expectedFn, got, cmpFileContents)
}
func CompareGoldenZip(t *testing.T, expectedFn string, got []byte) {
golden := filepath.Join("testdata", expectedFn)
if *update {
@ -66,10 +77,10 @@ func CompareGoldenZip(t *testing.T, expectedFn string, got []byte) {
t.Fatal(err)
}
}
CompareZip(t, expectedFn, got)
CompareZip(t, expectedFn, got, true)
}
func compareZipContents(exp, got *zip.Reader) func(t *testing.T) {
func compareZipContents(exp, got *zip.Reader, cmpFileContents bool) func(t *testing.T) {
return func(t *testing.T) {
expFiles := make([]*zip.File, len(exp.File))
copy(expFiles, exp.File)
@ -85,8 +96,10 @@ func compareZipContents(exp, got *zip.Reader) func(t *testing.T) {
continue
}
if f.Name == g.Name {
// comparing ones that have the same name
t.Run(f.Name, compareFiles(f, g))
if cmpFileContents {
// comparing contents that have the same name
t.Run(f.Name, compareFiles(f, g))
}
expFiles[i] = nil
gotFiles[j] = nil
}

168
xsdany.go Normal file
View File

@ -0,0 +1,168 @@
package gooxml
import (
"encoding/xml"
"log"
"strings"
)
// XSDAny is used to marshal/unmarshal xsd:any types in the OOXML schema.
type XSDAny struct {
	// Attrs is not populated by UnmarshalXML; element attributes are kept on
	// the StartElement tokens stored in Tokens.
	Attrs []xml.Attr
	// Tokens is the token stream of the captured element, beginning with its
	// StartElement and ending with the matching EndElement.
	Tokens []xml.Token
}

// cloneToken returns a copy of tok that does not share byte or attribute
// storage with the decoder that produced it. Every token type the decoder
// emits is handled; a value of any other type is logged and returned as-is
// instead of terminating the process.
func cloneToken(tok xml.Token) xml.Token {
	switch el := tok.(type) {
	case xml.CharData, xml.Comment, xml.ProcInst, xml.Directive, xml.StartElement:
		return xml.CopyToken(tok)
	case xml.EndElement:
		// holds only a name, so there is nothing to copy
		return tok
	default:
		log.Printf("unsupported XML token type %T", el)
		return tok
	}
}

// UnmarshalXML implements the xml.Unmarshaler interface. It appends start and
// every following token to x.Tokens, up to and including the end element that
// closes start. Any error returned by the decoder is returned unchanged.
func (x *XSDAny) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	x.Tokens = append(x.Tokens, cloneToken(start))

	// Track nesting depth instead of comparing names so that a descendant
	// with the same name as start does not end the capture early.
	for depth := 1; depth > 0; {
		tok, err := d.Token()
		if err != nil {
			return err
		}
		switch tok.(type) {
		case xml.StartElement:
			depth++
		case xml.EndElement:
			depth--
		}
		x.Tokens = append(x.Tokens, cloneToken(tok))
	}
	return nil
}
// nsSet assigns short, unique prefixes to namespace URLs and remembers the
// order in which they were assigned. The zero value is ready to use.
type nsSet struct {
	urlToPrefix map[string]string
	prefixToURL map[string]string
	prefixes    []string // required for deterministic output
}

// validPrefixByte reports whether c may appear in a generated prefix. Only
// ASCII letters and '_' are accepted in the first position; digits, '-' and
// '.' are accepted afterwards.
func validPrefixByte(c byte, first bool) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c == '_':
		return true
	case first:
		return false
	default:
		return (c >= '0' && c <= '9') || c == '-' || c == '.'
	}
}

// getPrefix returns the prefix registered for ns, registering a new one on
// first use. New prefixes are built from the leading bytes of the last path
// segment of the namespace:
//
//	"urn:schemas-microsoft-com:office:office"              -> "o"
//	"http://schemas.microsoft.com/office/word/2012/wordml" -> "w"
//
// The prefix is lengthened one byte at a time until it is unused, padding
// with '_' once the segment is exhausted. Bytes that are not valid in a
// prefix are replaced by '_'.
func (n *nsSet) getPrefix(ns string) string {
	// The XML namespace is permanently bound to "xml"; it must not be bound
	// to another prefix and needs no declaration, so it is never registered.
	const xmlNS = "http://www.w3.org/XML/1998/namespace"
	if ns == xmlNS {
		return "xml"
	}

	// do we have a prefix for this ns?
	if pfx, ok := n.urlToPrefix[ns]; ok {
		return pfx
	}
	if n.urlToPrefix == nil {
		n.urlToPrefix = make(map[string]string)
	}
	if n.prefixToURL == nil {
		n.prefixToURL = make(map[string]string)
	}

	// last segment of the namespace, split on either '/' or ':'
	last := ns
	if i := strings.LastIndexAny(last, "/:"); i >= 0 {
		last = last[i+1:]
	}

	pfx := make([]byte, 0, len(last)+1)
	for i := 0; ; i++ {
		c := byte('_')
		if i < len(last) && validPrefixByte(last[i], i == 0) {
			c = last[i]
		}
		pfx = append(pfx, c)

		cand := string(pfx)
		if cand == "xml" || cand == "xmlns" {
			// reserved, keep lengthening
			continue
		}
		// is this prefix unused?
		if _, used := n.prefixToURL[cand]; used {
			continue
		}
		n.prefixToURL[cand] = ns
		n.urlToPrefix[ns] = cand
		n.prefixes = append(n.prefixes, cand)
		return cand
	}
}

// applyToSE rewrites se so that namespaces are expressed as "prefix:local"
// names with an empty Space. Attributes in the "xmlns" space are dropped
// because the caller declares the namespaces itself. Attributes are processed
// even when the element has no namespace of its own.
func (n *nsSet) applyToSE(se *xml.StartElement) {
	if se.Name.Space != "" {
		se.Name.Local = n.getPrefix(se.Name.Space) + ":" + se.Name.Local
		se.Name.Space = ""
	}

	// Build a fresh slice so the attributes of the source token are untouched.
	var attrs []xml.Attr
	for _, attr := range se.Attr {
		// skip these as we create them later
		if attr.Name.Space == "xmlns" {
			continue
		}
		if attr.Name.Space != "" {
			attr.Name.Local = n.getPrefix(attr.Name.Space) + ":" + attr.Name.Local
			attr.Name.Space = ""
		}
		attrs = append(attrs, attr)
	}
	se.Attr = attrs
}

// applyToEE rewrites the name of ee to "prefix:local" form with an empty
// Space. An end element without a namespace is left unchanged.
func (n *nsSet) applyToEE(ee *xml.EndElement) {
	if ee.Name.Space == "" {
		return
	}
	ee.Name.Local = n.getPrefix(ee.Name.Space) + ":" + ee.Name.Local
	ee.Name.Space = ""
}
// MarshalXML implements the xml.Marshaler interface. It replays x.Tokens to
// the encoder with every namespaced element and attribute name rewritten to
// "prefix:local" form, and declares all prefixes on the first start element.
// The start element supplied by the encoder is not used. Nothing is written
// when x holds no tokens. The first error returned by the encoder aborts the
// replay and is returned.
func (x *XSDAny) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
	if len(x.Tokens) == 0 {
		return nil
	}

	ns := nsSet{
		urlToPrefix: make(map[string]string),
		prefixToURL: make(map[string]string),
	}

	// collect the namespaces up front so they can all be declared on the
	// first element
	for _, tok := range x.Tokens {
		se, ok := tok.(xml.StartElement)
		if !ok {
			continue
		}
		if se.Name.Space != "" {
			ns.getPrefix(se.Name.Space)
		}
		for _, attr := range se.Attr {
			if attr.Name.Space != "" && attr.Name.Space != "xmlns" {
				ns.getPrefix(attr.Name.Space)
			}
		}
	}

	declared := false
	for _, tok := range x.Tokens {
		switch el := tok.(type) {
		case xml.StartElement:
			ns.applyToSE(&el)
			if !declared {
				declared = true
				// Cap the slice so append always copies and never writes
				// into attribute storage owned by the token in x.Tokens.
				attrs := el.Attr[:len(el.Attr):len(el.Attr)]
				for _, pfx := range ns.prefixes {
					attrs = append(attrs, xml.Attr{
						Name:  xml.Name{Local: "xmlns:" + pfx},
						Value: ns.prefixToURL[pfx],
					})
				}
				el.Attr = attrs
			}
			tok = el
		case xml.EndElement:
			ns.applyToEE(&el)
			tok = el
		}
		if err := e.EncodeToken(tok); err != nil {
			return err
		}
	}
	return nil
}