unioffice/document/document.go
2017-09-07 22:05:29 -04:00

550 lines
16 KiB
Go

// Copyright 2017 Baliance. All rights reserved.
//
// Use of this source code is governed by the terms of the Affero GNU General
// Public License version 3.0 as published by the Free Software Foundation and
// appearing in the file LICENSE included in the packaging of this file. A
// commercial license can be purchased by contacting sales@baliance.com.
package document
import (
"archive/zip"
"errors"
"fmt"
"image"
"image/jpeg"
"io"
"io/ioutil"
"log"
"os"
"baliance.com/gooxml"
"baliance.com/gooxml/common"
"baliance.com/gooxml/zippkg"
dml "baliance.com/gooxml/schema/schemas.openxmlformats.org/drawingml"
st "baliance.com/gooxml/schema/schemas.openxmlformats.org/officeDocument/2006/sharedTypes"
"baliance.com/gooxml/schema/schemas.openxmlformats.org/package/2006/relationships"
wml "baliance.com/gooxml/schema/schemas.openxmlformats.org/wordprocessingml"
)
// Document is a text document that can be written out in the OOXML .docx
// format. It can be opened from a file on disk and modified, or created from
// scratch.
type Document struct {
common.DocBase
// x is the wrapped XML document; it is what X returns.
x *wml.Document
Settings Settings // document settings
Numbering Numbering // numbering styles within the document
Styles Styles // styles that are in use and can be used within the document
// headers and footers are written by Save as word/header%d.xml and
// word/footer%d.xml, numbered from one in slice order.
headers []*wml.Hdr
footers []*wml.Ftr
// docRels holds the relationships of the main document part; Save writes
// it to word/_rels/document.xml.rels.
docRels common.Relationships
// images are the image files referenced by the document; Save copies them
// into word/media.
images []*iref
// themes are written by Save as word/theme/theme%d.xml.
themes []*dml.Theme
// The remaining parts are optional: Save only writes those that are
// non-nil.
webSettings *wml.WebSettings
fontTable *wml.Fonts
endNotes *wml.Endnotes
footNotes *wml.Footnotes
}
// New constructs an empty document that content can be added to.
//
// The returned document has an empty body, new settings, and numbering and
// styles initialized to their defaults. The content type overrides and
// relationships for the document, settings, numbering and styles parts are
// registered as well, along with the package level relationships for the
// core properties, extended properties and main document.
func New() *Document {
	d := &Document{x: wml.NewDocument()}
	d.ContentTypes = common.NewContentTypes()
	// the body only needs to be allocated once
	d.x.Body = wml.NewCT_Body()
	d.x.ConformanceAttr = st.ST_ConformanceClassTransitional
	d.docRels = common.NewRelationships()

	d.AppProperties = common.NewAppProperties()
	d.CoreProperties = common.NewCoreProperties()

	d.ContentTypes.AddOverride("/word/document.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")

	d.Settings = NewSettings()
	d.docRels.AddRelationship("settings.xml", gooxml.SettingsType)
	d.ContentTypes.AddOverride("/word/settings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml")

	// package level relationships
	d.Rels = common.NewRelationships()
	d.Rels.AddRelationship("docProps/core.xml", gooxml.CorePropertiesType)
	d.Rels.AddRelationship("docProps/app.xml", gooxml.ExtendedPropertiesType)
	d.Rels.AddRelationship("word/document.xml", gooxml.OfficeDocumentType)

	d.Numbering = NewNumbering()
	d.Numbering.InitializeDefault()
	d.ContentTypes.AddOverride("/word/numbering.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml")
	d.docRels.AddRelationship("numbering.xml", gooxml.NumberingType)

	d.Styles = NewStyles()
	d.Styles.InitializeDefault()
	d.ContentTypes.AddOverride("/word/styles.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml")
	d.docRels.AddRelationship("styles.xml", gooxml.StylesType)

	return d
}
// X exposes the underlying wml.Document that this Document wraps.
func (d *Document) X() *wml.Document {
	return d.x
}
// AddHeader creates a new header and registers it with the document (as a
// document relationship and a content type override). The header is not shown
// anywhere until it is attached to the document for display.
func (d *Document) AddHeader() Header {
	const contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"

	h := wml.NewHdr()
	d.headers = append(d.headers, h)

	// header files are numbered from one, in the order they were added
	name := fmt.Sprintf("header%d.xml", len(d.headers))
	d.docRels.AddRelationship(name, gooxml.HeaderType)
	d.ContentTypes.AddOverride("/word/"+name, contentType)
	return Header{d, h}
}
// AddFooter creates a new footer and registers it with the document (as a
// document relationship and a content type override). The footer is not shown
// anywhere until it is attached to the document for display.
func (d *Document) AddFooter() Footer {
	const contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"

	f := wml.NewFtr()
	d.footers = append(d.footers, f)

	// footer files are numbered from one, in the order they were added
	name := fmt.Sprintf("footer%d.xml", len(d.footers))
	d.docRels.AddRelationship(name, gooxml.FooterType)
	d.ContentTypes.AddOverride("/word/"+name, contentType)
	return Footer{d, f}
}
// BodySection returns the default body section used for all preceding
// paragraphs until the previous Section. If there are no previous sections,
// the body section applies to the entire document. The section properties are
// created on first use.
func (d *Document) BodySection() Section {
	body := d.x.Body
	if body.SectPr == nil {
		body.SectPr = wml.NewCT_SectPr()
	}
	return Section{d, body.SectPr}
}
// Save writes the document to an io.Writer in the Zip package format.
//
// A validation failure of the document XML is only logged; it doesn't prevent
// the document from being written. The first error encountered while writing
// a part is returned. If every part is written successfully, the error (if
// any) from finishing the zip archive is returned.
func (d *Document) Save(w io.Writer) (err error) {
	if verr := d.x.Validate(); verr != nil {
		log.Printf("validation error in document: %s", verr)
	}

	z := zip.NewWriter(w)
	// Finish the archive exactly once on every path. The close error is only
	// reported if nothing else failed first.
	defer func() {
		if cerr := z.Close(); err == nil {
			err = cerr
		}
	}()

	// package level relationships and properties
	if err := zippkg.MarshalXML(z, "_rels/.rels", d.Rels.X()); err != nil {
		return err
	}
	if err := zippkg.MarshalXML(z, "docProps/app.xml", d.AppProperties.X()); err != nil {
		return err
	}
	if err := zippkg.MarshalXML(z, "docProps/core.xml", d.CoreProperties.X()); err != nil {
		return err
	}
	if d.Thumbnail != nil {
		tn, err := z.Create("docProps/thumbnail.jpeg")
		if err != nil {
			return err
		}
		if err := jpeg.Encode(tn, d.Thumbnail, nil); err != nil {
			return err
		}
	}

	// main document parts
	if err := zippkg.MarshalXML(z, "word/settings.xml", d.Settings.X()); err != nil {
		return err
	}
	if err := zippkg.MarshalXML(z, "word/document.xml", d.x); err != nil {
		return err
	}
	// numbering is optional (Read clears it until a numbering part is found)
	if d.Numbering.X() != nil {
		if err := zippkg.MarshalXML(z, "word/numbering.xml", d.Numbering.X()); err != nil {
			return err
		}
	}
	if err := zippkg.MarshalXML(z, "word/styles.xml", d.Styles.X()); err != nil {
		return err
	}
	if err := zippkg.MarshalXML(z, "word/_rels/document.xml.rels", d.docRels.X()); err != nil {
		return err
	}

	// optional parts, only written if present
	if d.webSettings != nil {
		if err := zippkg.MarshalXML(z, "word/webSettings.xml", d.webSettings); err != nil {
			return err
		}
	}
	if d.fontTable != nil {
		if err := zippkg.MarshalXML(z, "word/fontTable.xml", d.fontTable); err != nil {
			return err
		}
	}
	if d.endNotes != nil {
		if err := zippkg.MarshalXML(z, "word/endnotes.xml", d.endNotes); err != nil {
			return err
		}
	}
	if d.footNotes != nil {
		if err := zippkg.MarshalXML(z, "word/footnotes.xml", d.footNotes); err != nil {
			return err
		}
	}

	// numbered parts, all numbered from one in slice order
	for i, thm := range d.themes {
		if err := zippkg.MarshalXML(z, fmt.Sprintf("word/theme/theme%d.xml", i+1), thm); err != nil {
			return err
		}
	}
	for i, hdr := range d.headers {
		fn := fmt.Sprintf("word/header%d.xml", i+1)
		if err := zippkg.MarshalXML(z, fn, hdr); err != nil {
			return err
		}
	}
	for i, ftr := range d.footers {
		fn := fmt.Sprintf("word/footer%d.xml", i+1)
		if err := zippkg.MarshalXML(z, fn, ftr); err != nil {
			return err
		}
	}
	for i, img := range d.images {
		// NOTE(review): every image is stored with a .png extension here,
		// while AddImage registers the relationship target using the image's
		// Format. A non-PNG image may end up with a relationship target that
		// doesn't match the stored file name - confirm and fix once the
		// format is available on the image reference.
		fn := fmt.Sprintf("word/media/image%d.png", i+1)
		if img.path != "" {
			if err := zippkg.AddFileFromDisk(z, fn, img.path); err != nil {
				return err
			}
		} else {
			log.Printf("unsupported image source: %+v", img)
		}
	}

	if err := zippkg.MarshalXML(z, "[Content_Types].xml", d.ContentTypes.X()); err != nil {
		return err
	}
	return d.WriteExtraFiles(z)
}
// AddTable appends a new, empty table to the end of the document body and
// returns it.
func (d *Document) AddTable() Table {
	// build the table and its wrappers from the inside out
	tbl := wml.NewCT_Tbl()

	content := wml.NewEG_ContentBlockContent()
	content.Tbl = append(content.Tbl, tbl)

	block := wml.NewEG_BlockLevelElts()
	block.EG_ContentBlockContent = append(block.EG_ContentBlockContent, content)

	d.x.Body.EG_BlockLevelElts = append(d.x.Body.EG_BlockLevelElts, block)
	return Table{d, tbl}
}
// AddParagraph appends a new, empty paragraph to the end of the document body
// and returns it.
func (d *Document) AddParagraph() Paragraph {
	// build the paragraph and its wrappers from the inside out
	para := wml.NewCT_P()

	content := wml.NewEG_ContentBlockContent()
	content.P = append(content.P, para)

	block := wml.NewEG_BlockLevelElts()
	block.EG_ContentBlockContent = append(block.EG_ContentBlockContent, content)

	d.x.Body.EG_BlockLevelElts = append(d.x.Body.EG_BlockLevelElts, block)
	return Paragraph{d, para}
}
// Paragraphs returns all of the paragraphs found in the block level content
// of the document body, in document order. It returns nil if the document has
// no body, and an empty (non-nil) slice if the body has no paragraphs.
func (d *Document) Paragraphs() []Paragraph {
	body := d.x.Body
	if body == nil {
		return nil
	}

	paras := []Paragraph{}
	for _, block := range body.EG_BlockLevelElts {
		for _, content := range block.EG_ContentBlockContent {
			for i := range content.P {
				paras = append(paras, Paragraph{d, content.P[i]})
			}
		}
	}
	return paras
}
// SaveToFile writes the document out to a file, creating it or truncating it
// if it already exists.
//
// An error from saving the document takes precedence. Otherwise the error
// from closing the file is returned, so that a write which only fails when
// the file is closed isn't silently lost.
func (d *Document) SaveToFile(path string) error {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	if err := d.Save(f); err != nil {
		// best effort; the save error is the one worth reporting
		f.Close()
		return err
	}
	return f.Close()
}
// Open opens and reads a document from a file (.docx).
//
// The file is only needed for the duration of the call; it is closed before
// Open returns.
func Open(filename string) (*Document, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("error opening %s: %s", filename, err)
	}
	defer f.Close()

	// stat the open handle rather than the path, so the size always describes
	// the file that is actually being read
	fi, err := f.Stat()
	if err != nil {
		return nil, fmt.Errorf("error opening %s: %s", filename, err)
	}
	return Read(f, fi.Size())
}
// OpenTemplate opens a document and replaces its body with an empty one so
// that it can be used as a template. Since Word removes unused styles from a
// document upon save, to create a template in Word add a paragraph with every
// style of interest. When opened with OpenTemplate the document's styles will
// be available but the content will be gone.
func OpenTemplate(filename string) (*Document, error) {
	tmpl, err := Open(filename)
	if err != nil {
		return nil, err
	}
	// drop the content, keeping everything else that was read
	tmpl.x.Body = wml.NewCT_Body()
	return tmpl, nil
}
// Read reads a document from an io.ReaderAt. size is the total size in bytes
// of the zip package available through r.
//
// Parts are discovered by following relationships starting from the content
// types and base relationships files. Zip entries not consumed along the way
// are added to the document as extra files. A temporary directory is created
// and recorded in the document's TmpPath; it is removed again if reading
// fails after it has been created.
func Read(r io.ReaderAt, size int64) (*Document, error) {
	// parse the zip before creating anything on disk, so that an invalid
	// package doesn't leave a temporary directory behind
	zr, err := zip.NewReader(r, size)
	if err != nil {
		return nil, fmt.Errorf("parsing zip: %s", err)
	}

	doc := New()
	// numbering is not required
	doc.Numbering.x = nil

	td, err := ioutil.TempDir("", "gooxml-docx")
	if err != nil {
		return nil, err
	}
	doc.TmpPath = td

	// work on a copy of the file list, as entries are set to nil once they
	// have been consumed
	files := make([]*zip.File, len(zr.File))
	copy(files, zr.File)

	decMap := zippkg.DecodeMap{}
	decMap.SetOnNewRelationshipFunc(doc.onNewRelationship)
	// we should discover all contents by starting with these two files
	decMap.AddTarget(zippkg.ContentTypesFilename, doc.ContentTypes.X())
	decMap.AddTarget(zippkg.BaseRelsFilename, doc.Rels.X())
	// NOTE(review): nothing returned by Decode is inspected here - confirm
	// whether it reports failures that should abort the read.
	decMap.Decode(files)

	for _, f := range files {
		if f == nil {
			continue
		}
		if err := doc.AddExtraFileFromZip(f); err != nil {
			// the document is discarded, so nothing else could clean this up
			os.RemoveAll(td)
			return nil, err
		}
	}
	return doc, nil
}
// Validate validates the structure and, in cases where it's possible, the
// ranges of elements within a document. A validation error doesn't mean that
// the document won't work in MS Word or LibreOffice, but it's worth checking
// into. It is safe to call on a nil Document, which is reported as an error.
func (d *Document) Validate() error {
	if d != nil && d.x != nil {
		return d.x.Validate()
	}
	return errors.New("document not initialized correctly, nil base")
}
// AddImage adds an image to the document package, returning a reference that
// can be used to add the image to a run and place it in the document contents.
//
// The image must have a path, a format and a non-zero size. An image that
// fails these checks is rejected with an error and leaves the document
// untouched, so the document's images stay in step with its image
// relationships.
func (d *Document) AddImage(i Image) (ImageRef, error) {
	r := ImageRef{img: i, d: d}

	// validate everything before modifying the document
	if i.Path == "" {
		return r, errors.New("image must have a path")
	}
	if i.Format == "" {
		return r, errors.New("image must have a valid format")
	}
	if i.Size.X == 0 || i.Size.Y == 0 {
		return r, errors.New("image must have a valid size")
	}

	r.ref = &iref{path: i.Path}
	d.images = append(d.images, r.ref)

	// images are numbered from one, in the order they were added
	fn := fmt.Sprintf("media/image%d.%s", len(d.images), i.Format)
	d.docRels.AddRelationship(fn, gooxml.ImageType)
	return r, nil
}
// GetImageByRelID looks up the image whose relationship ID is relID. The
// boolean result reports whether such an image was found; if it wasn't, the
// returned ImageRef is the zero value.
func (d *Document) GetImageByRelID(relID string) (ImageRef, bool) {
	imgs := d.Images()
	for idx := range imgs {
		if imgs[idx].RelID() == relID {
			return imgs[idx], true
		}
	}
	return ImageRef{}, false
}
// Images returns all images mentioned in the document relationships.
//
// Image relationships are paired with the document's images by position: the
// n-th image relationship corresponds to the n-th image. An image whose file
// can't be loaded is left out of the result, as is an image relationship
// that has no corresponding image.
func (d *Document) Images() []ImageRef {
	ret := []ImageRef{}
	imgIdx := 0
	for _, rel := range d.docRels.Relationships() {
		if rel.Type() != gooxml.ImageType {
			continue
		}
		if imgIdx < len(d.images) {
			ref := d.images[imgIdx]
			// a load failure is deliberately not reported; the image is
			// just skipped
			if img, err := ImageFromFile(ref.path); err == nil {
				ret = append(ret, ImageRef{ref: ref, img: img, d: d})
			}
		}
		// advance even when the image was skipped, to keep the pairing with
		// the relationships intact
		imgIdx++
	}
	return ret
}
// FormFields extracts all of the fields from a document. They can then be
// manipulated via the methods on the field and the document saved.
//
// A form field is recognized by a 'begin' field character that carries named
// form field data. For text input fields, the run content holding the field's
// value is located as well: it is the first content of the run following the
// field's 'separate' field character, provided that content is not itself a
// field character.
func (d *Document) FormFields() []FormField {
	ret := []FormField{}
	for _, p := range d.Paragraphs() {
		runs := p.Runs()
		for i, r := range runs {
			for _, ic := range r.x.EG_RunInnerContent {
				// skip non form fields
				if ic.FldChar == nil || ic.FldChar.FfData == nil {
					continue
				}
				// only a 'begin' field character starts a form field
				if ic.FldChar.FldCharTypeAttr != wml.ST_FldCharTypeBegin {
					continue
				}
				// ensure it has a name
				if len(ic.FldChar.FfData.Name) == 0 || ic.FldChar.FfData.Name[0].ValAttr == nil {
					continue
				}

				field := FormField{x: ic.FldChar.FfData}
				// for text input boxes, we need a pointer to where to set
				// the text as well
				if ic.FldChar.FfData.TextInput != nil {
					// stopping one short of the end ensures runs[j+1] exists
					for j := i + 1; j < len(runs)-1; j++ {
						if len(runs[j].x.EG_RunInnerContent) == 0 {
							continue
						}
						// look for the 'separate' field
						sep := runs[j].x.EG_RunInnerContent[0]
						if sep.FldChar == nil || sep.FldChar.FldCharTypeAttr != wml.ST_FldCharTypeSeparate {
							continue
						}
						next := runs[j+1].x.EG_RunInnerContent
						if len(next) == 0 {
							continue
						}
						// the value should be the text in the next inner
						// content that is not a field char
						if next[0].FldChar == nil {
							field.textIC = next[0]
							// stop at the first match, otherwise the value of
							// a later field in the same paragraph would
							// replace this field's value
							break
						}
					}
				}
				ret = append(ret, field)
			}
		}
	}
	return ret
}
// onNewRelationship is registered with the decode map in Read and is invoked
// for relationships discovered while decoding. Depending on the relationship
// type it either registers the document part that target should be decoded
// into, or (for thumbnails and images) reads the matching zip entry directly
// and sets its slot in files to nil to mark it as consumed. Relationship
// types that aren't handled are logged and otherwise ignored.
func (d *Document) onNewRelationship(decMap *zippkg.DecodeMap, target, typ string, files []*zip.File, rel *relationships.Relationship) error {
	switch typ {
	case gooxml.OfficeDocumentType:
		d.x = wml.NewDocument()
		decMap.AddTarget(target, d.x)
		// the document's own relationships file needs decoding too
		decMap.AddTarget(zippkg.RelationsPathFor(target), d.docRels.X())

	case gooxml.CorePropertiesType:
		decMap.AddTarget(target, d.CoreProperties.X())

	case gooxml.ExtendedPropertiesType:
		decMap.AddTarget(target, d.AppProperties.X())

	case gooxml.ThumbnailType:
		// decode the thumbnail straight from its zip entry
		for idx := range files {
			zf := files[idx]
			if zf == nil || zf.Name != target {
				continue
			}
			rc, err := zf.Open()
			if err != nil {
				return fmt.Errorf("error reading thumbnail: %s", err)
			}
			d.Thumbnail, _, err = image.Decode(rc)
			rc.Close()
			if err != nil {
				return fmt.Errorf("error decoding thumbnail: %s", err)
			}
			files[idx] = nil
		}

	case gooxml.SettingsType:
		decMap.AddTarget(target, d.Settings.X())

	case gooxml.NumberingType:
		d.Numbering = NewNumbering()
		decMap.AddTarget(target, d.Numbering.X())

	case gooxml.StylesType:
		d.Styles.Clear()
		decMap.AddTarget(target, d.Styles.X())

	case gooxml.HeaderType:
		h := wml.NewHdr()
		d.headers = append(d.headers, h)
		decMap.AddTarget(target, h)

	case gooxml.FooterType:
		f := wml.NewFtr()
		d.footers = append(d.footers, f)
		decMap.AddTarget(target, f)

	case gooxml.ThemeType:
		t := dml.NewTheme()
		d.themes = append(d.themes, t)
		decMap.AddTarget(target, t)

	case gooxml.WebSettingsType:
		d.webSettings = wml.NewWebSettings()
		decMap.AddTarget(target, d.webSettings)

	case gooxml.FontTableType:
		d.fontTable = wml.NewFonts()
		decMap.AddTarget(target, d.fontTable)

	case gooxml.EndNotesType:
		d.endNotes = wml.NewEndnotes()
		decMap.AddTarget(target, d.endNotes)

	case gooxml.FootNotesType:
		d.footNotes = wml.NewFootnotes()
		decMap.AddTarget(target, d.footNotes)

	case gooxml.ImageType:
		// extract the image into the document's temporary directory and
		// keep a reference to the extracted file
		for idx := range files {
			zf := files[idx]
			if zf == nil || zf.Name != target {
				continue
			}
			extracted, err := zippkg.ExtractToDiskTmp(zf, d.TmpPath)
			if err != nil {
				return err
			}
			img, err := ImageFromFile(extracted)
			if err != nil {
				return err
			}
			d.images = append(d.images, &iref{path: img.Path})
			files[idx] = nil
		}

	default:
		log.Printf("unsupported relationship type: %s tgt: %s", typ, target)
	}
	return nil
}