diff --git a/common/logging.go b/common/logging.go
index 7f71fc5d..cc35d736 100644
--- a/common/logging.go
+++ b/common/logging.go
@@ -41,6 +41,10 @@ func (this DummyLogger) Debug(format string, args ...interface{}) {
 // Simple Console Logger that the tests use.
 type ConsoleLogger struct{}
 
+// DebugOutput gates ConsoleLogger.Debug: while it is false, debug messages are
+// dropped. It is a constant, so changing it requires a rebuild.
+const DebugOutput = false
+
 func (this ConsoleLogger) Error(format string, args ...interface{}) {
 	this.output(os.Stderr, "[ERROR] ", format, args...)
 }
@@ -58,7 +62,9 @@ func (this ConsoleLogger) Info(format string, args ...interface{}) {
 }
 
 func (this ConsoleLogger) Debug(format string, args ...interface{}) {
-	this.output(os.Stdout, "[DEBUG] ", format, args...)
+	if DebugOutput {
+		this.output(os.Stdout, "[DEBUG] ", format, args...)
+	}
 }
 
 func (this ConsoleLogger) output(f *os.File, prefix, format string, args ...interface{}) {
diff --git a/pdf/page.go b/pdf/page.go
index c5ebb4b4..cf4ece77 100644
--- a/pdf/page.go
+++ b/pdf/page.go
@@ -15,6 +15,7 @@ package pdf
 import (
 	"errors"
 	"fmt"
+	"os"
 	"regexp"
 	"strconv"
 
@@ -220,6 +221,20 @@ type PdfPage struct {
 	pageDict *PdfObjectDictionary
 }
 
+// Show prints a debug dump of the page to stdout: its BoxColorInfo and
+// Contents entries, the page dictionary and the page resources. pageNum is
+// only used to label the log lines that bracket the dump.
+func (this *PdfPage) Show(pageNum int) {
+	// NOTE(review): result unused; presumably called to refresh this.pageDict - confirm.
+	this.GetPageDict()
+	common.Log.Info("-PdfPage.Show page %d ========================================", pageNum)
+	fmt.Printf("BoxColorInfo=%s\n", Trace(this.BoxColorInfo))
+	fmt.Printf("Contents=%s\n", Trace(this.Contents))
+	ShowDict(os.Stdout, "PdfPage.pageDict", this.pageDict)
+	this.Resources.Show()
+	common.Log.Info("+PdfPage.Show page %d ========================================", pageNum)
+}
+
 func NewPdfPage() *PdfPage {
 	page := PdfPage{}
 	page.pageDict = &PdfObjectDictionary{}
@@ -696,6 +711,26 @@ type PdfPageResources struct {
 	ProcSet    PdfObject
 }
 
+// Show prints a debug dump of the page resources to stdout. ExtGState,
+// ColorSpace, Pattern, Shading and ProcSet are printed after passing them
+// through Trace; XObject and Font are expanded with ShowDict.
+func (this *PdfPageResources) Show() {
+	if this == nil {
+		// A nil receiver would panic on the field accesses below; report it instead.
+		common.Log.Info("PdfPageResources.Show: nil resources")
+		return
+	}
+	common.Log.Info("-PdfPageResources.Show ========================================")
+	fmt.Printf("ExtGState=%s\n", Trace(this.ExtGState))
+	fmt.Printf("ColorSpace=%s\n", Trace(this.ColorSpace))
+	fmt.Printf("Pattern=%s\n", Trace(this.Pattern))
+	fmt.Printf("Shading=%s\n", Trace(this.Shading))
+	ShowDict(os.Stdout, "PdfPageResources.XObject", this.XObject)
+	ShowDict(os.Stdout, "PdfPageResources.Font", this.Font)
+	fmt.Printf("ProcSet=%s\n", Trace(this.ProcSet))
+	common.Log.Info("+PdfPageResources.Show ========================================")
+}
+
 func NewPdfPageResourcesFromDict(dict *PdfObjectDictionary) (*PdfPageResources, error) {
 	r := PdfPageResources{}
 
diff --git a/pdf/pdfparser.go b/pdf/pdfparser.go
index d13feee2..89f66b97 100644
--- a/pdf/pdfparser.go
+++ b/pdf/pdfparser.go
@@ -1199,14 +1199,14 @@ func (this *PdfParser) parseIndirectObjectBase(isIndirect bool) (PdfObject, erro
 	hb := make([]byte, hlen)
 	_, err = this.ReadAtLeast(hb, hlen)
 	if err != nil {
-		common.Log.Debug("ERROR: unable to read - %s", err)
+		common.Log.Error("unable to read - %s", err)
 		return nil, err
 	}
 	common.Log.Debug("textline: %s", hb)
 
 	result := reIndirectObject.FindStringSubmatch(string(hb))
 	if len(result) < 3 {
-		common.Log.Debug("ERROR: Unable to find object signature (%s)", string(hb))
+		common.Log.Error("Unable to find object signature (%s)", string(hb))
 		return &indirect, errors.New("Unable to detect indirect object signature")
 	}
 
@@ -1280,18 +1280,19 @@ func (this *PdfParser) parseIndirectObjectBase(isIndirect bool) (PdfObject, erro
 						return nil, errors.New("Stream length needs to be an integer")
 					}
 					streamLength := *pstreamLength
+					// TODO(review): the error text says "longer than 0" but a zero length is accepted - reconcile.
 					if streamLength < 0 {
 						return nil, errors.New("Stream needs to be longer than 0")
 					}
 
 					stream := make([]byte, streamLength)
 					_, err = this.ReadAtLeast(stream, int(streamLength))
 					if err != nil {
-						common.Log.Debug("ERROR stream (%d): %X", len(stream), stream)
+						common.Log.Error("stream (%d): %X", len(stream), stream)
 						return nil, err
 					}
 
 					streamobj := PdfObjectStream{}
 					streamobj.Stream = stream
 					streamobj.PdfObjectDictionary = indirect.PdfObject.(*PdfObjectDictionary)
 					streamobj.ObjectNumber = indirect.ObjectNumber
diff --git a/pdf/reader.go b/pdf/reader.go
index 8b64d5ac..d9639193 100644
--- a/pdf/reader.go
+++ b/pdf/reader.go
@@ -467,7 +467,7 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec
 		return nil
 	}
 	if *objType != "Pages" {
-		common.Log.Debug("ERROR: Table of content containing non Page/Pages object! (%s)", objType)
+		common.Log.Error("Table of content containing non Page/Pages object! (%s)", objType)
 		return errors.New("Table of content containing non Page/Pages object!")
 	}
 
@@ -484,7 +484,7 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec
 
 	kidsObj, err := this.parser.Trace((*nodeDict)["Kids"])
 	if err != nil {
-		common.Log.Debug("ERROR: Failed loading Kids object")
+		common.Log.Error("Failed loading Kids object")
 		return err
 	}
 
@@ -505,7 +505,7 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec
 		common.Log.Debug("**Child: %d - %s", idx, child)
 		child, ok := child.(*PdfIndirectObject)
 		if !ok {
-			common.Log.Debug("ERROR: Page not indirect object - (%s)", child)
+			common.Log.Error("Page not indirect object - (%s)", child)
 			panic(errors.New("Page not indirect object"))
 			return errors.New("Page not indirect object")
 		}
diff --git a/pdf/utils.go b/pdf/utils.go
index 338786f2..90a791ab 100644
--- a/pdf/utils.go
+++ b/pdf/utils.go
@@ -146,12 +146,11 @@ func (this *PdfParser) inspect() (map[string]int, error) {
 	return objTypes, nil
 }
 
-func ShowDict(w *os.File, name string, d *PdfObjectDictionary) {
-	keys := []string{}
-	for k := range *d {
-		keys = append(keys, string(k))
-	}
-	sort.Strings(keys)
+// ShowDict writes a debug listing of `o` to `w`, headed by the caller's
+// file:line and `name`. `o` may be a dictionary or an indirect object holding
+// one. A nil or non-dictionary object is reported on a single line instead of
+// being expanded.
+func ShowDict(w *os.File, name string, o PdfObject) {
 	_, file, line, ok := runtime.Caller(1)
 	if !ok {
 		file = "???"
@@ -159,28 +158,85 @@ func ShowDict(w *os.File, name string, d *PdfObjectDictionary) {
 	} else {
 		file = filepath.Base(file)
 	}
-	fmt.Fprintf(w, "ShowDict: %s:%d %q %d\n", file, line, name, len(*d))
-	for i, k := range keys {
-		v := (*d)[PdfObjectName(k)]
-		ref := ""
-		if io, ok := v.(*PdfIndirectObject); ok {
-			v = (*io).PdfObject
-			ref = (*io).PdfObjectReference.String()
-		}
-		s := fmt.Sprintf("%T", v)
-		if i, ok := v.(*PdfObjectInteger); ok {
-			s = fmt.Sprintf("%d", *i)
-		} else if n, ok := v.(*PdfObjectName); ok {
-			s = fmt.Sprintf("%#q", *n)
-		} else if n, ok := v.(*PdfObjectString); ok {
-			s = fmt.Sprintf("%q", *n)
-		} else if x, ok := v.(*PdfObjectFloat); ok {
-			s = fmt.Sprintf("%f", *x)
-		} else if b, ok := v.(*PdfObjectBool); ok {
-			s = fmt.Sprintf("%t", *b)
-		}
-		fmt.Fprintf(w, "%4d: %#20q: %10s %s\n", i, k, s, ref)
-	}
+
+	if o == nil {
+		fmt.Fprintf(w, "ShowDict: %s:%d %q nil\n", file, line, name)
+		return
+	}
+	ref := ""
+	if io, isIndirect := o.(*PdfIndirectObject); isIndirect {
+		o = io.PdfObject
+		ref = (*io).PdfObjectReference.String()
+	}
+	d, isDict := o.(*PdfObjectDictionary)
+	if !isDict || d == nil {
+		// Report anything that is not a usable dictionary instead of panicking.
+		fmt.Fprintf(w, "ShowDict: %s:%d %q %T %s\n", file, line, name, o, ref)
+		return
+	}
+	fmt.Fprintf(w, "ShowDict: %s:%d %q %d %s\n", file, line, name, len(*d), ref)
+	showDict(w, d, "")
+}
+
+// showDict writes the entries of `d` to `w` in sorted key order, one per line,
+// each line prefixed with `indent`. Values that are themselves dictionaries
+// are expanded recursively with a longer indent.
+func showDict(w *os.File, d *PdfObjectDictionary, indent string) {
+	for i, k := range sortedKeys(d) {
+		v := (*d)[PdfObjectName(k)]
+		if e, ok := v.(*PdfObjectDictionary); ok {
+			fmt.Fprintf(w, "%s%4d: %#10q:\n", indent, i, k)
+			showDict(w, e, indent+" ")
+		} else {
+			fmt.Fprintf(w, "%s%4d: %#10q: %s\n", indent, i, k, ObjStr(v))
+		}
+	}
+}
+
+// sortedKeys returns the keys of `d` as strings in ascending order. A nil
+// dictionary yields no keys.
+func sortedKeys(d *PdfObjectDictionary) []string {
+	if d == nil {
+		return nil
+	}
+	keys := make([]string, 0, len(*d))
+	for k := range *d {
+		keys = append(keys, string(k))
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// ObjStr returns a one-line description of `v`: the object reference when `v`
+// is an indirect object ("--- ---" otherwise), followed by the value for simple
+// types or by the Go type and contents for streams, dictionaries and arrays.
+func ObjStr(v PdfObject) string {
+	ref := "--- ---"
+	if io, ok := v.(*PdfIndirectObject); ok {
+		v = (*io).PdfObject
+		ref = (*io).PdfObjectReference.String()
+	}
+	s := fmt.Sprintf("%T", v)
+	switch t := v.(type) {
+	case *PdfObjectInteger:
+		s = fmt.Sprintf("%d", *t)
+	case *PdfObjectName:
+		s = fmt.Sprintf("%#q", *t)
+	case *PdfObjectString:
+		s = fmt.Sprintf("%q", *t)
+	case *PdfObjectFloat:
+		s = fmt.Sprintf("%f", *t)
+	case *PdfObjectBool:
+		s = fmt.Sprintf("%t", *t)
+	case *PdfObjectStream:
+		s = fmt.Sprintf("%s %s", s, (*t).PdfObjectDictionary)
+	case *PdfObjectDictionary:
+		s = fmt.Sprintf("%s %s", s, *t)
+	case *PdfObjectArray:
+		s = fmt.Sprintf("%s %s", s, *t)
+	}
+
+	return fmt.Sprintf("%-9s %s", ref, s)
 }
 
 func Trace(obj PdfObject) PdfObject {
diff --git a/pdf/writer.go b/pdf/writer.go
index b8edd6b7..a32c32c8 100644
--- a/pdf/writer.go
+++ b/pdf/writer.go
@@ -16,6 +16,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"sort"
 	"time"
 
 	"github.com/unidoc/unidoc/common"
@@ -77,6 +78,104 @@ type PdfWriter struct {
 	ids         *PdfObjectArray
 }
 
+// Show prints a debug dump of the writer to stdout: its root, info, pages and
+// ids objects, every held object ordered by "Type" and "Subtype" (see
+// byObject), and finally the number of objects seen per "Type".
+func (this *PdfWriter) Show() {
+	typeCounts := map[string]int{}
+
+	common.Log.Info("-PdfWriter.Show ========================================")
+	fmt.Printf("root=%s\n", Trace(this.root))
+	// NOTE(review): the "catalog" label prints this.pages, exactly like the
+	// "pages" line below - looks like a copy/paste slip, confirm what was meant.
+	fmt.Printf("catalog=%s\n", Trace(this.pages))
+	fmt.Printf("infoObj=%s\n", Trace(this.infoObj))
+	fmt.Printf("pages=%s\n", Trace(this.pages))
+	fmt.Printf("objects=%d\n", len(this.objects))
+	// Sort a copy so that dumping the writer does not reorder its objects.
+	objects := make([]PdfObject, len(this.objects))
+	copy(objects, this.objects)
+	sort.Stable(byObject(objects))
+	for i, o := range objects {
+		_, _, t, u := ObjStreamType(o)
+		fmt.Printf("%10d: [%s:%s] %s\n", i, t, u, ObjStr(o))
+		typeCounts[t]++
+	}
+	fmt.Printf("ids=%s\n", Trace(this.ids))
+	common.Log.Info("+PdfWriter.Show ========================================")
+	// Map iteration order is random; sort the types for a stable listing.
+	types := make([]string, 0, len(typeCounts))
+	for t := range typeCounts {
+		types = append(types, t)
+	}
+	sort.Strings(types)
+	for _, t := range types {
+		fmt.Printf("%#20q: %3d\n", t, typeCounts[t])
+	}
+}
+
+// byObject implements sort.Interface for slices of PdfObject, ordering them by
+// their "Type" and "Subtype" keys. See ObjStreamType.
+type byObject []PdfObject
+
+func (x byObject) Len() int { return len(x) }
+
+func (x byObject) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+
+// Less orders by descending "Type", then descending "Subtype", then streams
+// before non-streams, then dictionaries before non-dictionaries.
+func (x byObject) Less(i, j int) bool {
+	si, di, ti, ui := ObjStreamType(x[i])
+	sj, dj, tj, uj := ObjStreamType(x[j])
+
+	if ti != tj {
+		return ti > tj
+	}
+	if ui != uj {
+		return ui > uj
+	}
+	if si != sj {
+		return si
+	}
+	if di != dj {
+		return di
+	}
+	return false
+}
+
+// ObjStreamType returns information about streams and dicts: isStream, isDict, typ, subtyp
+// isStream: is `o` a PdfObjectStream?
+// isDict: is `o` a PdfObjectDictionary?
+// typ: "Type" value of the dict or stream, "" if missing or not a name
+// subtyp: "Subtype" value of the dict or stream, "" if missing or not a name
+// An indirect object is unwrapped before it is inspected.
+func ObjStreamType(o PdfObject) (isStream, isDict bool, typ, subtyp string) {
+	if io, ok := o.(*PdfIndirectObject); ok {
+		o = (*io).PdfObject
+	}
+	var d *PdfObjectDictionary
+	switch t := o.(type) {
+	case *PdfObjectStream:
+		d = t.PdfObjectDictionary
+		isStream = true
+	case *PdfObjectDictionary:
+		d = t
+		isDict = true
+	}
+	if d == nil {
+		return isStream, isDict, typ, subtyp
+	}
+	// Checked assertions: an entry that is not a name is skipped rather than
+	// triggering a panic.
+	if n, ok := (*d)["Type"].(*PdfObjectName); ok && n != nil {
+		typ = string(*n)
+	}
+	if n, ok := (*d)["Subtype"].(*PdfObjectName); ok && n != nil {
+		subtyp = string(*n)
+	}
+	return isStream, isDict, typ, subtyp
+}
+
 func NewPdfWriter() PdfWriter {
 	w := PdfWriter{}
 
@@ -211,7 +310,7 @@ func (this *PdfWriter) addObjects(obj PdfObject) error {
 
 	if _, isReference := obj.(*PdfObjectReference); isReference {
 		// Should never be a reference, should already be resolved.
-		common.Log.Debug("ERROR: Cannot be a reference!")
+		common.Log.Error("Cannot be a reference!")
 		return errors.New("Reference not allowed")
 	}
 