From dcc0723e70cb815630d2043abcd8f4a50b7199b0 Mon Sep 17 00:00:00 2001 From: Adrian-George Bostan Date: Sun, 28 Apr 2019 15:26:50 +0300 Subject: [PATCH] Search xref objects with tolerance both to the left and right (#447) * Search xref objects with tolerance both to the left and right. Try searching xref to the left only if not found to the right. --- pdf/core/io.go | 4 +++ pdf/core/parser.go | 62 +++++++++++++++++++++++----------------------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/pdf/core/io.go b/pdf/core/io.go index 882d5df7..bf23ed50 100644 --- a/pdf/core/io.go +++ b/pdf/core/io.go @@ -83,6 +83,10 @@ func (parser *PdfParser) GetFileOffset() int64 { // SetFileOffset sets the file to an offset position and resets buffer. func (parser *PdfParser) SetFileOffset(offset int64) { + if offset < 0 { + offset = 0 + } + parser.rs.Seek(offset, io.SeekStart) parser.reader = bufio.NewReader(parser.rs) } diff --git a/pdf/core/parser.go b/pdf/core/parser.go index c5a24021..bf8a1c1d 100755 --- a/pdf/core/parser.go +++ b/pdf/core/parser.go @@ -1103,43 +1103,43 @@ func (parser *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDict return trailerDict, nil } -// Parse xref table at the current file position. Can either be a -// standard xref table, or an xref stream. +// Parse xref table at the current file position. Can either be a standard xref +// table, or an xref stream. func (parser *PdfParser) parseXref() (*PdfObjectDictionary, error) { - var err error - var trailerDict *PdfObjectDictionary - - // Points to xref table or xref stream object? - bb, _ := parser.reader.Peek(20) - if reIndirectObject.MatchString(string(bb)) { - common.Log.Trace("xref points to an object. Probably xref object") - common.Log.Trace("starting with \"%s\"", string(bb)) - trailerDict, err = parser.parseXrefStream(nil) - if err != nil { - return nil, err + // Search xrefs within 20 bytes of the current location. If the first + // iteration of the loop is unable to find a match, peek another 20 bytes + // left of the current location, add them to the previously read buffer + // and try again. + const bufLen = 20 + bb, _ := parser.reader.Peek(bufLen) + for i := 0; i < 2; i++ { + if reIndirectObject.Match(bb) { + common.Log.Trace("xref points to an object. Probably xref object") + common.Log.Debug("starting with \"%s\"", string(bb)) + return parser.parseXrefStream(nil) } - } else if reXrefTable.MatchString(string(bb)) { - common.Log.Trace("Standard xref section table!") - var err error - trailerDict, err = parser.parseXrefTable() - if err != nil { - return nil, err - } - } else { - common.Log.Debug("Warning: Unable to find xref table or stream. Repair attempted: Looking for earliest xref from bottom.") - err := parser.repairSeekXrefMarker() - if err != nil { - common.Log.Debug("Repair failed - %v", err) - return nil, err + if reXrefTable.Match(bb) { + common.Log.Trace("Standard xref section table!") + return parser.parseXrefTable() } - trailerDict, err = parser.parseXrefTable() - if err != nil { - return nil, err - } + // xref match failed. Peek 20 bytes to the left of the current offset, + // append them to the previously read buffer and try again. Reset to the + // original offset after reading. + offset := parser.GetFileOffset() + parser.SetFileOffset(offset - bufLen) + defer parser.SetFileOffset(offset) + + lbb, _ := parser.reader.Peek(bufLen) + bb = append(lbb, bb...) } - return trailerDict, err + common.Log.Debug("Warning: Unable to find xref table or stream. Repair attempted: Looking for earliest xref from bottom.") + if err := parser.repairSeekXrefMarker(); err != nil { + common.Log.Debug("Repair failed - %v", err) + return nil, err + } + return parser.parseXrefTable() } // Look for EOF marker and seek to its beginning.