Search xref objects with tolerance both to the left and right (#447)

* Search xref objects with tolerance both to the left and right. Try searching xref to the left only if not found to the right.
This commit is contained in:
Adrian-George Bostan 2019-04-28 15:26:50 +03:00 committed by Gunnsteinn Hall
parent 7cfcb9cdfd
commit dcc0723e70
2 changed files with 35 additions and 31 deletions

View File

@ -83,6 +83,10 @@ func (parser *PdfParser) GetFileOffset() int64 {
// SetFileOffset repositions the underlying stream at the absolute position
// offset (measured from the start of the file) and replaces the buffered
// reader with a fresh one, so previously buffered data is discarded.
// Negative offsets are clamped to 0.
func (parser *PdfParser) SetFileOffset(offset int64) {
	target := offset
	if target < 0 {
		// Never seek before the beginning of the file.
		target = 0
	}

	// The seek result is discarded: this method has no error return.
	_, _ = parser.rs.Seek(target, io.SeekStart)

	// Rebuild the reader so nothing buffered at the old position leaks through.
	parser.reader = bufio.NewReader(parser.rs)
}

View File

@ -1103,43 +1103,43 @@ func (parser *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDict
return trailerDict, nil
}
// Parse xref table at the current file position. Can either be a
// standard xref table, or an xref stream.
// Parse xref table at the current file position. Can either be a standard xref
// table, or an xref stream.
func (parser *PdfParser) parseXref() (*PdfObjectDictionary, error) {
var err error
var trailerDict *PdfObjectDictionary
// Points to xref table or xref stream object?
bb, _ := parser.reader.Peek(20)
if reIndirectObject.MatchString(string(bb)) {
common.Log.Trace("xref points to an object. Probably xref object")
common.Log.Trace("starting with \"%s\"", string(bb))
trailerDict, err = parser.parseXrefStream(nil)
if err != nil {
return nil, err
// Search xrefs within 20 bytes of the current location. If the first
// iteration of the loop is unable to find a match, peek another 20 bytes
// left of the current location, add them to the previously read buffer
// and try again.
const bufLen = 20
// NOTE(review): the Peek error is discarded, so bb may hold fewer than
// bufLen bytes (e.g. near end of input); the matches below then run on
// whatever was available.
bb, _ := parser.reader.Peek(bufLen)
for i := 0; i < 2; i++ {
if reIndirectObject.Match(bb) {
common.Log.Trace("xref points to an object. Probably xref object")
common.Log.Debug("starting with \"%s\"", string(bb))
return parser.parseXrefStream(nil)
}
} else if reXrefTable.MatchString(string(bb)) {
common.Log.Trace("Standard xref section table!")
var err error
trailerDict, err = parser.parseXrefTable()
if err != nil {
return nil, err
}
} else {
common.Log.Debug("Warning: Unable to find xref table or stream. Repair attempted: Looking for earliest xref from bottom.")
err := parser.repairSeekXrefMarker()
if err != nil {
common.Log.Debug("Repair failed - %v", err)
return nil, err
if reXrefTable.Match(bb) {
common.Log.Trace("Standard xref section table!")
return parser.parseXrefTable()
}
trailerDict, err = parser.parseXrefTable()
if err != nil {
return nil, err
}
// xref match failed. Peek 20 bytes to the left of the current offset,
// append them to the previously read buffer and try again. Reset to the
// original offset after reading.
offset := parser.GetFileOffset()
parser.SetFileOffset(offset - bufLen)
// NOTE(review): this defer sits inside the loop, so it fires when
// parseXref returns, not at the end of the iteration. A parse started by a
// match on the second pass therefore begins at the shifted position, and
// the restore to offset happens only after that parse (or the repair path
// below) has finished. The loop body can also run twice without a match,
// registering two restores. Confirm this ordering is intended.
defer parser.SetFileOffset(offset)
// NOTE(review): lbb is the slice returned by Peek; if it has spare
// capacity, append writes bb's bytes into the same backing array instead
// of allocating. Presumably that array is the reader's internal buffer, in
// which case buffered data could be overwritten (notably when
// offset < bufLen and SetFileOffset clamps to 0) - verify, or copy lbb
// first. The Peek error is also discarded here.
lbb, _ := parser.reader.Peek(bufLen)
bb = append(lbb, bb...)
}
return trailerDict, err
common.Log.Debug("Warning: Unable to find xref table or stream. Repair attempted: Looking for earliest xref from bottom.")
if err := parser.repairSeekXrefMarker(); err != nil {
common.Log.Debug("Repair failed - %v", err)
return nil, err
}
return parser.parseXrefTable()
}
// Look for EOF marker and seek to its beginning.