Search xref objects with tolerance both to the left and right (#447)

* Search for xref objects within a tolerance both to the left and to the right of the current offset. The left side is searched only if no xref is found to the right.
This commit is contained in:
Adrian-George Bostan 2019-04-28 15:26:50 +03:00 committed by Gunnsteinn Hall
parent 7cfcb9cdfd
commit dcc0723e70
2 changed files with 35 additions and 31 deletions

View File

@@ -83,6 +83,10 @@ func (parser *PdfParser) GetFileOffset() int64 {
// SetFileOffset sets the file to an offset position and resets buffer. // SetFileOffset sets the file to an offset position and resets buffer.
func (parser *PdfParser) SetFileOffset(offset int64) { func (parser *PdfParser) SetFileOffset(offset int64) {
if offset < 0 {
offset = 0
}
parser.rs.Seek(offset, io.SeekStart) parser.rs.Seek(offset, io.SeekStart)
parser.reader = bufio.NewReader(parser.rs) parser.reader = bufio.NewReader(parser.rs)
} }

View File

@@ -1103,43 +1103,43 @@ func (parser *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDict
return trailerDict, nil return trailerDict, nil
} }
// Parse xref table at the current file position. Can either be a // Parse xref table at the current file position. Can either be a standard xref
// standard xref table, or an xref stream. // table, or an xref stream.
func (parser *PdfParser) parseXref() (*PdfObjectDictionary, error) { func (parser *PdfParser) parseXref() (*PdfObjectDictionary, error) {
var err error // Search xrefs within 20 bytes of the current location. If the first
var trailerDict *PdfObjectDictionary // iteration of the loop is unable to find a match, peek another 20 bytes
// left of the current location, add them to the previously read buffer
// Points to xref table or xref stream object? // and try again.
bb, _ := parser.reader.Peek(20) const bufLen = 20
if reIndirectObject.MatchString(string(bb)) { bb, _ := parser.reader.Peek(bufLen)
common.Log.Trace("xref points to an object. Probably xref object") for i := 0; i < 2; i++ {
common.Log.Trace("starting with \"%s\"", string(bb)) if reIndirectObject.Match(bb) {
trailerDict, err = parser.parseXrefStream(nil) common.Log.Trace("xref points to an object. Probably xref object")
if err != nil { common.Log.Debug("starting with \"%s\"", string(bb))
return nil, err return parser.parseXrefStream(nil)
} }
} else if reXrefTable.MatchString(string(bb)) { if reXrefTable.Match(bb) {
common.Log.Trace("Standard xref section table!") common.Log.Trace("Standard xref section table!")
var err error return parser.parseXrefTable()
trailerDict, err = parser.parseXrefTable()
if err != nil {
return nil, err
}
} else {
common.Log.Debug("Warning: Unable to find xref table or stream. Repair attempted: Looking for earliest xref from bottom.")
err := parser.repairSeekXrefMarker()
if err != nil {
common.Log.Debug("Repair failed - %v", err)
return nil, err
} }
trailerDict, err = parser.parseXrefTable() // xref match failed. Peek 20 bytes to the left of the current offset,
if err != nil { // append them to the previously read buffer and try again. Reset to the
return nil, err // original offset after reading.
} offset := parser.GetFileOffset()
parser.SetFileOffset(offset - bufLen)
defer parser.SetFileOffset(offset)
lbb, _ := parser.reader.Peek(bufLen)
bb = append(lbb, bb...)
} }
return trailerDict, err common.Log.Debug("Warning: Unable to find xref table or stream. Repair attempted: Looking for earliest xref from bottom.")
if err := parser.repairSeekXrefMarker(); err != nil {
common.Log.Debug("Repair failed - %v", err)
return nil, err
}
return parser.parseXrefTable()
} }
// Look for EOF marker and seek to its beginning. // Look for EOF marker and seek to its beginning.