diff --git a/main.go b/main.go index 66d9cdfb..67a58595 100644 --- a/main.go +++ b/main.go @@ -2,9 +2,11 @@ package main import ( "archive/zip" + "ccitt/ccitt" "fmt" "image" "image/png" + "io" "log" "os" @@ -15,7 +17,7 @@ import ( pdf "github.com/unidoc/unidoc/pdf/model" ) -/*func getPixels(file io.Reader) ([][]byte, error) { +func getPixels(file io.Reader) ([][]byte, error) { img, _, err := image.Decode(file) if err != nil { return nil, err @@ -38,6 +40,7 @@ import ( } return pixels, nil } + // sliceDiff compares two slices returning the first index of the different // elements pair. Returns -1 if the slices contain the same elements func slicesDiff(s1, s2 []byte) int { @@ -54,7 +57,8 @@ func slicesDiff(s1, s2 []byte) int { } return -1 } -func main() { + +/*func main() { image.RegisterFormat("png", "png", png.Decode, png.DecodeConfig) file, err := os.Open("/home/darkrengarius/Downloads/scan2.png") if err != nil { @@ -89,6 +93,38 @@ var xObjectImages = 0 var inlineImages = 0 func main() { + image.RegisterFormat("png", "png", png.Decode, png.DecodeConfig) + file, err := os.Open("/home/darkrengarius/Downloads/p3_0.png") + if err != nil { + log.Fatalf("Error opening file: %v\n", err) + } + defer file.Close() + pixels, err := getPixels(file) + if err != nil { + log.Fatalf("Error decoding the image: %v\n", err) + } + encoder := &ccitt.Encoder{ + Columns: 2560, + K: 4, + EndOfLine: true, + EncodedByteAlign: true, + EndOfBlock: true, + //Rows: 3295, + } + encoded := encoder.Encode(pixels) + + f, err := os.Create("/home/darkrengarius/Downloads/K4-Columns2560-EOL-Aligned-EOFB.gr3") + if err != nil { + log.Fatalf("Error creating file: %v\n", err) + } + + _, err = f.Write(encoded) + if err != nil { + log.Fatalf("Error saving to file: %v\n", err) + } + + log.Println() + /*var files []string err := filepath.Walk("/home/darkrengarius/Downloads/4111112", func(path string, info os.FileInfo, err error) error { @@ -110,7 +146,7 @@ func main() { outputPath := "/home/darkrengarius/Downloads/testCombined000141.zip" fmt.Printf("Input file: %s\n", inputPath) - err := extractImagesToArchive(inputPath, outputPath) + err = extractImagesToArchive(inputPath, outputPath) if err != nil { fmt.Printf("Error: %v\n", err) os.Exit(1) diff --git a/pdf/core/ccittfaxdecode/decode.go b/pdf/core/ccittfaxdecode/decode.go index 5edb2f30..72ff32a2 100644 --- a/pdf/core/ccittfaxdecode/decode.go +++ b/pdf/core/ccittfaxdecode/decode.go @@ -9,6 +9,9 @@ var ( ErrEOFBCorrupt = errors.New("EOFB code is corrupted") ErrRTCCorrupt = errors.New("RTC code is corrupted") ErrWrongCodeInHorizontalMode = errors.New("wrong code in horizontal mode") + ErrNoEOLFound = errors.New("no EOL found while the EndOfLine parameter is true") + ErrInvalidEOL = errors.New("invalid EOL") + ErrInvalid2DCode = errors.New("invalid 2D code") whiteTree = &decodingTreeNode{ Val: 255, @@ -89,7 +92,7 @@ func (e *Encoder) decodeG31D(encoded []byte) ([][]byte, error) { gotEOL, bitPos = tryFetchEOL(encoded, bitPos) if !gotEOL { if e.EndOfLine { - return nil, errors.New("no EOL found while the EndOfLine parameter is true") + return nil, ErrNoEOLFound } } else { // 5 EOLs left to fill RTC @@ -101,7 +104,7 @@ func (e *Encoder) decodeG31D(encoded []byte) ([][]byte, error) { break } - return nil, errors.New("invalid EOL") + return nil, ErrInvalidEOL } } @@ -150,7 +153,7 @@ byteLoop: if !gotEOL { if e.EndOfLine { - return nil, errors.New("no EOL found while the EndOfLine parameter is true") + return nil, ErrNoEOLFound } } @@ -183,7 +186,7 @@ byteLoop: break byteLoop } else { if e.EndOfLine { - return nil, errors.New("no EOL found while the EndOfLine parameter is true") + return nil, ErrNoEOLFound } } } @@ -298,7 +301,7 @@ func (e *Encoder) decodeG4(encoded []byte) ([][]byte, error) { for a0 < e.Columns { twoDimCode, bitPos, ok = fetchNext2DCode(encoded, bitPos) if !ok { - return nil, errors.New("wrong 2 dim code") + return nil, ErrInvalid2DCode } switch twoDimCode { diff --git a/pdf/core/ccittfaxdecode/decode_test.go b/pdf/core/ccittfaxdecode/decode_test.go index babcb8bc..a6666994 100644 --- a/pdf/core/ccittfaxdecode/decode_test.go +++ b/pdf/core/ccittfaxdecode/decode_test.go @@ -4,6 +4,10 @@ import ( "testing" ) +const ( + testDataPath = "./testdata" +) + func TestDecodeNextRunLen(t *testing.T) { type testResult struct { PixelsRow []byte @@ -718,6 +722,260 @@ func TestDecodeHorizontalMode(t *testing.T) { } } +func TestDecodePassMode(t *testing.T) { + type testResult struct { + PixelsRow []byte + A0 int + } + + type testData struct { + Pixels [][]byte + PixelsRow []byte + IsWhite bool + A0 int + Want testResult + } + + tests := []testData{ + { + Pixels: [][]byte{ + {white, white, white, white, white}, + }, + PixelsRow: nil, + IsWhite: true, + A0: -1, + Want: testResult{ + PixelsRow: []byte{white, white, white, white, white}, + A0: 5, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black}, + {white, white, white, white, white}, + }, + PixelsRow: []byte{black}, + IsWhite: true, + A0: 1, + Want: testResult{ + PixelsRow: []byte{black, white, white, white, white}, + A0: 5, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black}, + {white, white, white, white, white}, + }, + PixelsRow: []byte{}, + IsWhite: false, + A0: 0, + Want: testResult{ + PixelsRow: []byte{black, black, black, black, black}, + A0: 5, + }, + }, + } + + for _, test := range tests { + gotPixelsRow, gotA0 := decodePassMode(test.Pixels, test.PixelsRow, test.IsWhite, test.A0) + + if len(gotPixelsRow) != len(test.Want.PixelsRow) { + t.Errorf("Wrong pixels row len. Got %v, want %v\n", len(gotPixelsRow), len(test.Want.PixelsRow)) + } else { + for i := range gotPixelsRow { + if gotPixelsRow[i] != test.Want.PixelsRow[i] { + t.Errorf("Wrong pixel at %v. Got %v, want %v\n", + i, gotPixelsRow[i], test.Want.PixelsRow[i]) + } + } + } + + if gotA0 != test.Want.A0 { + t.Errorf("Wrong a0. Got %v, want %v\n", gotA0, test.Want.A0) + } + } +} + +func TestDecode(t *testing.T) { + type testResult struct { + Pixels [][]byte + Err error + } + + type testData struct { + Encoder Encoder + InputFilePath string + Want testResult + } + + tests := []testData{} +} + +func TestDecodeVerticalMode(t *testing.T) { + type testResult struct { + PixelsRow []byte + A0 int + } + + type testData struct { + Pixels [][]byte + PixelsRow []byte + IsWhite bool + A0 int + Shift int + Want testResult + } + + tests := []testData{ + { + Pixels: [][]byte{ + {white, white, white, white, white}, + }, + PixelsRow: nil, + IsWhite: true, + A0: -1, + Shift: 0, + Want: testResult{ + PixelsRow: []byte{white, white, white, white, white}, + A0: 5, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black}, + {white, white, white, white, white}, + }, + PixelsRow: []byte{white}, + IsWhite: false, + A0: 1, + Shift: -1, + Want: testResult{ + PixelsRow: []byte{white, black, black, black}, + A0: 4, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black}, + {white, white, white, white, white}, + }, + PixelsRow: []byte{}, + IsWhite: true, + A0: -1, + Shift: -2, + Want: testResult{ + PixelsRow: []byte{white, white, white}, + A0: 3, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black}, + {white, white, white, white, white}, + }, + PixelsRow: nil, + IsWhite: true, + A0: -1, + Shift: -3, + Want: testResult{ + PixelsRow: []byte{white, white}, + A0: 2, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black, black, black, black}, + {white, black, white, black, black, black, black, black}, + }, + PixelsRow: []byte{black}, + IsWhite: true, + A0: 1, + Shift: -1, + Want: testResult{ + PixelsRow: []byte{black, white}, + A0: 2, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black, black, black, black}, + {white, black, white, black, black, black, black, black}, + }, + PixelsRow: []byte{black}, + IsWhite: true, + A0: 1, + Shift: 0, + Want: testResult{ + PixelsRow: []byte{black, white, white}, + A0: 3, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black, black, black, black}, + {white, black, white, black, black, black, black, black}, + }, + PixelsRow: []byte{black}, + IsWhite: true, + A0: 1, + Shift: 1, + Want: testResult{ + PixelsRow: []byte{black, white, white, white}, + A0: 4, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black, black, black, black}, + {white, black, white, black, black, black, black, black}, + }, + PixelsRow: []byte{black}, + IsWhite: true, + A0: 1, + Shift: 2, + Want: testResult{ + PixelsRow: []byte{black, white, white, white, white}, + A0: 5, + }, + }, + { + Pixels: [][]byte{ + {black, black, black, black, black, black, black, black}, + {white, black, white, black, black, black, black, black}, + }, + PixelsRow: []byte{black}, + IsWhite: true, + A0: 1, + Shift: 3, + Want: testResult{ + PixelsRow: []byte{black, white, white, white, white, white}, + A0: 6, + }, + }, + } + + for _, test := range tests { + gotPixelsRow, gotA0 := decodeVerticalMode(test.Pixels, test.PixelsRow, test.IsWhite, test.A0, test.Shift) + + if len(gotPixelsRow) != len(test.Want.PixelsRow) { + t.Errorf("Wrong pixels row len. Got %v, want %v\n", + len(gotPixelsRow), len(test.Want.PixelsRow)) + } else { + for i := range gotPixelsRow { + if gotPixelsRow[i] != test.Want.PixelsRow[i] { + t.Errorf("Wrong pixel at %v. Got %v, want %v\n", + i, gotPixelsRow[i], test.Want.PixelsRow[i]) + } + } + } + + if gotA0 != test.Want.A0 { + t.Errorf("Wrong a0. Got %v, want %v\n", gotA0, test.Want.A0) + } + } +} + func TestDecodeRow1D(t *testing.T) { type testResult struct { PixelsRow []byte diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Aligned-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Aligned-EOFB.gr3 new file mode 100644 index 00000000..65461620 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Aligned-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Aligned-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Aligned-Rows3295.gr3 new file mode 100644 index 00000000..997ac478 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Aligned-Rows3295.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOFB.gr3 new file mode 100644 index 00000000..b802c130 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Aligned-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Aligned-EOFB.gr3 new file mode 100644 index 00000000..65461620 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Aligned-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Aligned-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Aligned-Rows3295.gr3 new file mode 100644 index 00000000..997ac478 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Aligned-Rows3295.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-EOFB.gr3 new file mode 100644 index 00000000..b802c130 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Rows3295.gr3 new file mode 100644 index 00000000..9bfb448d Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-EOL-Rows3295.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Rows3295.gr3 new file mode 100644 index 00000000..9bfb448d Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K0-Columns2560-Rows3295.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Aligned-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Aligned-EOFB.gr3 new file mode 100644 index 00000000..3d6343cc Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Aligned-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Aligned-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Aligned-Rows3295.gr3 new file mode 100644 index 00000000..abe1432a Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Aligned-Rows3295.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOFB.gr3 new file mode 100644 index 00000000..b311e351 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Aligned-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Aligned-EOFB.gr3 new file mode 100644 index 00000000..3d6343cc Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Aligned-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Aligned-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Aligned-Rows3295.gr3 new file mode 100644 index 00000000..abe1432a Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Aligned-Rows3295.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-EOFB.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-EOFB.gr3 new file mode 100644 index 00000000..b311e351 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-EOFB.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Rows3295.gr3 new file mode 100644 index 00000000..799e0261 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-EOL-Rows3295.gr3 differ diff --git a/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Rows3295.gr3 b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Rows3295.gr3 new file mode 100644 index 00000000..799e0261 Binary files /dev/null and b/pdf/core/ccittfaxdecode/testdata/K4-Columns2560-Rows3295.gr3 differ diff --git a/pdf/core/encoding.go b/pdf/core/encoding.go index 7f551a10..1ce9712b 100644 --- a/pdf/core/encoding.go +++ b/pdf/core/encoding.go @@ -1775,30 +1775,6 @@ func newCCITTFaxEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfO return encoder, nil } -/*func getPixels(file io.Reader) ([][]byte, error) { - img, _, err := goimage.Decode(file) - if err != nil { - return nil, err - } - bounds := img.Bounds() - w, h := bounds.Max.X, bounds.Max.Y - var pixels [][]byte - for y := 0; y < h; y++ { - var row []byte - for x := 0; x < w; x++ { - r, g, b, _ := img.At(x, y).RGBA() - if r == 65535 && g == 65535 && b == 65535 { - // append white - row = append(row, 1) - } else { - row = append(row, 0) - } - } - pixels = append(pixels, row) - } - return pixels, nil -}*/ - func (this *CCITTFaxEncoder) DecodeBytes(encoded []byte) ([]byte, error) { encoder := &ccittfaxdecode.Encoder{ K: this.K,