Added a test for multibyte encoding.

This commit is contained in:
Peter Williams 2020-05-20 19:07:22 +10:00
parent a9910e7e06
commit 0c54cec2c5

View File

@@ -316,6 +316,11 @@ var fileExtractionTests = []struct {
`The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",`},
},
},
{filename: "Saudi.pdf",
pageTerms: map[int][]string{
10: []string{"الله"},
},
},
// TODO(peterwilliams97): Reinstate these 2 tests when diacritic combination is fixed.
// {filename: "Ito_Formula.pdf",
// pageTerms: map[int][]string{