unipdf/internal/strutils/encoding.go
2019-05-16 20:44:51 +00:00

96 lines
2.3 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
// Package strutils provides string-processing helper functions for internal use within unidoc.
package strutils
import (
"bytes"
"unicode/utf16"
"github.com/unidoc/unipdf/v3/common"
)
// pdfdocEncodingRuneMap is the reverse of pdfDocEncoding: it maps a unicode
// rune to the byte that pdfDocEncoding associates with it. It is filled in
// once by init.
var pdfdocEncodingRuneMap map[rune]byte

// init builds pdfdocEncodingRuneMap by inverting every (byte, rune) entry of
// pdfDocEncoding.
func init() {
	reverse := make(map[rune]byte, len(pdfDocEncoding))
	for code, char := range pdfDocEncoding {
		reverse[char] = code
	}
	pdfdocEncodingRuneMap = reverse
}
// UTF16ToRunes decodes the UTF-16BE encoded byte slice `b` to unicode runes.
//
// A single-byte input is returned as one rune holding that byte's value. Any
// other odd-length input is decoded as if it had been padded with one trailing
// zero byte, and the condition is reported through the debug log. The input
// slice and its backing array are never modified. Surrogate handling is that
// of utf16.Decode.
func UTF16ToRunes(b []byte) []rune {
	if len(b) == 1 {
		return []rune{rune(b[0])}
	}

	odd := len(b)%2 != 0
	if odd {
		common.Log.Debug("ERROR: UTF16ToRunes. Padding with zeros.")
	}

	// Assemble one code unit from each complete big-endian byte pair.
	pairs := len(b) / 2
	units := make([]uint16, pairs, pairs+1)
	for i := range units {
		units[i] = uint16(b[2*i])<<8 | uint16(b[2*i+1])
	}

	if odd {
		// The dangling byte becomes the high half of a final code unit whose
		// low half is zero. Building it here, rather than appending a zero to
		// `b`, avoids writing into the caller's backing array when `b` has
		// spare capacity.
		units = append(units, uint16(b[len(b)-1])<<8)
	}

	return utf16.Decode(units)
}
// UTF16ToString decodes the UTF-16BE encoded byte slice `b` and returns the
// result as a Go string. Decoding is delegated to UTF16ToRunes.
func UTF16ToString(b []byte) string {
	decoded := UTF16ToRunes(b)
	return string(decoded)
}
// StringToUTF16 encodes `s` as UTF-16 and returns the encoded bytes packed in
// a Go string. Each code unit is written as two bytes, high byte first; no
// byte order mark is added.
func StringToUTF16(s string) string {
	units := utf16.Encode([]rune(s))

	var out bytes.Buffer
	out.Grow(2 * len(units))
	for i := range units {
		unit := units[i]
		out.Write([]byte{byte(unit >> 8), byte(unit)})
	}
	return out.String()
}
// PDFDocEncodingToRunes decodes PDFDocEncoded byte slice `b` to unicode runes.
// Each byte is looked up in pdfDocEncoding; bytes without an entry are logged
// at debug level and left out of the result.
func PDFDocEncodingToRunes(b []byte) []rune {
	var decoded []rune
	for i := 0; i < len(b); i++ {
		code := b[i]
		if char, ok := pdfDocEncoding[code]; ok {
			decoded = append(decoded, char)
		} else {
			common.Log.Debug("Error: PDFDocEncoding input mapping error %d - skipping", code)
		}
	}
	return decoded
}
// PDFDocEncodingToString decodes PDFDocEncoded byte slice `b` and returns the
// result as a Go string. Decoding is delegated to PDFDocEncodingToRunes.
func PDFDocEncodingToString(b []byte) string {
	decoded := PDFDocEncodingToRunes(b)
	return string(decoded)
}
// StringToPDFDocEncoding encodes go string `s` to PdfDocEncoding. Each rune of
// `s` is looked up in pdfdocEncodingRuneMap; runes without an entry are logged
// at debug level and left out of the result.
func StringToPDFDocEncoding(s string) []byte {
	var out bytes.Buffer
	for _, char := range s {
		if code, ok := pdfdocEncodingRuneMap[char]; ok {
			out.WriteByte(code)
		} else {
			common.Log.Debug("ERROR: PDFDocEncoding rune mapping missing %c/%X - skipping", char, char)
		}
	}
	return out.Bytes()
}