started with aztec code

This commit is contained in:
boombuler 2016-12-25 11:27:46 +01:00
parent 8b1271e880
commit 3dbe04fe04
4 changed files with 642 additions and 0 deletions

171
aztec/highlevel.go Normal file
View File

@ -0,0 +1,171 @@
package aztec
import (
"github.com/boombuler/barcode/utils"
)
// highlevelEncode searches for the cheapest high-level encoding of data and
// returns it as a bit list.
//
// It walks the input once while maintaining a set of candidate encoder
// states. Each step replaces the set with all sensible successors, pruned by
// the update helpers. The four two-byte sequences "\r\n", ". ", ", " and ": "
// have dedicated PUNCT codes (2 to 5) and are consumed as a unit. When the
// input is exhausted the candidate with the fewest bits is rendered.
func highlevelEncode(data []byte) *utils.BitList {
	candidates := stateSlice{initialState}
	for pos := 0; pos < len(data); pos++ {
		// Byte following the current one, or 0 at the end of the input.
		var following byte
		if pos+1 < len(data) {
			following = data[pos+1]
		}

		pair := 0
		switch data[pos] {
		case '\r':
			if following == '\n' {
				pair = 2
			}
		case '.':
			if following == ' ' {
				pair = 3
			}
		case ',':
			if following == ' ' {
				pair = 4
			}
		case ':':
			if following == ' ' {
				pair = 5
			}
		}

		if pair == 0 {
			// Ordinary byte: advance every candidate by one character.
			candidates = updateStateListForChar(candidates, data, pos)
			continue
		}
		// One of the special PUNCT pairs: advance every candidate by both
		// bytes at once and skip the second byte.
		candidates = updateStateListForPair(candidates, data, pos, pair)
		pos++
	}

	// Pick the first candidate with the strictly smallest bit count.
	const maxInt = int(^uint(0) >> 1)
	fewest := maxInt
	var best *state
	for _, cand := range candidates {
		if cand.bitCount < fewest {
			fewest, best = cand.bitCount, cand
		}
	}
	if best == nil {
		return new(utils.BitList)
	}
	return best.toBitList(data)
}
// simplifyStates removes every state that is dominated by another state of
// the list, as judged by isBetterThanOrEqualTo.
//
// States are considered in input order. A newcomer evicts the already kept
// states it dominates until a kept state is found that dominates the
// newcomer; from then on the remaining kept states are preserved and the
// newcomer is dropped.
func simplifyStates(states stateSlice) stateSlice {
	var kept stateSlice
	for _, candidate := range states {
		dominated := false
		var survivors stateSlice
		for _, existing := range kept {
			if !dominated {
				if existing.isBetterThanOrEqualTo(candidate) {
					dominated = true
				} else if candidate.isBetterThanOrEqualTo(existing) {
					// The newcomer makes this state redundant.
					continue
				}
			}
			survivors = append(survivors, existing)
		}
		if !dominated {
			survivors = append(survivors, candidate)
		}
		kept = survivors
	}
	return kept
}
// updateStateListForChar advances every state in states by the single byte
// data[index], pools all successor states and prunes the non-optimal ones
// with simplifyStates.
func updateStateListForChar(states stateSlice, data []byte, index int) stateSlice {
	var successors stateSlice
	for i := range states {
		successors = append(successors, updateStateForChar(states[i], data, index)...)
	}
	return simplifyStates(successors)
}
// updateStateForChar returns the states that can follow s once the byte
// data[index] has been encoded. Up to three strategies are tried for every
// mode whose table contains the byte: latching to the mode, shifting to it
// for a single character, and appending the byte to a binary shift run.
func updateStateForChar(s *state, data []byte, index int) stateSlice {
	ch := data[index]
	inCurrent := charMap[s.mode][ch] > 0

	var out stateSlice
	// s with any pending binary shift run closed; built on first use only.
	var flushed *state
	for m := mode_upper; m <= mode_punct; m++ {
		code := charMap[m][ch]
		if code <= 0 {
			continue
		}
		if flushed == nil {
			flushed = s.endBinaryShift(index)
		}
		// Latch. When the byte exists in the current table, the only other
		// mode worth latching to is digit (4-bit codes); any other latch
		// would be just as cheap after this character.
		if !inCurrent || m == s.mode || m == mode_digit {
			out = append(out, flushed.latchAndAppend(m, code))
		}
		// Shift. A temporary shift never saves bits when the byte already
		// exists in the current table.
		if !inCurrent {
			if _, canShift := shiftTable[s.mode][m]; canShift {
				out = append(out, flushed.shiftAndAppend(m, code))
			}
		}
	}

	// Binary shift is only worthwhile when a run is already open or when the
	// current table cannot express the byte.
	if s.bShiftByteCount > 0 || charMap[s.mode][ch] == 0 {
		out = append(out, s.addBinaryShiftChar(index))
	}
	return out
}
// updateStateListForPair advances every state in states by the two-byte
// sequence starting at data[index] that is represented by pairCode, pools all
// successor states and prunes the non-optimal ones with simplifyStates.
func updateStateListForPair(states stateSlice, data []byte, index int, pairCode int) stateSlice {
	var successors stateSlice
	for i := range states {
		successors = append(successors, updateStateForPair(states[i], data, index, pairCode)...)
	}
	return simplifyStates(successors)
}
// updateStateForPair returns the states that can follow s once the special
// two-byte sequence at data[index] and data[index+1] has been encoded.
// pairCode is the PUNCT code of the pair (2 to 5, as assigned by
// highlevelEncode).
func updateStateForPair(s *state, data []byte, index int, pairCode int) stateSlice {
	base := s.endBinaryShift(index)

	// Option 1: latch to PUNCT and emit the pair code.
	out := stateSlice{base.latchAndAppend(mode_punct, pairCode)}

	// Option 2: shift to PUNCT for just this code. Not applicable when the
	// state is already in PUNCT, which option 1 covers.
	if s.mode != mode_punct {
		out = append(out, base.shiftAndAppend(mode_punct, pairCode))
	}

	// Option 3: ". " and ", " consist of characters that also exist in
	// DIGIT, so two digit codes are sometimes cheaper.
	switch pairCode {
	case 3, 4:
		punctuation := base.latchAndAppend(mode_digit, 16-pairCode) // period or comma in DIGIT
		out = append(out, punctuation.latchAndAppend(mode_digit, 1)) // space in DIGIT
	}

	// Option 4: extend the binary shift run, which only makes sense when a
	// run is already open.
	if s.bShiftByteCount > 0 {
		first := s.addBinaryShiftChar(index)
		out = append(out, first.addBinaryShiftChar(index+1))
	}
	return out
}

132
aztec/highlevel_test.go Normal file
View File

@ -0,0 +1,132 @@
package aztec
import (
"bytes"
"strings"
"testing"
"github.com/boombuler/barcode/utils"
)
// bitStr renders a bit list as text, using 'X' for a set bit and '.' for a
// cleared one.
func bitStr(bl *utils.BitList) string {
	var out bytes.Buffer
	for i, n := 0, bl.Len(); i < n; i++ {
		mark := byte('.')
		if bl.GetBit(i) {
			mark = 'X'
		}
		out.WriteByte(mark)
	}
	return out.String()
}
func testHighLevelEncodeString(t *testing.T, s, expectedBits string) {
bits := highlevelEncode([]byte(s))
result := bitStr(bits)
expectedBits = strings.Replace(expectedBits, " ", "", -1)
if result != expectedBits {
t.Errorf("invalid result for highlevelEncode(%q). Got:\n%s", s, result)
}
}
// testHighLevelEncodeStringCnt encodes s and checks only the number of bits
// produced, for inputs whose exact bit pattern is not spelled out.
func testHighLevelEncodeStringCnt(t *testing.T, s string, expectedBitCnt int) {
	got := highlevelEncode([]byte(s)).Len()
	if got == expectedBitCnt {
		return
	}
	t.Errorf("invalid result for highlevelEncode(%q). Got %d, expected %d bits", s, got, expectedBitCnt)
}
// Test_HighLevelEncode checks the exact bit patterns produced for a number of
// short text inputs and the total bit count for one longer real-world input.
func Test_HighLevelEncode(t *testing.T) {
	cases := []struct {
		input string
		bits  string
	}{
		{
			"A. b.",
			// 'A' P/S '. ' L/L b D/L '.'
			"...X. ..... ...XX XXX.. ...XX XXXX. XX.X",
		},
		{
			"Lorem ipsum.",
			// 'L' L/L 'o' 'r' 'e' 'm' ' ' 'i' 'p' 's' 'u' 'm' D/L '.'
			".XX.X XXX.. X.... X..XX ..XX. .XXX. ....X .X.X. X...X X.X.. X.XX. .XXX. XXXX. XX.X",
		},
		{
			"Lo. Test 123.",
			// 'L' L/L 'o' P/S '. ' U/S 'T' 'e' 's' 't' D/L ' ' '1' '2' '3' '.'
			".XX.X XXX.. X.... ..... ...XX XXX.. X.X.X ..XX. X.X.. X.X.X XXXX. ...X ..XX .X.. .X.X XX.X",
		},
		{
			"Lo...x",
			// 'L' L/L 'o' D/L '.' '.' '.' U/L L/L 'x'
			".XX.X XXX.. X.... XXXX. XX.X XX.X XX.X XXX. XXX.. XX..X",
		},
		{
			". x://abc/.",
			// P/S '. ' L/L 'x' P/S ':' P/S '/' P/S '/' 'a' 'b' 'c' P/S '/' D/L '.'
			"..... ...XX XXX.. XX..X ..... X.X.X ..... X.X.. ..... X.X.. ...X. ...XX ..X.. ..... X.X.. XXXX. XX.X",
		},
		{
			// Uses Binary/Shift rather than Lower/Shift to save two bits.
			"ABCdEFG",
			// 'A' 'B' 'C' B/S =1 'd' 'E' 'F' 'G'
			"...X. ...XX ..X.. XXXXX ....X .XX..X.. ..XX. ..XXX .X...",
		},
	}
	for _, c := range cases {
		testHighLevelEncodeString(t, c.input, c.bits)
	}

	// Found on an airline boarding pass. Several stretches of binary shift
	// are necessary to keep the bit count this low.
	boardingPass := "09 UAG ^160MEUCIQC0sYS/HpKxnBELR1uB85R20OoqqwFGa0q2uEi" +
		"Ygh6utAIgLl1aBVM4EOTQtMQQYH9M2Z3Dp4qnA/fwWuQ+M8L3V8U="
	testHighLevelEncodeStringCnt(t, boardingPass, 823)
}
// Test_HighLevelEncodeBinary exercises the binary shift encoding: short runs
// with known bit patterns first, then the encoded length of long all-binary
// inputs around the points where the binary shift header changes size.
func Test_HighLevelEncodeBinary(t *testing.T) {
	cases := []struct {
		input string
		bits  string
	}{
		// Binary short form, single byte.
		{
			"N\u0000N",
			// 'N' B/S =1 '\0' 'N' (the final "N" is encoded in UPPER)
			".XXXX XXXXX ....X ........ .XXXX",
		},
		{
			"N\u0000n",
			// 'N' B/S =2 '\0' 'n' (the "n" is encoded in BINARY)
			".XXXX XXXXX ...X. ........ .XX.XXX.",
		},
		// Binary short form, consecutive bytes.
		{
			"N\x00\x80 A",
			// 'N' B/S =2 '\0' \u0080 ' ' 'A'
			".XXXX XXXXX ...X. ........ X....... ....X ...X.",
		},
		// Binary run that swallows a single non-binary character.
		{
			"\x00a\xFF\x80 A",
			// B/S =4 '\0' 'a' '\377' '\200' ' ' 'A'
			"XXXXX ..X.. ........ .XX....X XXXXXXXX X....... ....X ...X.",
		},
		// Getting into binary mode from digit mode.
		{
			"1234\u0000",
			// D/L '1' '2' '3' '4' U/L B/S =1 \0
			"XXXX. ..XX .X.. .X.X .XX. XXX. XXXXX ....X ........",
		},
	}
	for _, c := range cases {
		testHighLevelEncodeString(t, c.input, c.bits)
	}

	// A buffer in which every byte requires binary encoding.
	raw := make([]byte, 3001)
	for i := range raw {
		raw[i] = byte(128 + (i % 30))
	}

	// headerBits is the expected overhead, in bits, of a binary run of n bytes.
	headerBits := func(n int) int {
		if n <= 31 {
			return 10
		}
		if n <= 62 {
			return 20
		}
		if n <= 2078 {
			return 21
		}
		return 31
	}

	// Probe lengths near the places where the encoding changes:
	// 31, 62 and 2047+31=2078.
	lengths := []int{1, 2, 3, 10, 29, 30, 31, 32, 33, 60, 61, 62, 63, 64, 2076, 2077, 2078, 2079, 2080, 2100}
	for _, n := range lengths {
		want := 8*n + headerBits(n)
		payload := string(raw[:n])

		// Verify that we are correct about the length.
		testHighLevelEncodeStringCnt(t, payload, want)

		switch n {
		case 1, 32, 2079:
			// Border cases: one more binary byte costs more than 8 or 9 bits
			// here, so a neighbouring letter is not merged into the run.
		default:
			shorter := string(raw[:n-1])
			// A lower case letter at the beginning is merged into binary mode.
			testHighLevelEncodeStringCnt(t, "a"+shorter, want)
			// A lower case letter at the end is merged into binary mode as well.
			testHighLevelEncodeStringCnt(t, shorter+"a", want)
		}

		// A lower case letter at both ends is enough to latch into LOWER.
		testHighLevelEncodeStringCnt(t, "a"+payload+"b", want+15)
	}
}

264
aztec/state.go Normal file
View File

@ -0,0 +1,264 @@
package aztec
import (
"fmt"
"github.com/boombuler/barcode/utils"
)
// encodingMode identifies one of the five character-code tables the
// high-level encoder can be in.
type encodingMode byte
// The modes are numbered consecutively from mode_upper to mode_punct so that
// code can loop over all of them. The trailing comments give the width of a
// single character code in that mode (see BitCount).
const (
mode_upper encodingMode = iota // 5 bits
mode_lower // 5 bits
mode_digit // 4 bits
mode_mixed // 5 bits
mode_punct // 5 bits
)
var (
// The Latch Table shows, for each pair of Modes, the optimal method for
// getting from one mode to another. In the worst possible case, this can
// be up to 14 bits. In the best possible case, we are already there!
// The high half-word of each entry gives the number of bits.
// The low half-word of each entry holds the actual bits needed to make
// the change.
// Indexed as latchTable[from][to]; readers split an entry with
// entry>>16 (bit count) and entry&0xFFFF (bits).
latchTable = map[encodingMode]map[encodingMode]int{
mode_upper: {
mode_upper: 0,
mode_lower: (5 << 16) + 28,
mode_digit: (5 << 16) + 30,
mode_mixed: (5 << 16) + 29,
mode_punct: (10 << 16) + (29 << 5) + 30,
},
mode_lower: {
mode_upper: (9 << 16) + (30 << 4) + 14,
mode_lower: 0,
mode_digit: (5 << 16) + 30,
mode_mixed: (5 << 16) + 29,
mode_punct: (10 << 16) + (29 << 5) + 30,
},
mode_digit: {
mode_upper: (4 << 16) + 14,
mode_lower: (9 << 16) + (14 << 5) + 28,
mode_digit: 0,
mode_mixed: (9 << 16) + (14 << 5) + 29,
mode_punct: (14 << 16) + (14 << 10) + (29 << 5) + 30,
},
mode_mixed: {
mode_upper: (5 << 16) + 29,
mode_lower: (5 << 16) + 28,
mode_digit: (10 << 16) + (29 << 5) + 30,
mode_mixed: 0,
mode_punct: (5 << 16) + 30,
},
mode_punct: {
mode_upper: (5 << 16) + 31,
mode_lower: (10 << 16) + (31 << 5) + 28,
mode_digit: (10 << 16) + (31 << 5) + 30,
mode_mixed: (10 << 16) + (31 << 5) + 29,
mode_punct: 0,
},
}
// A map showing the available shift codes. (The shifts to BINARY are not shown)
// Indexed as shiftTable[from][to]. The value is the code emitted to perform
// the shift; since 0 is a valid code, callers test for the presence of the
// key rather than for a non-zero value. mode_punct has no entry, so no shift
// is available from that mode.
shiftTable = map[encodingMode]map[encodingMode]int{
mode_upper: {
mode_punct: 0,
},
mode_lower: {
mode_punct: 0,
mode_upper: 28,
},
mode_mixed: {
mode_punct: 0,
},
mode_digit: {
mode_punct: 0,
mode_upper: 15,
},
}
// charMap[mode][b] is the code of byte b in the given mode, or 0 when the
// mode cannot represent b. It is filled in by init.
charMap map[encodingMode][]int
)
// state is one candidate encoding of a prefix of the input. The methods in
// this file never modify a state in place; they return either the receiver
// itself or a newly allocated state.
type state struct {
// mode is the encoding mode the encoder is in after the tokens so far.
mode encodingMode
// tokens is the most recently emitted token; earlier tokens are reached
// through prev(). nil means nothing has been emitted yet.
tokens token
// bShiftByteCount is the number of input bytes collected for a binary
// shift run that has not been turned into a token yet (0 if none).
bShiftByteCount int
// bitCount is the number of bits accounted for so far, including the
// pending binary shift run.
bitCount int
}
// stateSlice is a list of candidate encoder states.
type stateSlice []*state
// initialState is the state every encoding starts from: mode_upper, no
// tokens emitted, no pending binary shift run and zero bits used.
var initialState *state = &state{
mode: mode_upper,
tokens: nil,
bShiftByteCount: 0,
bitCount: 0,
}
// init builds charMap, the per-mode lookup from input byte to character
// code. A value of 0 means "not representable in this mode", which is why
// every real code in the tables below is greater than zero.
func init() {
	charMap = make(map[encodingMode][]int)
	charMap[mode_upper] = make([]int, 256)
	charMap[mode_lower] = make([]int, 256)
	charMap[mode_digit] = make([]int, 256)
	charMap[mode_mixed] = make([]int, 256)
	charMap[mode_punct] = make([]int, 256)

	// UPPER: space is code 1, 'A'..'Z' are codes 2..27.
	charMap[mode_upper][' '] = 1
	for c := 'A'; c <= 'Z'; c++ {
		charMap[mode_upper][c] = int(c - 'A' + 2)
	}

	// LOWER: space is code 1, 'a'..'z' are codes 2..27.
	charMap[mode_lower][' '] = 1
	for c := 'a'; c <= 'z'; c++ {
		charMap[mode_lower][c] = int(c - 'a' + 2)
	}

	// DIGIT: space is code 1, '0'..'9' are codes 2..11, then ',' and '.'.
	charMap[mode_digit][' '] = 1
	for c := '0'; c <= '9'; c++ {
		charMap[mode_digit][c] = int(c - '0' + 2)
	}
	charMap[mode_digit][','] = 12
	charMap[mode_digit]['.'] = 13

	// MIXED: the position in the table is the code of the listed byte.
	// Position 0 holds byte 0, which therefore stays unmapped (code 0).
	mixedTable := []int{
		0, ' ', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
		11, 12, 13, 27, 28, 29, 30, 31, '@', '\\', '^',
		'_', '`', '|', '~', 127,
	}
	for i, v := range mixedTable {
		charMap[mode_mixed][v] = i
	}

	// PUNCT: the position in the table is the code of the listed byte.
	// Zero entries are placeholders: codes 2..5 stand for the two-character
	// pairs "\r\n", ". ", ", " and ": ", which highlevelEncode handles
	// separately. Code 7 is the double quote; it used to be a second copy of
	// the apostrophe (code 12), which left '"' without a PUNCT code.
	punctTable := []int{
		0, '\r', 0, 0, 0, 0, '!', '"', '#', '$', '%', '&', '\'',
		'(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
		'[', ']', '{', '}',
	}
	for i, v := range punctTable {
		if v > 0 {
			charMap[mode_punct][v] = i
		}
	}
}
// BitCount returns the width in bits of a single character code in mode em:
// 4 for mode_digit, 5 for every other mode.
func (em encodingMode) BitCount() byte {
	switch em {
	case mode_digit:
		return 4
	default:
		return 5
	}
}
// latchAndAppend returns a new state that represents s followed by a latch
// to mode (omitted when s is already in that mode) and then the character
// code value. The receiver is left untouched.
func (s *state) latchAndAppend(mode encodingMode, value int) *state {
	next := &state{
		mode:     mode,
		tokens:   s.tokens,
		bitCount: s.bitCount,
	}
	if s.mode != mode {
		entry := latchTable[s.mode][mode]
		width := entry >> 16 // high half-word: number of latch bits
		next.tokens = newSimpleToken(next.tokens, entry&0xFFFF, byte(width))
		next.bitCount += width
	}
	codeWidth := mode.BitCount()
	next.tokens = newSimpleToken(next.tokens, value, codeWidth)
	next.bitCount += int(codeWidth)
	return next
}
// shiftAndAppend returns a new state that represents s followed by a
// temporary shift to mode and the single character code value. The resulting
// state stays in the mode of s.
func (s *state) shiftAndAppend(mode encodingMode, value int) *state {
	// The shift code itself is as wide as a code of the current mode.
	shiftWidth := s.mode.BitCount()
	withShift := newSimpleToken(s.tokens, shiftTable[s.mode][mode], shiftWidth)
	// Shifts exist only to UPPER and PUNCT, both with 5-bit codes.
	return &state{
		mode:     s.mode,
		tokens:   newSimpleToken(withShift, value, 5),
		bitCount: s.bitCount + int(shiftWidth) + 5,
	}
}
// addBinaryShiftChar returns a new state that represents s with one more
// input byte added to the pending binary shift run. index is the position of
// that byte in the input.
func (s *state) addBinaryShiftChar(index int) *state {
	next := &state{
		mode:            s.mode,
		tokens:          s.tokens,
		bShiftByteCount: s.bShiftByteCount + 1,
		bitCount:        s.bitCount,
	}

	// From PUNCT and DIGIT the encoder first latches to UPPER.
	switch s.mode {
	case mode_punct, mode_digit:
		entry := latchTable[s.mode][mode_upper]
		next.tokens = newSimpleToken(next.tokens, entry&0xFFFF, byte(entry>>16))
		next.bitCount += entry >> 16
		next.mode = mode_upper
	}

	// Cost of the byte, depending on how long the run was before it.
	switch s.bShiftByteCount {
	case 0, 31:
		next.bitCount += 18
	case 62:
		next.bitCount += 9
	default:
		next.bitCount += 8
	}

	if next.bShiftByteCount == 2047+31 {
		// The run is as long as it is allowed to be, so it ends here.
		return next.endBinaryShift(index + 1)
	}
	return next
}
// endBinaryShift returns a state equal to s in which the pending binary
// shift run, if any, has been turned into a token. index is the input
// position just past the last byte of the run. When no run is pending, s
// itself is returned.
func (s *state) endBinaryShift(index int) *state {
	pending := s.bShiftByteCount
	if pending == 0 {
		return s
	}
	return &state{
		mode:     s.mode,
		tokens:   newShiftToken(s.tokens, index-pending, pending),
		bitCount: s.bitCount,
	}
}
// isBetterThanOrEqualTo reports whether s is at least as good a state to be
// in as other under all possible circumstances. The cost of s is its bit
// count plus whatever it takes to reach the situation of other: a latch to
// the mode of other and, where needed, entering binary shift mode.
func (s *state) isBetterThanOrEqualTo(other *state) bool {
	cost := s.bitCount + (latchTable[s.mode][other.mode] >> 16)
	if other.bShiftByteCount > 0 {
		if s.bShiftByteCount == 0 || s.bShiftByteCount > other.bShiftByteCount {
			cost += 10 // Cost of entering Binary Shift mode.
		}
	}
	return other.bitCount >= cost
}
// toBitList renders the token chain of s, with any pending binary shift run
// closed at the end of text, into a bit list. text must be the input the
// state was built from, because binary shift tokens read their bytes from it.
func (s *state) toBitList(text []byte) *utils.BitList {
	// The chain is linked from the newest token back to the oldest.
	var chain []token
	for t := s.endBinaryShift(len(text)).tokens; t != nil; t = t.prev() {
		chain = append(chain, t)
	}

	// Emit in chronological order, i.e. back to front.
	out := new(utils.BitList)
	for i := range chain {
		chain[len(chain)-1-i].appendTo(out, text)
	}
	return out
}
// String returns a debugging representation of s: mode, bit count, pending
// binary shift byte count and the emitted tokens in chronological order.
func (s *state) String() string {
	// Collect newest to oldest, then reverse in place.
	var chain []token
	for t := s.tokens; t != nil; t = t.prev() {
		chain = append(chain, t)
	}
	for i, j := 0, len(chain)-1; i < j; i, j = i+1, j-1 {
		chain[i], chain[j] = chain[j], chain[i]
	}
	return fmt.Sprintf("M:%d bits=%d bytes=%d: %v", s.mode, s.bitCount, s.bShiftByteCount, chain)
}

75
aztec/token.go Normal file
View File

@ -0,0 +1,75 @@
package aztec
import (
"fmt"
"github.com/boombuler/barcode/utils"
)
// token is one element of the encoder output. Tokens form a singly linked
// list that points backwards, from the newest token to the oldest.
type token interface {
fmt.Stringer
// prev returns the token emitted before this one, or nil if there is none.
prev() token
// appendTo writes the bits of this token to bits. text is the complete
// input being encoded.
appendTo(bits *utils.BitList, text []byte)
}
// simpleToken is a fixed code: the low bitCount bits of value.
type simpleToken struct {
// token is the previous token in the chain (nil for the first one).
token
value int
bitCount byte
}
// binaryShiftToken is a run of input bytes that is emitted verbatim in
// binary shift mode.
type binaryShiftToken struct {
// token is the previous token in the chain (nil for the first one).
token
// bShiftStart is the index in the input of the first byte of the run.
bShiftStart int
// bShiftByteCnt is the number of bytes in the run.
bShiftByteCnt int
}
// newSimpleToken returns a token that follows prev and emits value using
// bitCount bits.
func newSimpleToken(prev token, value int, bitCount byte) token {
	return &simpleToken{
		token:    prev,
		value:    value,
		bitCount: bitCount,
	}
}
// newShiftToken returns a token that follows prev and emits the bShiftCnt
// input bytes starting at index bShiftStart in binary shift mode.
func newShiftToken(prev token, bShiftStart int, bShiftCnt int) token {
	return &binaryShiftToken{
		token:         prev,
		bShiftStart:   bShiftStart,
		bShiftByteCnt: bShiftCnt,
	}
}
// prev returns the token that precedes st in the chain, or nil if st is the
// first token.
func (st *simpleToken) prev() token {
return st.token
}
// appendTo adds the code of st to bits as st.bitCount bits. text is unused;
// the parameter exists to satisfy the token interface.
func (st *simpleToken) appendTo(bits *utils.BitList, text []byte) {
bits.AddBits(st.value, st.bitCount)
}
// String renders the token as its code in binary, zero-padded to bitCount
// digits and wrapped in angle brackets, e.g. "<00101>".
func (st *simpleToken) String() string {
	// Setting a marker bit just above the code makes %b print the leading
	// zeros; the marker digit itself is cut off again.
	marker := 1 << st.bitCount
	padded := marker | (st.value & (marker - 1))
	digits := fmt.Sprintf("%b", padded)
	return "<" + digits[1:] + ">"
}
// prev returns the token that precedes bst in the chain, or nil if bst is
// the first token.
func (bst *binaryShiftToken) prev() token {
return bst.token
}
// appendTo writes the binary shift run to bits: the bytes
// text[bShiftStart : bShiftStart+bShiftByteCnt], preceded by the required
// binary shift headers.
//
// Runs of up to 31 bytes get one header with a 5-bit length. Runs of 32 to
// 62 bytes are written as two parts, 31 bytes and the rest, each with its
// own 5-bit length header. Longer runs get a single header whose length
// field (byte count minus 31) is written as 16 bits.
func (bst *binaryShiftToken) appendTo(bits *utils.BitList, text []byte) {
	total := bst.bShiftByteCnt
	for i := 0; i < total; i++ {
		switch {
		case total > 62:
			// Long form: a single header in front of the first byte.
			if i == 0 {
				bits.AddBits(31, 5) // BINARY_SHIFT
				bits.AddBits(total-31, 16)
			}
		case i == 0:
			// Short form, first part: at most 31 bytes.
			bits.AddBits(31, 5) // BINARY_SHIFT
			if total < 31 {
				bits.AddBits(total, 5)
			} else {
				bits.AddBits(31, 5)
			}
		case i == 31:
			// Short form, second part: the bytes beyond the first 31.
			bits.AddBits(31, 5) // BINARY_SHIFT
			bits.AddBits(total-31, 5)
		}
		bits.AddByte(text[bst.bShiftStart+i])
	}
}
// String renders the token as the inclusive range of input indices it
// covers, e.g. "<3::7>".
func (bst *binaryShiftToken) String() string {
	last := bst.bShiftStart + bst.bShiftByteCnt - 1
	return fmt.Sprintf("<%d::%d>", bst.bShiftStart, last)
}