diff --git a/aztec/highlevel.go b/aztec/highlevel.go new file mode 100644 index 0000000..fe34c88 --- /dev/null +++ b/aztec/highlevel.go @@ -0,0 +1,171 @@ +package aztec + +import ( + "github.com/boombuler/barcode/utils" +) + +func highlevelEncode(data []byte) *utils.BitList { + states := stateSlice{initialState} + + for index := 0; index < len(data); index++ { + pairCode := 0 + nextChar := byte(0) + if index+1 < len(data) { + nextChar = data[index+1] + } + + switch cur := data[index]; { + case cur == '\r' && nextChar == '\n': + pairCode = 2 + case cur == '.' && nextChar == ' ': + pairCode = 3 + case cur == ',' && nextChar == ' ': + pairCode = 4 + case cur == ':' && nextChar == ' ': + pairCode = 5 + } + if pairCode > 0 { + // We have one of the four special PUNCT pairs. Treat them specially. + // Get a new set of states for the two new characters. + states = updateStateListForPair(states, data, index, pairCode) + index++ + } else { + // Get a new set of states for the new character. + states = updateStateListForChar(states, data, index) + } + } + minBitCnt := int((^uint(0)) >> 1) + var result *state = nil + for _, s := range states { + if s.bitCount < minBitCnt { + minBitCnt = s.bitCount + result = s + } + } + if result != nil { + return result.toBitList(data) + } else { + return new(utils.BitList) + } +} + +func simplifyStates(states stateSlice) stateSlice { + var result stateSlice = nil + for _, newState := range states { + add := true + var newResult stateSlice = nil + + for _, oldState := range result { + if add && oldState.isBetterThanOrEqualTo(newState) { + add = false + } + if !(add && newState.isBetterThanOrEqualTo(oldState)) { + newResult = append(newResult, oldState) + } + } + + if add { + result = append(newResult, newState) + } else { + result = newResult + } + + } + + return result +} + +// We update a set of states for a new character by updating each state +// for the new character, merging the results, and then removing the +// non-optimal states. +func updateStateListForChar(states stateSlice, data []byte, index int) stateSlice { + var result stateSlice = nil + for _, s := range states { + if r := updateStateForChar(s, data, index); len(r) > 0 { + result = append(result, r...) + } + } + return simplifyStates(result) +} + +// Return a set of states that represent the possible ways of updating this +// state for the next character. The resulting set of states are added to +// the "result" list. +func updateStateForChar(s *state, data []byte, index int) stateSlice { + var result stateSlice = nil + ch := data[index] + charInCurrentTable := charMap[s.mode][ch] > 0 + + var stateNoBinary *state = nil + for mode := mode_upper; mode <= mode_punct; mode++ { + charInMode := charMap[mode][ch] + if charInMode > 0 { + if stateNoBinary == nil { + // Only create stateNoBinary the first time it's required. + stateNoBinary = s.endBinaryShift(index) + } + // Try generating the character by latching to its mode + if !charInCurrentTable || mode == s.mode || mode == mode_digit { + // If the character is in the current table, we don't want to latch to + // any other mode except possibly digit (which uses only 4 bits). Any + // other latch would be equally successful *after* this character, and + // so wouldn't save any bits. + res := stateNoBinary.latchAndAppend(mode, charInMode) + result = append(result, res) + } + // Try generating the character by switching to its mode. + if _, ok := shiftTable[s.mode][mode]; !charInCurrentTable && ok { + // It never makes sense to temporarily shift to another mode if the + // character exists in the current mode. That can never save bits. + res := stateNoBinary.shiftAndAppend(mode, charInMode) + result = append(result, res) + } + } + } + if s.bShiftByteCount > 0 || charMap[s.mode][ch] == 0 { + // It's never worthwhile to go into binary shift mode if you're not already + // in binary shift mode, and the character exists in your current mode. + // That can never save bits over just outputting the char in the current mode. + res := s.addBinaryShiftChar(index) + result = append(result, res) + } + return result +} + +// We update a set of states for a new character by updating each state +// for the new character, merging the results, and then removing the +// non-optimal states. +func updateStateListForPair(states stateSlice, data []byte, index int, pairCode int) stateSlice { + var result stateSlice = nil + for _, s := range states { + if r := updateStateForPair(s, data, index, pairCode); len(r) > 0 { + result = append(result, r...) + } + } + return simplifyStates(result) +} + +func updateStateForPair(s *state, data []byte, index int, pairCode int) stateSlice { + var result stateSlice + stateNoBinary := s.endBinaryShift(index) + // Possibility 1. Latch to MODE_PUNCT, and then append this code + result = append(result, stateNoBinary.latchAndAppend(mode_punct, pairCode)) + if s.mode != mode_punct { + // Possibility 2. Shift to MODE_PUNCT, and then append this code. + // Every state except MODE_PUNCT (handled above) can shift + result = append(result, stateNoBinary.shiftAndAppend(mode_punct, pairCode)) + } + if pairCode == 3 || pairCode == 4 { + // both characters are in DIGITS. Sometimes better to just add two digits + digitState := stateNoBinary. + latchAndAppend(mode_digit, 16-pairCode). // period or comma in DIGIT + latchAndAppend(mode_digit, 1) // space in DIGIT + result = append(result, digitState) + } + if s.bShiftByteCount > 0 { + // It only makes sense to do the characters as binary if we're already + // in binary mode. + result = append(result, s.addBinaryShiftChar(index).addBinaryShiftChar(index+1)) + } + return result +} diff --git a/aztec/highlevel_test.go b/aztec/highlevel_test.go new file mode 100644 index 0000000..691d157 --- /dev/null +++ b/aztec/highlevel_test.go @@ -0,0 +1,132 @@ +package aztec + +import ( + "bytes" + "strings" + "testing" + + "github.com/boombuler/barcode/utils" +) + +func bitStr(bl *utils.BitList) string { + buf := new(bytes.Buffer) + + for i := 0; i < bl.Len(); i++ { + if bl.GetBit(i) { + buf.WriteRune('X') + } else { + buf.WriteRune('.') + } + } + return buf.String() +} + +func testHighLevelEncodeString(t *testing.T, s, expectedBits string) { + bits := highlevelEncode([]byte(s)) + result := bitStr(bits) + expectedBits = strings.Replace(expectedBits, " ", "", -1) + + if result != expectedBits { + t.Errorf("invalid result for highlevelEncode(%q). Got:\n%s", s, result) + } +} +func testHighLevelEncodeStringCnt(t *testing.T, s string, expectedBitCnt int) { + bits := highlevelEncode([]byte(s)) + + if bits.Len() != expectedBitCnt { + t.Errorf("invalid result for highlevelEncode(%q). Got %d, expected %d bits", s, bits.Len(), expectedBitCnt) + } +} + +func Test_HighLevelEncode(t *testing.T) { + testHighLevelEncodeString(t, "A. b.", + // 'A' P/S '. ' L/L b D/L '.' + "...X. ..... ...XX XXX.. ...XX XXXX. XX.X") + testHighLevelEncodeString(t, "Lorem ipsum.", + // 'L' L/L 'o' 'r' 'e' 'm' ' ' 'i' 'p' 's' 'u' 'm' D/L '.' + ".XX.X XXX.. X.... X..XX ..XX. .XXX. ....X .X.X. X...X X.X.. X.XX. .XXX. XXXX. XX.X") + testHighLevelEncodeString(t, "Lo. Test 123.", + // 'L' L/L 'o' P/S '. ' U/S 'T' 'e' 's' 't' D/L ' ' '1' '2' '3' '.' + ".XX.X XXX.. X.... ..... ...XX XXX.. X.X.X ..XX. X.X.. X.X.X XXXX. ...X ..XX .X.. .X.X XX.X") + testHighLevelEncodeString(t, "Lo...x", + // 'L' L/L 'o' D/L '.' '.' '.' U/L L/L 'x' + ".XX.X XXX.. X.... XXXX. XX.X XX.X XX.X XXX. XXX.. XX..X") + testHighLevelEncodeString(t, ". x://abc/.", + //P/S '. ' L/L 'x' P/S ':' P/S '/' P/S '/' 'a' 'b' 'c' P/S '/' D/L '.' + "..... ...XX XXX.. XX..X ..... X.X.X ..... X.X.. ..... X.X.. ...X. ...XX ..X.. ..... X.X.. XXXX. XX.X") + // Uses Binary/Shift rather than Lower/Shift to save two bits. + testHighLevelEncodeString(t, "ABCdEFG", + //'A' 'B' 'C' B/S =1 'd' 'E' 'F' 'G' + "...X. ...XX ..X.. XXXXX ....X .XX..X.. ..XX. ..XXX .X...") + + testHighLevelEncodeStringCnt(t, + // Found on an airline boarding pass. Several stretches of Binary shift are + // necessary to keep the bitcount so low. + "09 UAG ^160MEUCIQC0sYS/HpKxnBELR1uB85R20OoqqwFGa0q2uEi"+ + "Ygh6utAIgLl1aBVM4EOTQtMQQYH9M2Z3Dp4qnA/fwWuQ+M8L3V8U=", + 823) +} + +func Test_HighLevelEncodeBinary(t *testing.T) { + // binary short form single byte + testHighLevelEncodeString(t, "N\u0000N", + // 'N' B/S =1 '\0' N + ".XXXX XXXXX ....X ........ .XXXX") // Encode "N" in UPPER + + testHighLevelEncodeString(t, "N\u0000n", + // 'N' B/S =2 '\0' 'n' + ".XXXX XXXXX ...X. ........ .XX.XXX.") // Encode "n" in BINARY + + // binary short form consecutive bytes + testHighLevelEncodeString(t, "N\x00\x80 A", + // 'N' B/S =2 '\0' \u0080 ' ' 'A' + ".XXXX XXXXX ...X. ........ X....... ....X ...X.") + + // binary skipping over single character + testHighLevelEncodeString(t, "\x00a\xFF\x80 A", + // B/S =4 '\0' 'a' '\3ff' '\200' ' ' 'A' + "XXXXX ..X.. ........ .XX....X XXXXXXXX X....... ....X ...X.") + + // getting into binary mode from digit mode + testHighLevelEncodeString(t, "1234\u0000", + //D/L '1' '2' '3' '4' U/L B/S =1 \0 + "XXXX. ..XX .X.. .X.X .XX. XXX. XXXXX ....X ........") + + // Create a string in which every character requires binary + sb := new(bytes.Buffer) + for i := 0; i <= 3000; i++ { + sb.WriteByte(byte(128 + (i % 30))) + } + + // Test the output generated by Binary/Switch, particularly near the + // places where the encoding changes: 31, 62, and 2047+31=2078 + for _, i := range []int{1, 2, 3, 10, 29, 30, 31, 32, 33, 60, 61, 62, 63, 64, 2076, 2077, 2078, 2079, 2080, 2100} { + // This is the expected length of a binary string of length "i" + expectedLength := (8 * i) + switch { + case i <= 31: + expectedLength += 10 + case i <= 62: + expectedLength += 20 + case i <= 2078: + expectedLength += 21 + default: + expectedLength += 31 + } + data := string(sb.Bytes()[:i]) + + // Verify that we are correct about the length. + testHighLevelEncodeStringCnt(t, data, expectedLength) + if i != 1 && i != 32 && i != 2079 { + // The addition of an 'a' at the beginning or end gets merged into the binary code + // in those cases where adding another binary character only adds 8 or 9 bits to the result. + // So we exclude the border cases i=1,32,2079 + // A lower case letter at the beginning will be merged into binary mode + testHighLevelEncodeStringCnt(t, "a"+string(sb.Bytes()[:i-1]), expectedLength) + // A lower case letter at the end will also be merged into binary mode + testHighLevelEncodeStringCnt(t, string(sb.Bytes()[:i-1])+"a", expectedLength) + } + // A lower case letter at both ends will enough to latch us into LOWER. + testHighLevelEncodeStringCnt(t, "a"+data+"b", expectedLength+15) + } +} diff --git a/aztec/state.go b/aztec/state.go new file mode 100644 index 0000000..0d3f0a7 --- /dev/null +++ b/aztec/state.go @@ -0,0 +1,264 @@ +package aztec + +import ( + "fmt" + + "github.com/boombuler/barcode/utils" +) + +type encodingMode byte + +const ( + mode_upper encodingMode = iota // 5 bits + mode_lower // 5 bits + mode_digit // 4 bits + mode_mixed // 5 bits + mode_punct // 5 bits +) + +var ( + // The Latch Table shows, for each pair of Modes, the optimal method for + // getting from one mode to another. In the worst possible case, this can + // be up to 14 bits. In the best possible case, we are already there! + // The high half-word of each entry gives the number of bits. + // The low half-word of each entry are the actual bits necessary to change + latchTable = map[encodingMode]map[encodingMode]int{ + mode_upper: { + mode_upper: 0, + mode_lower: (5 << 16) + 28, + mode_digit: (5 << 16) + 30, + mode_mixed: (5 << 16) + 29, + mode_punct: (10 << 16) + (29 << 5) + 30, + }, + mode_lower: { + mode_upper: (9 << 16) + (30 << 4) + 14, + mode_lower: 0, + mode_digit: (5 << 16) + 30, + mode_mixed: (5 << 16) + 29, + mode_punct: (10 << 16) + (29 << 5) + 30, + }, + mode_digit: { + mode_upper: (4 << 16) + 14, + mode_lower: (9 << 16) + (14 << 5) + 28, + mode_digit: 0, + mode_mixed: (9 << 16) + (14 << 5) + 29, + mode_punct: (14 << 16) + (14 << 10) + (29 << 5) + 30, + }, + mode_mixed: { + mode_upper: (5 << 16) + 29, + mode_lower: (5 << 16) + 28, + mode_digit: (10 << 16) + (29 << 5) + 30, + mode_mixed: 0, + mode_punct: (5 << 16) + 30, + }, + mode_punct: { + mode_upper: (5 << 16) + 31, + mode_lower: (10 << 16) + (31 << 5) + 28, + mode_digit: (10 << 16) + (31 << 5) + 30, + mode_mixed: (10 << 16) + (31 << 5) + 29, + mode_punct: 0, + }, + } + // A map showing the available shift codes. (The shifts to BINARY are not shown) + shiftTable = map[encodingMode]map[encodingMode]int{ + mode_upper: { + mode_punct: 0, + }, + mode_lower: { + mode_punct: 0, + mode_upper: 28, + }, + mode_mixed: { + mode_punct: 0, + }, + mode_digit: { + mode_punct: 0, + mode_upper: 15, + }, + } + charMap map[encodingMode][]int +) + +type state struct { + mode encodingMode + tokens token + bShiftByteCount int + bitCount int +} +type stateSlice []*state + +var initialState *state = &state{ + mode: mode_upper, + tokens: nil, + bShiftByteCount: 0, + bitCount: 0, +} + +func init() { + charMap = make(map[encodingMode][]int) + charMap[mode_upper] = make([]int, 256) + charMap[mode_lower] = make([]int, 256) + charMap[mode_digit] = make([]int, 256) + charMap[mode_mixed] = make([]int, 256) + charMap[mode_punct] = make([]int, 256) + + charMap[mode_upper][' '] = 1 + for c := 'A'; c <= 'Z'; c++ { + charMap[mode_upper][int(c)] = int(c - 'A' + 2) + } + + charMap[mode_lower][' '] = 1 + for c := 'a'; c <= 'z'; c++ { + charMap[mode_lower][c] = int(c - 'a' + 2) + } + charMap[mode_digit][' '] = 1 + for c := '0'; c <= '9'; c++ { + charMap[mode_digit][c] = int(c - '0' + 2) + } + charMap[mode_digit][','] = 12 + charMap[mode_digit]['.'] = 13 + + mixedTable := []int{ + 0, ' ', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 27, 28, 29, 30, 31, '@', '\\', '^', + '_', '`', '|', '~', 127, + } + for i, v := range mixedTable { + charMap[mode_mixed][v] = i + } + + punctTable := []int{ + 0, '\r', 0, 0, 0, 0, '!', '\'', '#', '$', '%', '&', '\'', + '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', + '[', ']', '{', '}', + } + for i, v := range punctTable { + if v > 0 { + charMap[mode_punct][v] = i + } + } +} + +func (em encodingMode) BitCount() byte { + if em == mode_digit { + return 4 + } + return 5 +} + +// Create a new state representing this state with a latch to a (not +// necessary different) mode, and then a code. +func (s *state) latchAndAppend(mode encodingMode, value int) *state { + bitCount := s.bitCount + tokens := s.tokens + + if mode != s.mode { + latch := latchTable[s.mode][mode] + tokens = newSimpleToken(tokens, latch&0xFFFF, byte(latch>>16)) + bitCount += latch >> 16 + } + tokens = newSimpleToken(tokens, value, mode.BitCount()) + return &state{ + mode: mode, + tokens: tokens, + bShiftByteCount: 0, + bitCount: bitCount + int(mode.BitCount()), + } +} + +// Create a new state representing this state, with a temporary shift +// to a different mode to output a single value. +func (s *state) shiftAndAppend(mode encodingMode, value int) *state { + tokens := s.tokens + + // Shifts exist only to UPPER and PUNCT, both with tokens size 5. + tokens = newSimpleToken(tokens, shiftTable[s.mode][mode], s.mode.BitCount()) + tokens = newSimpleToken(tokens, value, 5) + + return &state{ + mode: s.mode, + tokens: tokens, + bShiftByteCount: 0, + bitCount: s.bitCount + int(s.mode.BitCount()) + 5, + } +} + +// Create a new state representing this state, but an additional character +// output in Binary Shift mode. +func (s *state) addBinaryShiftChar(index int) *state { + tokens := s.tokens + mode := s.mode + bitCnt := s.bitCount + if s.mode == mode_punct || s.mode == mode_digit { + latch := latchTable[s.mode][mode_upper] + tokens = newSimpleToken(tokens, latch&0xFFFF, byte(latch>>16)) + bitCnt += latch >> 16 + mode = mode_upper + } + deltaBitCount := 8 + if s.bShiftByteCount == 0 || s.bShiftByteCount == 31 { + deltaBitCount = 18 + } else if s.bShiftByteCount == 62 { + deltaBitCount = 9 + } + result := &state{ + mode: mode, + tokens: tokens, + bShiftByteCount: s.bShiftByteCount + 1, + bitCount: bitCnt + deltaBitCount, + } + if result.bShiftByteCount == 2047+31 { + // The string is as long as it's allowed to be. We should end it. + result = result.endBinaryShift(index + 1) + } + + return result +} + +// Create the state identical to this one, but we are no longer in +// Binary Shift mode. +func (s *state) endBinaryShift(index int) *state { + if s.bShiftByteCount == 0 { + return s + } + tokens := newShiftToken(s.tokens, index-s.bShiftByteCount, s.bShiftByteCount) + return &state{ + mode: s.mode, + tokens: tokens, + bShiftByteCount: 0, + bitCount: s.bitCount, + } +} + +// Returns true if "this" state is better (or equal) to be in than "that" +// state under all possible circumstances. +func (this *state) isBetterThanOrEqualTo(other *state) bool { + mySize := this.bitCount + (latchTable[this.mode][other.mode] >> 16) + + if other.bShiftByteCount > 0 && (this.bShiftByteCount == 0 || this.bShiftByteCount > other.bShiftByteCount) { + mySize += 10 // Cost of entering Binary Shift mode. + } + return mySize <= other.bitCount +} + +func (s *state) toBitList(text []byte) *utils.BitList { + tokens := make([]token, 0) + se := s.endBinaryShift(len(text)) + + for t := se.tokens; t != nil; t = t.prev() { + tokens = append(tokens, t) + } + res := new(utils.BitList) + for i := len(tokens) - 1; i >= 0; i-- { + tokens[i].appendTo(res, text) + } + return res +} + +func (s *state) String() string { + tokens := make([]token, 0) + for t := s.tokens; t != nil; t = t.prev() { + tokens = append([]token{t}, tokens...) + } + return fmt.Sprintf("M:%d bits=%d bytes=%d: %v", s.mode, s.bitCount, s.bShiftByteCount, tokens) +} diff --git a/aztec/token.go b/aztec/token.go new file mode 100644 index 0000000..aac0f7a --- /dev/null +++ b/aztec/token.go @@ -0,0 +1,75 @@ +package aztec + +import ( + "fmt" + + "github.com/boombuler/barcode/utils" +) + +type token interface { + fmt.Stringer + prev() token + appendTo(bits *utils.BitList, text []byte) +} + +type simpleToken struct { + token + value int + bitCount byte +} + +type binaryShiftToken struct { + token + bShiftStart int + bShiftByteCnt int +} + +func newSimpleToken(prev token, value int, bitCount byte) token { + return &simpleToken{prev, value, bitCount} +} +func newShiftToken(prev token, bShiftStart int, bShiftCnt int) token { + return &binaryShiftToken{prev, bShiftStart, bShiftCnt} +} + +func (st *simpleToken) prev() token { + return st.token +} +func (st *simpleToken) appendTo(bits *utils.BitList, text []byte) { + bits.AddBits(st.value, st.bitCount) +} +func (st *simpleToken) String() string { + value := st.value & ((1 << st.bitCount) - 1) + value |= 1 << st.bitCount + return "<" + fmt.Sprintf("%b", value)[1:] + ">" +} + +func (bst *binaryShiftToken) prev() token { + return bst.token +} +func (bst *binaryShiftToken) appendTo(bits *utils.BitList, text []byte) { + for i := 0; i < bst.bShiftByteCnt; i++ { + if i == 0 || (i == 31 && bst.bShiftByteCnt <= 62) { + // We need a header before the first character, and before + // character 31 when the total byte code is <= 62 + bits.AddBits(31, 5) // BINARY_SHIFT + if bst.bShiftByteCnt > 62 { + bits.AddBits(bst.bShiftByteCnt-31, 16) + } else if i == 0 { + // 1 <= binaryShiftByteCode <= 62 + if bst.bShiftByteCnt < 31 { + bits.AddBits(bst.bShiftByteCnt, 5) + } else { + bits.AddBits(31, 5) + } + } else { + // 32 <= binaryShiftCount <= 62 and i == 31 + bits.AddBits(bst.bShiftByteCnt-31, 5) + } + } + bits.AddByte(text[bst.bShiftStart+i]) + } +} + +func (bst *binaryShiftToken) String() string { + return fmt.Sprintf("<%d::%d>", bst.bShiftStart, (bst.bShiftStart + bst.bShiftByteCnt - 1)) +}