Human readable base conversion
Code review time… In a conversation about URL shorteners and “Coke Rewards”, I realized there was a case where I needed to generate character strings that human beings could reliably type back in. Typical Base62 schemes make this hard because of the ambiguity between characters such as O, o and 0 (along with all of the upper- vs. lower-case distinctions).
Here’s the quick module I put together: a base converter that produces human-safe, readable numbers.
1
2import types
3
class BaseConverter(object):
    """Convert integers to and from a positional notation with custom digits.

    A *digit set* is an ordered sequence of digit groups.  The position of a
    group is the digit's value, the first entry of the group is the form used
    when producing output, and every entry of the group is accepted when
    parsing input.  A plain string is shorthand for one single-character group
    per character.  The default digit set, ``HUMAN_TABLE``, is a base-28
    alphabet whose easily confused characters are accepted as aliases.

    Parsing walks the input one character at a time, so digit sets made of
    multi-character tokens (see the last example) can be used for output but
    cannot be parsed back.

    >>> v = BaseConverter(BaseConverter.BASE10)
    >>> v.to_decimal(22)
    22
    >>> v.from_decimal(22)
    '22'

    >>> v = BaseConverter(BaseConverter.BASE2)
    >>> v.to_decimal(22)
    Traceback (most recent call last):
        ...
    ValueError: character '2' not in base
    >>> v.to_decimal(10)
    2
    >>> v.to_decimal('10')
    2
    >>> v.from_decimal(22)
    '10110'

    >>> v = BaseConverter()
    >>> v.to_decimal(22)
    58
    >>> v.from_decimal(123123)
    '5h17'
    >>> v.to_decimal('5H17')
    123123

    >>> v = BaseConverter(BaseConverter.BASE62)
    >>> v.from_decimal(257938572394)
    '4XYBxik'
    >>> v.to_decimal('4XYBxik')
    257938572394

    >>> v = BaseConverter((('Zero ',), ('One ',)))
    >>> v.from_decimal(BaseConverter(BaseConverter.BASE2).to_decimal('1101'))
    'One One Zero One '

    """

    # Text types treated as "a string of single-character digits".  Resolves
    # to (str, unicode) on Python 2 and (str, str) on Python 3, so the class
    # works on both without the Python 2-only ``types.StringType`` names.
    _STRING_TYPES = (type(""), type(u""))

    # Base-28 alphabet for strings typed in by people.  The letters
    # i, j, l, o, q, s, v and z never appear in output; on input they are
    # accepted as aliases of the digit they are listed with.
    HUMAN_TABLE = (
        ('0', 'O', 'o', 'Q', 'q'),
        ('1', 'I', 'i', 'L', 'l', 'J', 'j'),
        ('2', 'Z', 'z'),
        ('3',),
        ('4',),
        ('5', 'S', 's'),
        ('6',),
        ('7',),
        ('8',),
        ('9',),
        ('a', 'A',),
        ('b', 'B',),
        ('c', 'C',),
        ('d', 'D',),
        ('e', 'E',),
        ('f', 'F',),
        ('g', 'G',),
        ('h', 'H',),
        ('k', 'K',),
        ('m', 'M',),
        ('n', 'N',),
        ('p', 'P',),
        ('r', 'R',),
        ('t', 'T',),
        ('u', 'U', 'V', 'v'),
        ('w', 'W',),
        ('x', 'X',),
        ('y', 'Y',),
    )

    BASE2 = "01"
    BASE10 = "0123456789"
    BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    # Hexadecimal: written in upper case, read in either case.
    BASE16 = (
        ('0',),
        ('1',),
        ('2',),
        ('3',),
        ('4',),
        ('5',),
        ('6',),
        ('7',),
        ('8',),
        ('9',),
        ('A', 'a',),
        ('B', 'b',),
        ('C', 'c',),
        ('D', 'd',),
        ('E', 'e',),
        ('F', 'f',),
    )

    def __init__(self, digitset=HUMAN_TABLE):
        """Build the output and input lookup tables for ``digitset``.

        ``digitset`` is either a string (each character is one digit) or a
        sequence of groups whose first entry is the output form and whose
        entries are all accepted on input.
        """
        if isinstance(digitset, self._STRING_TYPES):
            # One single-character digit per character of the string.
            digitset = list(digitset)
        self.digitset = digitset

        self.base = len(self.digitset)
        # Not used by this class; the attribute is kept so that code reading
        # it keeps working.
        self.output_map = {}

        # Canonical (first) spelling of every digit, indexed by digit value.
        self.output_digits = [group[0] for group in self.digitset]
        # Every accepted spelling mapped to its digit value.  If a spelling is
        # listed in more than one group, the last group wins.
        self.input_set = {}
        for value, group in enumerate(self.digitset):
            for spelling in group:
                self.input_set[spelling] = value

    def from_decimal(self, i):
        """Return the integer ``i`` written with this converter's digits."""
        return self.convert(i, self.BASE10, self.output_digits)

    def to_decimal(self, s):
        """Parse ``s`` using this converter's digits and return an ``int``.

        Raises ``ValueError`` if ``s`` contains no digits or contains a
        character that is not a digit of this base.
        """
        return int(self.convert(s, self.input_set, self.BASE10))

    def convert(self, number, fromdigits, todigits):
        """Re-express ``number`` from ``fromdigits`` notation in ``todigits``.

        ``fromdigits`` is either a string of digits, whose length is then the
        source base, or a mapping from character to digit value, in which
        case the source base is this converter's own ``base``.  ``todigits``
        is a sequence indexed by digit value.
        """
        if isinstance(fromdigits, self._STRING_TYPES):
            fbase = len(fromdigits)
            lookup = {char: value for value, char in enumerate(fromdigits)}
        else:
            fbase = self.base
            lookup = fromdigits

        return self._convert(number, fbase, lookup, todigits)

    @staticmethod
    def _convert(number, fbase, fromdigits, todigits):
        """Parse ``number`` in base ``fbase`` and render it with ``todigits``.

        Based on http://code.activestate.com/recipes/111286/

        ``fromdigits`` maps each input character to its digit value and
        ``todigits`` is indexed by digit value.  A leading ``-`` is carried
        over to the result.  Raises ``ValueError`` for input without digits,
        for a character missing from ``fromdigits``, and for a target digit
        set with fewer than two digits.
        """
        # Only stringify non-text input: calling str() on text is pointless
        # and, on Python 2, fails for non-ASCII unicode.
        if not isinstance(number, BaseConverter._STRING_TYPES):
            number = str(number)

        neg = number.startswith('-')
        if neg:
            number = number[1:]
        if not number:
            # Covers both '' and a bare '-'.
            raise ValueError("no digits to convert")

        # Make an integer out of the number.
        x = 0
        for digit in number:
            try:
                x = x * fbase + fromdigits[digit]
            except KeyError:
                raise ValueError("character '%s' not in base" % digit)

        # Create the result in base len(todigits).
        tbase = len(todigits)
        if tbase < 2:
            # With one digit divmod(x, 1) never shrinks x (endless loop);
            # with none there is nothing to write the result with.
            raise ValueError("target digit set needs at least two digits")

        if x == 0:
            res = todigits[0]
        else:
            # Digits come out least-significant first; collect and reverse
            # once instead of prepending to a growing string.
            pieces = []
            while x > 0:
                x, digit = divmod(x, tbase)
                pieces.append(todigits[digit])
            res = "".join(reversed(pieces))

        if neg:
            res = '-' + res
        return res
165
# Ready-made converters for the built-in digit sets.
binary = BaseConverter(BaseConverter.BASE2)
# NOTE: this name shadows the built-in hex() inside this module; it is kept
# unchanged because importers refer to it by this name.
hex = BaseConverter(BaseConverter.BASE16)
base62 = BaseConverter(BaseConverter.BASE62)
# Default digit set (BaseConverter.HUMAN_TABLE).
human = BaseConverter()
170
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    import doctest
    # NOTE(review): nothing in the visible code uses `random`; kept in case
    # code beyond this excerpt relies on it.
    import random
    doctest.testmod()