import base64
import sys

# Number of leading bits in the packed stream that store ``size - 1``.
_HEADER_BITS = 3


def pad_count(n, b):
    """Return how many items must be added to ``n`` to reach a multiple of ``b``."""
    remainder = n % b
    if remainder == 0:
        return 0
    return b - remainder


def _encode_diffs(diffs, size):
    """Encode ``diffs`` as a bit string of ``size``-bit surrogates.

    Each value is written in binary, left-padded with zeros to a multiple of
    ``size - 1`` bits and cut into chunks of ``size - 1`` bits.  Every chunk is
    prefixed with a flag bit: ``1`` for the first chunk of a value, ``0`` for
    each continuation chunk.
    """
    payload_bits = size - 1
    pieces = []
    for value in diffs:
        digits = '{:b}'.format(value)
        digits = '0' * pad_count(len(digits), payload_bits) + digits
        for start in range(0, len(digits), payload_bits):
            pieces.append('1' if start == 0 else '0')
            pieces.append(digits[start:start + payload_bits])
    return ''.join(pieces)


def pack_ints(l):
    """Pack a set of positive ints into a compact Base64 byte string.

    The input is de-duplicated and sorted, then stored as the differences
    between consecutive values (the first difference is taken from 0).  The
    differences are written as variable-length surrogates; every surrogate
    size from 3 to 8 bits is tried and the shortest encoding wins (size 8 is
    kept on ties, otherwise the smallest winning size).  The bit stream
    starts with a 3-bit header holding ``size - 1``.

    The result is produced with ``base64.b64encode``, i.e. the *standard*
    alphabet, so it may contain ``+``, ``/`` and ``=``; percent-encode it
    before placing it in a URL path component.

    Args:
        l: iterable of ints, each >= 1.  Order and duplicates are ignored.

    Returns:
        ``bytes`` containing the Base64 text; decode it with ``unpack_ints``.

    Raises:
        ValueError: if any value is smaller than 1.  A difference of 0 is
            reserved as the stop marker, so 0 cannot be represented, and
            negative numbers have no encoding at all.
    """
    values = sorted(set(l))
    if values and values[0] < 1:
        raise ValueError(
            'pack_ints only supports ints >= 1, got {!r}'.format(values[0]))

    diffs = []
    last = 0
    for value in values:
        diffs.append(value - last)
        last = value

    # Start from 8-bit surrogates and switch only to a strictly shorter
    # encoding, so the chosen size is deterministic when lengths tie.
    best, size = _encode_diffs(diffs, 8), 8
    for candidate in range(3, 8):
        attempt = _encode_diffs(diffs, candidate)
        if len(attempt) < len(best):
            best, size = attempt, candidate

    # Padding works by adding 0b1, then filling the rest of the byte with
    # 0b0.  The decoder sees this as a "first chunk" surrogate whose value is
    # 0 (or a lone flag bit); since the differences of a sorted set of
    # positive ints are never 0, that pattern is a safe stop marker.  When
    # the stream already ends on a byte boundary no marker is needed.
    pad_size = pad_count(_HEADER_BITS + len(best), 8)
    padding = '1' + '0' * (pad_size - 1) if pad_size else ''

    bits = '{:03b}'.format(size - 1) + best + padding
    packed = int(bits, 2).to_bytes(len(bits) // 8, 'big')
    return base64.b64encode(packed)


def unpack_ints(s):
    """Unpack the sorted list of ints produced by ``pack_ints``.

    Args:
        s: Base64 text (``str`` or ``bytes``) as returned by ``pack_ints``.

    Returns:
        The sorted list of ints that was packed; ``[]`` when ``s`` decodes
        to no bytes or holds only the stop marker.
    """
    raw = base64.b64decode(s)
    if len(raw) == 0:
        return []

    bits = ''.join('{:08b}'.format(byte) for byte in raw)
    size = int(bits[:_HEADER_BITS], 2) + 1
    bits = bits[_HEADER_BITS:]

    diffs = []
    for start in range(0, len(bits), size):
        surrogate = bits[start:start + size]
        flag, payload = surrogate[0], surrogate[1:]
        if flag == '1':
            if not payload or int(payload, 2) == 0:
                # Stop marker: the rest of the stream is padding.
                break
            diffs.append(int(payload, 2))
        else:
            # Continuation chunk: append its bits to the current value.
            diffs[-1] = (diffs[-1] << (size - 1)) + int(payload, 2)

    # Undo the delta encoding with a running sum.
    result = []
    last = 0
    for diff in diffs:
        last += diff
        result.append(last)
    return result


if __name__ == '__main__':
    # Demo: round-trip the ints listed one per line in a text file.  The path
    # may be given as the first command-line argument.
    path = sys.argv[1] if len(sys.argv) > 1 else '/home/ilianaw/rrix.txt'
    with open(path) as f:
        numbers = sorted(set(int(line) for line in f))
    print(numbers)
    packed = pack_ints(numbers).decode('ascii')
    print(packed)
    unpacked = unpack_ints(packed)
    print(unpacked)