import base64


def pad_count(n, b):
    """Return how much must be added to ``n`` to reach a multiple of ``b``.

    The result is 0 when ``n`` is already a multiple of ``b``.
    """
    return -n % b


def pack_ints(l):
    """
    Pack a set of ints into a URL-safe string that can be used in a URL path
    component.

    The input is de-duplicated and sorted, then stored as the differences
    between consecutive values (the first difference is taken from 0).  Each
    difference is written as one or more fixed-width "surrogates": a flag bit
    (1 for the first surrogate of a value, 0 for a continuation) followed by
    ``size - 1`` payload bits, most significant group first.  Surrogate
    widths 3 through 8 are all tried and the shortest encoding wins; the
    chosen width is stored as ``size - 1`` in the first three bits.

    Returns the URL-safe base64 encoding of the bit string as ``bytes``.

    NOTE(review): a value of 0 in the input produces a difference of 0, which
    encodes to the same bits as the stop marker, so ``unpack_ints`` stops
    there and such inputs do not round-trip.  Negative values are not
    supported either.
    """

    def do_pack(values, size):
        # Encode every value as surrogates of ``size`` bits each.
        payload = size - 1
        bits = []
        for value in values:
            digits = '{:b}'.format(value)
            digits = '0' * pad_count(len(digits), payload) + digits
            for start in range(0, len(digits), payload):
                # Flag bit: '1' opens a new value, '0' continues the last.
                bits.append('1' if start == 0 else '0')
                bits.extend(digits[start:start + payload])
        return bits

    diffs = []
    last = 0
    for value in sorted(set(l)):
        diffs.append(value - last)
        last = value

    # Width 8 is the baseline; a narrower width only wins if strictly shorter.
    best, size = do_pack(diffs, 8), 8
    for candidate in range(3, 8):
        attempt = do_pack(diffs, candidate)
        if len(attempt) < len(best):
            best, size = attempt, candidate

    # Padding works by adding 0b1, then padding the rest of the byte with 0b0.
    # A leading surrogate whose payload is 0 never occurs in the differences
    # of an ordered set of positive ints, so the decoder can treat it as a
    # "stop processing" marker.
    pad_size = pad_count(3 + len(best), 8)
    # Must be a list (not ''): it is concatenated with lists below, and the
    # no-padding case is reachable (3 header bits + 5 data bits, say).
    padding = list('1' + '0' * (pad_size - 1)) if pad_size else []
    bits = list('{:03b}'.format(size - 1)) + best + padding
    packed = bytearray(int(''.join(bits[i:i + 8]), 2)
                       for i in range(0, len(bits), 8))
    # The standard alphabet contains '+' and '/', which are not safe inside
    # a URL path component; the URL-safe alphabet uses '-' and '_' instead.
    return base64.urlsafe_b64encode(packed)


def unpack_ints(s):
    """
    Unpack a set of ints generated by pack_ints.

    ``s`` may be ``str`` or ``bytes``.  Both the URL-safe base64 alphabet and
    the standard one (as emitted by earlier versions of ``pack_ints``) are
    accepted.  Returns the values as a sorted list; an empty input yields an
    empty list.

    Raises ``ValueError`` if the data is not valid base64 or if a
    continuation surrogate appears with no value to continue.
    """

    if isinstance(s, str):
        s = s.encode('ascii')
    # Map the URL-safe alphabet onto the standard one so both decode.
    raw = base64.b64decode(
        bytes(s).translate(bytes.maketrans(b'-_', b'+/')))
    if not raw:
        return []

    bits = ''.join('{:08b}'.format(byte) for byte in raw)
    # The first three bits hold the surrogate width minus one.
    size, bits = int(bits[:3], 2) + 1, bits[3:]

    diffs = []
    for start in range(0, len(bits), size):
        surr = bits[start:start + size]
        payload = surr[1:]
        if surr[0] == '1':
            if not payload or int(payload, 2) == 0:  # stop marker
                break
            diffs.append(int(payload, 2))
        else:
            if not diffs or not payload:
                raise ValueError('malformed packed int string')
            # Continuation: shift the value so far and append these bits.
            diffs[-1] = (diffs[-1] << (size - 1)) + int(payload, 2)

    # Undo the delta encoding with a running sum.
    values = []
    last = 0
    for diff in diffs:
        last += diff
        values.append(last)
    return values


if __name__ == '__main__':
    import sys

    # Manual round-trip check: reads one int per line from the file given as
    # the first command-line argument (or the original default path), then
    # prints the sorted input, the packed form and the unpacked result.
    path = sys.argv[1] if len(sys.argv) > 1 else '/home/ilianaw/rrix.txt'
    with open(path) as f:
        s = sorted(set(int(x) for x in f if x.strip()))
    print(s)
    packed = pack_ints(s).decode('ascii')
    print(packed)
    unpacked = unpack_ints(packed)
    print(unpacked)

# NOTE: the same patch also updates kappa123/server/main/views.py, removing
# its placeholder pack_ints/unpack_ints stubs and the commented-out
# ``from aaaaaaa import ...`` line in favour of
# ``from kappa123.server.intpack import pack_ints, unpack_ints``.