import sys, struct, math, argparse, copy, operator
from enum import Enum
from functools import reduce
from PIL import Image
class Chunk:
def __init__(self, words):
if len(words) != 16:
raise Exception('chunk must be 16 words')
self.words = words
def reduce_8(self):
reduced = []
for y in range(0, 16, 2):
x = self.words[y] | self.words[y + 1]
x = (x & 0xAAAA) >> 1 | (x & 0x5555)
reduced.append(x)
return reduced
def reduce_4(self):
reduced = []
words = self.reduce_8()
for y in range(0, 8, 2):
x = words[y] | words[y + 1]
x = (x >> 2) | x
reduced.append(x)
return reduced
def reduce_2(self):
a = self.words[0] | self.words[1] | self.words[2] | self.words[3] | self.words[4] | self.words[5] | self.words[6] | self.words[7]
b = self.words[8] | self.words[9] | self.words[10] | self.words[11] | self.words[12] | self.words[13] | self.words[14] | self.words[15]
return ((a & 0xFF00) == 0) | ((a & 0x00FF) == 0) << 1 | ((b & 0xFF00) == 0) << 2 | ((b & 0x00FF) == 0) << 3
def pixel_count(self):
return sum(map(int.bit_count, self.words))
def pixels_changed(self, other):
return sum(map(lambda v: (v[0] ^ v[1]).bit_count(), zip(self.words, other.words)))
def similarity(self, other):
s = 0
if self.reduce_8() == other.reduce_8():
s += 32
if self.reduce_4() == other.reduce_4():
s += 16
if self.reduce_2() == other.reduce_2():
s += 8
s -= abs(self.pixel_count() - other.pixel_count()) // 16
s -= self.pixels_changed(other) // 16
return s
def merge(self, other):
for i in range(0, 16):
if i % 2 == 0:
self.words[i] = (self.words[i] & 0xAAAA) | (other.words[i] & 0x5555)
else:
self.words[i] = (self.words[i] & 0x5555) | (other.words[i] & 0xAAAA)
def __eq__(self, other):
return self.words == other.words
def __hash__(self):
return hash((self.words[0], self.words[1], self.words[2], self.words[3], self.words[4], self.words[5], self.words[6], self.words[7], self.words[8], self.words[9], self.words[10], self.words[11], self.words[12], self.words[13], self.words[14], self.words[15]))
def __str__(self):
return "\n" + "\n".join(map(lambda x: f'{x:016b}', self.words))
# Reserved chunk indexes for the two trivial tiles: these are encoded by
# index alone and are never stored in a SubImage's chunk list.
known_chunks = {
    0xFFF: Chunk([0xFFFF] * 16),  # solid tile (all pixels set)
    0xFFE: Chunk([0x0000] * 16),  # blank tile (all pixels clear)
}
# Reverse lookup table, derived from the forward mapping.
known_chunks_rev = {chunk: index for index, chunk in known_chunks.items()}
# Image plane kind; the numeric value feeds the on-disk record type byte
# (SubImage.write uses base_type = 8 * type.value).
ImgType = Enum('ImgType', {'Normal': 1, 'Mask': 2})
class SubImage:
    """One bit-plane of a chunked image.

    Holds the distinct 16x16 chunks found in a PIL image plus, for each
    tile position (left-to-right, top-to-bottom), an index into that list.
    Indexes above 0xFF0 are reserved for well-known chunks (see
    known_chunks) and are never stored in self.chunks.
    """

    def __init__(self, im, type):
        """Split im (a PIL image whose dimensions are multiples of 16) into chunks.

        type: an ImgType member; its value is encoded into the output records.
        """
        self.chunks = []
        self.indexes = []
        self.type = type
        for y in range(0, im.height, 16):
            for x in range(0, im.width, 16):
                words = []
                for cy in range(16):
                    w = 0
                    for cx in range(16):
                        # Shift right first so the leftmost pixel ends in bit 0.
                        w >>= 1
                        if im.getpixel((x + cx, y + cy)) != 0:
                            w |= 0x8000
                    words.append(w)
                chunk = Chunk(words)
                try:
                    i = self.chunk_index(chunk)
                except ValueError:
                    # First occurrence of this chunk: register it.
                    i = len(self.chunks)
                    self.chunks.append(chunk)
                self.indexes.append(i)

    def chunk_at(self, i):
        """Return the chunk for index i, resolving well-known indexes."""
        if i in known_chunks:
            return known_chunks[i]
        return self.chunks[i]

    def chunk_index(self, c):
        """Return the index of chunk c; raises ValueError when absent."""
        if c in known_chunks_rev:
            return known_chunks_rev[c]
        return self.chunks.index(c)

    def histogram(self):
        """Return (index, use count) pairs sorted by ascending count.

        Well-known indexes (> 0xFF0) are excluded.
        """
        counts = {}
        for i in self.indexes:
            if i > 0xFF0:
                continue
            counts[i] = counts.get(i, 0) + 1
        return sorted(counts.items(), key=operator.itemgetter(1))

    def dump_histogram(self):
        """Print the chunk-usage histogram for debugging."""
        print('Chunk count')
        for idx, count in self.histogram():
            print(f'{idx} {count}')

    def replace_index(self, idx1, idx2):
        """Rewrite every occurrence of idx1 in the index map with idx2."""
        self.indexes = [idx2 if i == idx1 else i for i in self.indexes]

    def disk_size(self):
        """Estimated serialized size of this plane in bytes.

        NOTE(review): write() emits a 1-byte tag before the packed indexes,
        not 3; the 2-byte overestimate here cancels against ChunkedImage's
        4-byte header estimate (the real header is 6 bytes) -- confirm
        before changing either constant.
        """
        return 3 + len(self.chunks) * 32 + 3 + math.ceil(len(self.indexes) / 2) * 3

    def deduplicate(self, similarity_threshold, merge=None):
        """Greedily fold rarely-used chunks into similar, more-used ones.

        For each chunk (rarest first), the most similar chunk with a higher
        use count is found; if its score reaches similarity_threshold, the
        rare chunk's tiles are redirected to it and the chunk is dropped.

        merge: blend the dropped chunk into the survivor (Chunk.merge);
        when None, falls back to the global CLI flag args.merge (kept for
        backward compatibility with existing callers).
        Returns the number of chunks removed.
        """
        if merge is None:
            merge = args.merge
        histogram = self.histogram()
        dead_chunks = []
        for i in range(len(histogram) - 1):
            idx1 = histogram[i][0]
            a = self.chunks[idx1]
            # Score every more-frequently-used chunk against this one.
            candidates = [
                (idx, count, a.similarity(self.chunks[idx]))
                for idx, count in reversed(histogram[i + 1:])
            ]
            best = max(candidates, key=lambda v: v[2])
            if best[2] < similarity_threshold:
                continue
            idx2 = best[0]
            if merge:
                self.chunks[idx2].merge(self.chunks[idx1])
            self.replace_index(idx1, idx2)
            dead_chunks.append(idx1)
        # Remove dead chunks from the highest index down so lower positions
        # stay valid, compacting the index map as we go.
        dead_chunks.sort(reverse=True)
        for idx in dead_chunks:
            del self.chunks[idx]
            for i in range(len(self.indexes)):
                if self.indexes[i] > 0xFF0:
                    continue
                if self.indexes[i] == idx:
                    raise Exception('dead index found: ' + str(idx))
                if self.indexes[i] > idx:
                    self.indexes[i] -= 1
        return len(dead_chunks)

    def into_image(self, width, height):
        """Reassemble this plane into a new 1-bit PIL image."""
        im = Image.new('1', (width, height))
        x = 0
        y = 0
        for idx in self.indexes:
            c = self.chunk_at(idx)
            for cy in range(16):
                w = c.words[cy]
                for cx in range(16):
                    im.putpixel((x + cx, y + cy), (w >> cx) & 1)
            x += 16
            if x == im.width:
                x = 0
                y += 16
        return im

    def write(self, f):
        """Serialize this plane to the open binary file f.

        Record layout: <B type*8+0><H chunk count>, 32 bytes per chunk,
        then <B type*8+1> and the index map packed two 12-bit indexes
        into every 3 bytes.
        """
        if len(self.chunks) > 0xFF0:
            # Indexes are stored in 12 bits and values above 0xFF0 are
            # reserved for well-known chunks (conservative bound); refuse
            # to silently write an unrepresentable file.
            raise ValueError('too many distinct chunks: ' + str(len(self.chunks)))
        base_type = 8 * self.type.value
        f.write(struct.pack("<BH", base_type + 0, len(self.chunks)))
        for c in self.chunks:
            for w in c.words:
                f.write(struct.pack("<H", w))
        f.write(struct.pack("<B", base_type + 1))
        written = 0
        for i in range(0, len(self.indexes), 2):
            b1 = self.indexes[i] & 0xFF
            b2 = (self.indexes[i] & 0xF00) >> 8
            if i + 1 < len(self.indexes):
                b2 |= (self.indexes[i + 1] & 0xF00) >> 4
                b3 = self.indexes[i + 1] & 0xFF
            else:
                b3 = 0
            f.write(struct.pack("<BBB", b1, b2, b3))
            written += 3
        print(written, "packed index bytes")
class ChunkedImage:
    """A 1-bit image chopped into 16x16 chunks, with an optional mask plane."""

    def __init__(self, im):
        """im: PIL image; width and height must be multiples of 16."""
        if im.width % 16 != 0 or im.height % 16 != 0:
            raise ValueError('width and height must be multiples of 16')
        self.width = im.width
        self.height = im.height
        self.subimages = [SubImage(im, ImgType.Normal)]

    def add_mask(self, im):
        """Attach a transparency mask plane of matching dimensions."""
        if im.width != self.width or im.height != self.height:
            raise ValueError('mask dimensions do not match image')
        self.subimages.append(SubImage(im, ImgType.Mask))

    def disk_size(self):
        """Estimated output file size in bytes.

        NOTE(review): the real header is 6 bytes (4-byte magic + 2 dimension
        bytes); the missing 2 here cancels against SubImage.disk_size's
        2-byte overestimate of its index record tag -- confirm before
        changing either constant.
        """
        return 4 + sum(sub.disk_size() for sub in self.subimages)

    def write_to_file(self, filename):
        """Write the chunked-image file: magic, chunk grid size, then planes."""
        with open(filename, 'wb') as f:
            f.write(b"\xA7ci\x00")
            # Dimensions are stored as chunk counts, one byte each.
            f.write(struct.pack('<BB', self.width // 16, self.height // 16))
            for sub in self.subimages:
                sub.write(f)

    def into_image(self):
        """Rebuild a PIL image from the chunks, applying the mask if present."""
        base = self.subimages[0].into_image(self.width, self.height)
        if len(self.subimages) > 1:
            base = base.convert('RGBA')
            mask = self.subimages[1].into_image(self.width, self.height)
            # '#FF000000' reads as #RRGGBBAA: red with alpha 0 (fully transparent).
            transparent = Image.new('RGBA', (self.width, self.height), color='#FF000000')
            base = Image.composite(base, transparent, mask)
        return base
def parse_args():
    """Build the command-line interface and return the parsed namespace."""
    p = argparse.ArgumentParser(prog='chunk-compress')
    p.add_argument('input_file')
    p.add_argument('output_file')
    p.add_argument('-s', '--similarity', type=int, default=25,
                   help='similarity rating')
    p.add_argument('-t', '--target-size', type=int,
                   help='decrease similarity incrementally to hit a target size')
    p.add_argument('-m', '--merge', action='store_true',
                   help='merge chunks instead of replacing them')
    p.add_argument('--no-dedup', action='store_true',
                   help='do not run deduplication step (identical and well-known chunks will still be deduplicated)')
    p.add_argument('--mask', help='Add a mask image')
    p.add_argument('--png', help='also output a PNG proofing image')
    return p.parse_args()
# ---- command-line entry point -------------------------------------------
args = parse_args()

# Load the source image and split it into 16x16 chunks.
im = Image.open(args.input_file)
ci = ChunkedImage(im)
if args.mask:
    cim = Image.open(args.mask)
    ci.add_mask(cim)

original_chunk_len = len(ci.subimages[0].chunks)
print(f'{original_chunk_len} distinct chunks')

if not args.no_dedup:
    if args.target_size:
        # Retry with progressively lower similarity thresholds until the
        # serialized size fits the requested target.
        similarity = args.similarity
        while True:
            print('attempting deduplication with similarity', similarity)
            ci_temp = copy.deepcopy(ci)
            dead_chunk_len = ci_temp.subimages[0].deduplicate(similarity)
            if ci_temp.disk_size() <= args.target_size:
                ci = ci_temp
                break
            if similarity <= -64:
                # Chunk.similarity cannot drop below roughly -32 (its two
                # penalty terms are each at most 256 // 16), so once the
                # threshold is far past that, lowering it further cannot
                # merge anything else. Keep the best effort and stop
                # instead of looping forever on an unreachable target.
                print('warning: target size unreachable, best size is', ci_temp.disk_size())
                ci = ci_temp
                break
            similarity -= 1
    else:
        print('deduplicating chunks with similarity', args.similarity)
        dead_chunk_len = ci.subimages[0].deduplicate(args.similarity)
    # Avoid a ZeroDivisionError when every tile was a well-known (solid or
    # blank) chunk and the distinct-chunk list started out empty.
    reduction = int(dead_chunk_len / original_chunk_len * 100) if original_chunk_len else 0
    print('deduplicated', dead_chunk_len, 'chunks', f'({reduction:d}% reduction)')

print('size', ci.disk_size())
if args.png:
    # Proofing output: reassemble the chunked data back into an image file.
    im = ci.into_image()
    im.save(args.png)
ci.write_to_file(args.output_file)