11 March 2016 - Forum Rules

Main Menu


Started by creeperton, October 23, 2014, 08:00:21 PM

Previous topic - Next topic




How intriguing...

I found this Python implementation of LZSS, but alas, I can't see where the ring buffer would be initialized with spaces.  (It's more obvious in this C code.)
This signature is an illusion and is a trap devisut by Satan. Go ahead dauntlessly! Make rapid progres!


Not the best implementation, but it is one.  Set fill=b'\x00' if you want binary zeroes instead of spaces.  Just to warn you, it's a tad slow in C so don't expect sterling results from an interpreted language like Python.

Basically, you'd just call:
encode(data, fill=b'\x00')
-unless you're lazy and change it in the def.

    def encode(data, ring=4096, threshold=2, idx=4078, fill=b' ', byteorder='little'):
        A fairly straight port of Haruhiko Okumura's LZSS compressor from C.
        Nothing really clever, and not very pythonic.  (I'm not that good at this honestly.)
        idx is typically ring - limit, though some variations start at the beginning instead.
        Limit is computed as 16bits - #bits to express ring, plus the threshold.  For that reason, rings should always be powers of two.
        A fill value of 0 is adequate for 'binary' files.  LZSS was originally developed for text, based on the LZ76 string complexity algorithm.
        output = bytearray()
        if not data: return output

        from itertools import repeat
        from array import array

        # Limit is max encodable length + threshold.
        limit = 1<<(16 - int.bit_length(ring-1))
        limit+= threshold

        # Initialize the ring buffer with a common fill value.
        if isinstance(fill, (bytes, bytearray)):
            fill = fill[0]
        rng = bytearray(repeat(fill, ring + limit))
        matchlen, matchpos, nil = 0, 0, ring
        # Initialize the trees.
        lson= array('i', repeat(0, ring + 1))
        rson= array('i', repeat(0, ring + 1))
        rson.extend(repeat(nil, 256))
        dad = array('i', repeat(nil, ring + 1))

        def InsertNode(v):
            """Inserts string into trees and returns
            longest match position and length as a tuple.
            If match length equals the limit, it replaces the old node.
            By embedding this function it can use the trees and ring globally."""
            cmp, ml, mp = 1, 0, matchpos
            key = rng[v:]
            pos = ring + 1 + key[0]
            rson[v] = nil
            lson[v] = nil
            while True:
                if cmp>=0:
                    if rson[pos] != nil:
                        pos = rson[pos]
                        rson[pos] = v
                        dad[v] = pos
                        return (ml, mp)
                    if lson[pos] != nil:
                        pos = lson[pos]
                        lson[pos] = v
                        dad[v] = pos
                        return (ml, mp)
                for i in range(1, limit+1):
                    cmp = key[i] - rng[pos + i]
                    if cmp:
                if i > ml:
                    mp, ml = pos, i
                    if ml >= limit: break
            dad[v] = dad[pos]
            dad[lson[pos]] = v
            dad[rson[pos]] = v
            if rson[dad[pos]] == pos:
                rson[dad[pos]] = v
                lson[dad[pos]] = v
            dad[pos] = nil
            return (ml, mp)

        def DeleteNode(pos):
            if dad[pos] == nil: return
            # If it's in the tree, delete it.
            q = lson[pos]
            if lson[pos] == nil:
                q = rson[pos]
            elif rson[pos] != nil:
                if rson[q] != nil:
                    while True:
                        q = rson[q]
                        if rson[q] == nil: break
                    rson[dad[q]] = lson[q]
                    dad[lson[q]] = dad[q]
                    lson[q] = lson[pos]
                    dad[lson[pos]] = q
                rson[q] = rson[pos]
                dad[rson[pos]] = q
            dad[q] = dad[pos]
            if rson[dad[pos]] == pos:
                rson[dad[pos]] = q
                lson[dad[pos]] = q
            dad[pos] = nil

        # Unset flags on copies; send when less than 256.
        mask = 0xFF00
        codebuf = bytearray()
        s = 0
        # Read limit bytes into the ring at idx.
        p = min(limit, len(data)-1)
        rng[idx:idx+p] = data[0:p]
        cur = p
        for i in range(1, limit+1):
            matchlen, matchpos = InsertNode(idx - i)
        matchlen, matchpos = InsertNode(idx)
        # Now you're initialized, so do the rest of the file.
        while True:
            matchlen = min(matchlen, p)
            if matchlen <= threshold:
                matchlen = 1
                mask ^= 128
                ml = (matchlen - threshold - 1)
                maskr = 8 - int.bit_length(ring - 1)
                maskl = 8 - int.bit_length(limit - threshold - 1)
                a = ml&0xFF
                b = matchpos&0xFF
                if maskl>0:
                    a|= (matchpos>>maskl) & (0xFF00>>maskl)
                elif maskr>0:
                    b|= (matchpos>>maskr) & (0xFF00>>maskr)
                if byteorder == 'little':
            # Flush when the mask is full.
            if mask < 256:
                codebuf = bytearray()
                mask = 0xFF00
            prevmatchlen = matchlen
            j = min(prevmatchlen, len(data)-cur)
            for i in range(j):
                rng[s] = data[cur]
                if s < (limit - 1):
                    rng[s + ring] = data[cur]
                # Correct the ring position via modulo ring
                s&= ring-1
                idx&= ring-1
                matchlen, matchpos = InsertNode(idx)
            # Flush the rest of the buffer if necessary.
            for i in range(j, prevmatchlen):
                s&= ring-1
                idx&= ring-1
                if p:
                    matchlen, matchpos = InsertNode(idx)
            # Loop until source empty.
            if not p:
        # Flush remaining output; mask the lead bit off the mask.
        if codebuf:
            ## mask &= ~(1<<mask.bit_length()-1)   # bottom to top bitorder.
            l = mask.bit_length() - 8
            mask &= 0xFF
        return output