[Ogre Battle/LZSS] I can't get my compression routine correct.

Started by Mauron, September 21, 2010, 05:42:58 PM

Previous topic - Next topic

Mauron

Comments on problem are in the routine below. It borrows heavily from Geiger's routine for Chrono Trigger.

public List<byte> Decompressed = new List<byte>();
        public List<byte> Compressed = new List<byte>();
        public int CompressedSize;
        public int UncompressedSize;
        public bool ReadyToSave = false;
        public List<byte> Original = new List<byte>();
        private int ROMPosition;


        public bool Compress(List<byte> RawData, bool IncreasedSize)
        {
            Compressed.Clear();
            int MaxSize = IncreasedSize ? RawData.Count : UncompressedSize;
            Compressed.Add((byte)(MaxSize & 0xFF));
            Compressed.Add((byte)((MaxSize & 0xFF00) / 0x100));
            short j;
            short k;
            byte BitCounter;
            ushort Offset;
            ushort CopyLength;
            int SourcePosition;
            ushort WorkingPosition;
            byte[] TempData = new byte[0x10000];
            ushort PacketHeaderOffset;
            ushort Range;
            ushort CurrentSize = 0xFFFF;
            ushort MaxCopy = 34;

            Range = (ushort) 0x07FF;
            SourcePosition = 0;
            BitCounter = 0;
            Offset = 0;
            CopyLength = 0;
            WorkingPosition = 3;
            PacketHeaderOffset = 2;
            while(SourcePosition < MaxSize && WorkingPosition < CurrentSize)
            {
                 for(; BitCounter < 8 && SourcePosition < MaxSize; BitCounter++)
                 {
                     if (SourcePosition > Range)
                     {
                         j = (short) (SourcePosition - Range);
                     }
                     else
                     {
                         j = 0;
                     }
                     for(; j < SourcePosition; j++)
                     {
                         
                         for (k = 0; SourcePosition + k < MaxSize && RawData[j + k] == RawData[SourcePosition + k]; k++)
                         {
                         }
                         if (k > CopyLength)  //Should this be >= ? If I do that, it fixes some things but breaks others?
                         {
                             Offset = (ushort)j;
                             CopyLength = (ushort)k;
                         }
                     }
                     if (CopyLength > MaxCopy) // This was my latest experiment. Not sure if it's right at all.
                     {
                         Offset += (ushort)(CopyLength - MaxCopy);
                         CopyLength = MaxCopy;
                     }
                     if(CopyLength > 2)
                     {
                         TempData[PacketHeaderOffset] |= (byte) (1 << BitCounter);
                         Offset = (ushort)(0x800 - (SourcePosition - Offset));
                         TempData[WorkingPosition++] = (byte)((((Offset >> 8) & 0x07)) | (CopyLength - 3) << 3);
                         TempData[WorkingPosition++] = (byte)(Offset & 0xFF);
                         SourcePosition += CopyLength;
                         CopyLength = 0;
                     }
                     else
                     {
                         TempData[WorkingPosition++] = RawData[SourcePosition++];
                     }
                 }
                 if(BitCounter == 8)
                 {
                     BitCounter = 0;
                     TempData[PacketHeaderOffset] = (byte)~TempData[PacketHeaderOffset];
                     PacketHeaderOffset = WorkingPosition;
                     WorkingPosition++;
                 }
            }
            if(WorkingPosition < MaxSize)
            {
                if(BitCounter > 0)
                {
                    TempData[PacketHeaderOffset] |= (byte) (0xFF << BitCounter); // Lots commented here because I hadn't started playing with that part yet.
                    //Array.Copy(TempData, PacketHeaderOffset, TempData, PacketHeaderOffset + 3, WorkingPosition - PacketHeaderOffset);
                    //TempData[PacketHeaderOffset] = (byte) (BitCounter | 0xC0);
                    //CurrentSize = (ushort) (WorkingPosition + 3);
                    //TempData[PacketHeaderOffset + 1] = (byte) CurrentSize;
                    //TempData[PacketHeaderOffset + 2] = (byte) (CurrentSize >> 8);
                    //TempData[WorkingPosition + 3] = 0;
                }
                else
                {
                    CurrentSize = (ushort) (PacketHeaderOffset + 1);
                    TempData[PacketHeaderOffset] = (byte) 0xC0;
                }
                CurrentSize = (ushort)(PacketHeaderOffset - 4);
                TempData[0] = (byte) (CurrentSize & 0xFF);
                TempData[1] = (byte) (CurrentSize >> 8);
            }
            CurrentSize = (ushort)(PacketHeaderOffset - 2);
            for (int i = 0; i < CurrentSize; i++)
            {
                Compressed.Add(TempData[i]);
            }
            if ((CurrentSize + 2) >= CompressedSize && !IncreasedSize)
            {
                ReadyToSave = false;
            }
            else
            {
                ReadyToSave = true;
            }
            return ReadyToSave;
        }
Mauron wuz here.

Magil

Hmm, it could be always helpful if you can provide some sample data to test with.

Mauron

...I've really taken over 60 days to get back to this?

There's been some progress on my end, but I'm still stuck.

public bool Compress(byte[] RawData, bool IncreasedSize)
        {
            Compressed.Initialize();
            int MaxSize = IncreasedSize ? RawData.Length : CompressedSize;
            Array.Resize(ref Compressed, MaxSize);
            Compressed[0] = (byte)(MaxSize & 0xFF);
            Compressed[1] = (byte)((MaxSize & 0xFF00) / 0x100);
            int j;
            short k;
            byte BitCounter;
            ushort Offset;
            ushort CopyLength;
            int SourcePosition;
            ushort WorkingPosition;
            ushort PacketHeaderOffset;
            ushort Range;
            ushort CurrentSize = 0xFFFF;
            ushort MaxCopy = 34;
            int StopPoint = 0;

            Range = (ushort) 0x07FF;
            SourcePosition = 0;
            BitCounter = 0;
            Offset = 0;
            CopyLength = 0;
            WorkingPosition = 5;
            PacketHeaderOffset = 4;
            while(SourcePosition < MaxSize && WorkingPosition < CompressedSize)
            {
                 for(; BitCounter < 8 && SourcePosition < MaxSize; BitCounter++)
                 {
                     if (SourcePosition > Range)
                     {
                         StopPoint = (short) (SourcePosition - Range);
                         j = SourcePosition - 1;
                     }
                     else
                     {
                         StopPoint = 0;
                         j = SourcePosition - 1;
                     }
                     //Reversing this loop increased the accuracy, but there's still some errors.
                     for(; j >= StopPoint; j--)
                     {
                         for (k = 0; SourcePosition + k < MaxSize && RawData[j + k] == RawData[SourcePosition + k]; k++)
                         {
                         }
                         if (k > CopyLength)
                         {
                             Offset = (ushort)j;
                             CopyLength = (ushort)k;
                             if (k >= MaxCopy)
                             {
                                 CopyLength = MaxCopy;
                             }
                         }

                     }
                     if(CopyLength >= 3)
                     {
                         Compressed[PacketHeaderOffset] |= (byte)(1 << BitCounter);
                         Offset = (ushort)(0x800 - (SourcePosition - Offset));
                         Compressed[WorkingPosition++] = (byte)((((Offset >> 8) & 0x07)) | (CopyLength - 3) << 3);
                         Compressed[WorkingPosition++] = (byte)(Offset & 0xFF);
                         SourcePosition += CopyLength;
                         CopyLength = 0;
                     }
                     else
                     {
                         Compressed[WorkingPosition++] = RawData[SourcePosition++];
                     }
                 }
                 if(BitCounter == 8)
                 {
                     BitCounter = 0;
                     Compressed[PacketHeaderOffset] = (byte)~Compressed[PacketHeaderOffset];
                     PacketHeaderOffset = WorkingPosition;
                     WorkingPosition++;
                 }
            }
            if(WorkingPosition < MaxSize)
            {
                // The Closing area is untouched until I fix the earlier problems.
                if(BitCounter > 0)
                {
                    Compressed[PacketHeaderOffset] |= (byte)(0xFF << BitCounter);
                    //Array.Copy(TempData, PacketHeaderOffset, TempData, PacketHeaderOffset + 3, WorkingPosition - PacketHeaderOffset);
                    //TempData[PacketHeaderOffset] = (byte) (BitCounter | 0xC0);
                    //CurrentSize = (ushort) (WorkingPosition + 3);
                    //TempData[PacketHeaderOffset + 1] = (byte) CurrentSize;
                    //TempData[PacketHeaderOffset + 2] = (byte) (CurrentSize >> 8);
                    //TempData[WorkingPosition + 3] = 0;
                }
                else
                {
                    CurrentSize = (ushort) (PacketHeaderOffset + 1);
                    Compressed[PacketHeaderOffset] = (byte)0xC0;
                }
                CurrentSize = (ushort)(PacketHeaderOffset - 4);
                Compressed[2] = (byte) (CurrentSize & 0xFF);
                Compressed[3] = (byte)(CurrentSize >> 8);
            }
            CurrentSize = (ushort)(PacketHeaderOffset - 2);
            if ((CurrentSize + 2) >= CompressedSize && !IncreasedSize)
            {
                ReadyToSave = false;
            }
            else
            {
                ReadyToSave = true;
            }
            return ReadyToSave;
        }


And your friendly neighborhood samples!

test.bin is a dump of my current results. Sprite - 19somethingorother.bin is a copy of the compressed data, and Sprite - 19somethingorother - Decompressed.bin is the decompressed sprite.
Mauron wuz here.

Magil

Just took a glance at the samples you provided, and recalled I saw some similar compression before.
I'll PM you the link since it is not publicly accessable.

And what's more, the compression is byte based instead of bit if I'm not mistaken...

esperknight

If your using LZSS then you may not get an exact compression match unless you code it exactly how the original LZSS algorithm is coded.  Due to using sorted binary trees it had the effect of applying RLE to the filler byte (usually 0x00 or if the used the original 0x20).  I'm thinking this was unintentional as it's not mentioned anywhere in the comments.  As long as your compressed version decompresses fine then there's nothing to worry about.

If you want it though I converted the original LZSS code over to C# if you want to look at it or use it for compressing this (as giving this a glance over, it looks like the game is using the original).

Edit : Posting my code anyways in case anyone else is interested. This is mostly unmodified except for the match length (F) and the ring buffer (N).  This is the same code used for AMG PC-FX and the same code that most games use when using LZSS.

The original C code can be found here : LZSS.C


static class LZSS
    {
        const int N = 0x0400;    /* size of ring buffer */
        const int F = 18;    /* upper limit for match_length */
        const int THRESHOLD = 2;   /* encode string into position and length
                                   if match_length is greater than this */
        const int NIL = N;    /* index for root of binary search trees */

        static ulong
                textsize = 0,    /* text size counter */
                codesize = 0,    /* code size counter */
                printcount = 0;    /* counter for reporting progress every 1K bytes */
        static byte[]
                text_buf = new byte[Math.Abs(N + F - 1)];    /* ring buffer of size N,
                    with extra F-1 bytes to facilitate string comparison */
        static int match_position, match_length;  /* of longest match.  These are
                    set by the InsertNode() procedure. */
        static int[]
                lson = new int[N + 1], rson = new int[N + 257], dad = new int[N + 1];  /* left & right children &
                    parents -- These constitute binary search trees. */

        static void InitTree()  /* initialize trees */
        {
            int i;

            /* For i = 0 to N - 1, rson[i] and lson[i] will be the right and
               left children of node i.  These nodes need not be initialized.
               Also, dad[i] is the parent of node i.  These are initialized to
               NIL (= N), which stands for 'not used.'
               For i = 0 to 255, rson[N + i + 1] is the root of the tree
               for strings that begin with character i.  These are initialized
               to NIL.  Note there are 256 trees. */

            for (i = N + 1; i <= N + 256; i++) rson[i] = NIL;
            for (i = 0; i < N; i++) dad[i] = NIL;
        }

        static void InsertNode(int r)
        /* Inserts string of length F, text_buf[r..r+F-1], into one of the
           trees (text_buf[r]'th tree) and returns the longest-match position
           and length via the global variables match_position and match_length.
           If match_length = F, then removes the old node in favor of the new
           one, because the old one will be deleted sooner.
           Note r plays double role, as tree node and position in buffer. */
        {
            int i, p, cmp;

            byte[] key = new byte[text_buf.Length];
            for (int j = 0; r + j < text_buf.Length; j++)
            {
                key[j] = text_buf[r + j];
            }

            cmp = 1; p = N + 1 + key[0];
            rson[r] = lson[r] = NIL; match_length = 0;
            for (; ; )
            {
                if (cmp >= 0)
                {
                    if (rson[p] != NIL) p = rson[p];
                    else { rson[p] = r; dad[r] = p; return; }
                }
                else
                {
                    if (lson[p] != NIL) p = lson[p];
                    else { lson[p] = r; dad[r] = p; return; }
                }
                for (i = 1; i < F; i++)
                    if ((cmp = key[i] - text_buf[p + i]) != 0) break;
                if (i > match_length)
                {
                    match_position = p;
                    if ((match_length = i) >= F) break;
                }
            }
            dad[r] = dad[p]; lson[r] = lson[p]; rson[r] = rson[p];
            dad[lson[p]] = r; dad[rson[p]] = r;
            if (rson[dad[p]] == p) rson[dad[p]] = r;
            else lson[dad[p]] = r;
            dad[p] = NIL;  /* remove p */
        }

        static void DeleteNode(int p)  /* deletes node p from tree */
        {
            int q;

            if (dad[p] == NIL) return;  /* not in tree */
            if (rson[p] == NIL) q = lson[p];
            else if (lson[p] == NIL) q = rson[p];
            else
            {
                q = lson[p];
                if (rson[q] != NIL)
                {
                    do { q = rson[q]; } while (rson[q] != NIL);
                    rson[dad[q]] = lson[q]; dad[lson[q]] = dad[q];
                    lson[q] = lson[p]; dad[lson[p]] = q;
                }
                rson[q] = rson[p]; dad[rson[p]] = q;
            }
            dad[q] = dad[p];
            if (rson[dad[p]] == p) rson[dad[p]] = q; else lson[dad[p]] = q;
            dad[p] = NIL;
        }

        static public MemoryStream Encode(MemoryStream infile)
        {
            infile.Seek(0, SeekOrigin.Begin);

            MemoryStream outfile = new MemoryStream();

            int i, c, len, r, s, last_match_length, code_buf_ptr;
            byte[] code_buf = new byte[17];
            byte mask;

            InitTree();  /* initialize trees */
            code_buf[0] = 0;  /* code_buf[1..16] saves eight units of code, and
                code_buf[0] works as eight flags, "1" representing that the unit
                is an unencoded letter (1 byte), "0" a position-and-length pair
                (2 bytes).  Thus, eight units require at most 16 bytes of code. */
            code_buf_ptr = mask = 1;
            s = 0; r = N - F;
            for (i = s; i < r; i++) text_buf[i] = 0x00;  /* Clear the buffer with
                any character that will appear often. */
            for (len = 0; len < F && (infile.Position < infile.Length); len++)
            {
                c = infile.ReadByte();
                text_buf[r + len] = (byte)c;  /* Read F bytes into the last F bytes of
                    the buffer */
            }
            if ((textsize = (ulong)len) == 0) return outfile;  /* text of size zero */
            for (i = 1; i <= F; i++) InsertNode(r - i);  /* Insert the F strings,
                each of which begins with one or more 'space' characters.  Note
                the order in which these strings are inserted.  This way,
                degenerate trees will be less likely to occur. */
            InsertNode(r);  /* Finally, insert the whole string just read.  The
                global variables match_length and match_position are set. */
            do
            {
                if (match_length > len) match_length = len;  /* match_length
                    may be spuriously long near the end of text. */
                if (match_length <= THRESHOLD)
                {
                    match_length = 1;  /* Not long enough match.  Send one byte. */
                    code_buf[0] |= mask;  /* 'send one byte' flag */
                    code_buf[code_buf_ptr++] = text_buf[r];  /* Send uncoded. */
                }
                else
                {
                    code_buf[code_buf_ptr++] = (byte)match_position;
                    code_buf[code_buf_ptr++] = (byte)
                        (((match_position >> 4) & 0xf0)
                      | (match_length - (THRESHOLD + 1)));  /* Send position and
                            length pair. Note match_length > THRESHOLD. */
                }
                if ((mask <<= 1) == 0)
                {  /* Shift mask left one bit. */
                    for (i = 0; i < code_buf_ptr; i++)  /* Send at most 8 units of */
                        outfile.Write(code_buf, i, 1);    /* code together */
                    codesize += (ulong)code_buf_ptr;
                    code_buf[0] = 0; code_buf_ptr = mask = 1;
                }
                last_match_length = match_length;
                for (i = 0; i < last_match_length &&
                        (infile.Position < infile.Length); i++)
                {
                    c = infile.ReadByte();
                    DeleteNode(s);        /* Delete old strings and */
                    text_buf[s] = (byte)c;    /* read new bytes */
                    if (s < F - 1) text_buf[s + N] = (byte)c;  /* If the position is
                        near the end of buffer, extend the buffer to make
                        string comparison easier. */
                    s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
                    /* Since this is a ring buffer, increment the position
                       modulo N. */
                    InsertNode(r);    /* Register the string in text_buf[r..r+F-1] */
                }
                if ((textsize += (ulong)i) > printcount)
                {
                    Console.WriteLine(textsize.ToString("X2")); printcount += 1024;
                    /* Reports progress each time the textsize exceeds
                       multiples of 1024. */
                }
                while (i++ < last_match_length)
                {    /* After the end of text, */
                    DeleteNode(s);                    /* no need to read, but */
                    s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
                    if ((--len) > 0) InsertNode(r);        /* buffer may not be empty. */
                }
            } while (len > 0);    /* until length of string to be processed is zero */
            if (code_buf_ptr > 1)
            {        /* Send remaining code. */
                for (i = 0; i < code_buf_ptr; i++) outfile.Write(code_buf, i, 1);
                codesize += (ulong)code_buf_ptr;
            }
            Console.Write("In : " + textsize.ToString("X2") + " bytes\n");
            Console.Write("Out: " + codesize.ToString("X2") + " bytes\n");
            Console.Write("Out/In: " + (double)codesize / textsize + "\n");

            return outfile;
        }
    }

Mauron

Magil: Thanks for the link. I had seen that topic before, but only glanced at it.

Quote from: esperknight on February 15, 2011, 03:36:53 AM
If your using LZSS then you may not get an exact compression match unless you code it exactly how the original LZSS algorithm is coded.

That's where I'm running into the problem. I doesn't match exactly.
Mauron wuz here.

esperknight

Although it doesn't match have you ran your compressed version through your decompressor?  As long as it works that's what counts.  Otherwise I'll take a look tonight and see if I can help you out.

FinS

Here is a log of the decompression routine for the same sprite made from Geiger's debugger.
link

I think you are all right that the best way to write a compressor is to examine the decompression routine more closely so I figured out how the decompressor works. I don't think I can write a compressor but hopefully this will help you to fix yours but I will take a look at your source anyway and see if I can figure anything out.

I added spaces between the iterations in the routine to make them more clear.  I think everyone understands the first 4 bytes pretty well but anyway the first 2 (#0014) are the length of the output in reverse and the next 2 (#3a0b) don't really seem too important as long as they are not equal to #0000. They are not stored anywhere and are not referenced anywhere else in the routine.

The next byte (#ff) is a control code which can be more easily understood in binary, 1111 1111. This is lsr'd each iteration and if the bit is '0' then that iteration will use a 2 byte control code.

So since there are no '0' bits here write the next 8 bytes directly to ram then there is another control code.

This control code, #fb (or 11111011 in binary), tells it to write 2 bytes then the 3rd iteration will be a 2 byte control code. So write next 2 bytes to ram (0a 06) then ...

These 2 bytes #0ffe can be explained by changing them to 00001 11111111110 binary. The first 5 bits + #3 more are the number of bytes to rewrite. so in this case #01 + #03 = #04 bytes will be rewritten froma previous location to the current location. The next 11 bits are subtracted from #800 is the number of offsets to jump backwards to read.  So #800 - #7fe (11111111110) = #02 or in other words read 2 offsets behind the current offset being written to. So it writes 0a 06 then since 0a 06 was just written it is there now to read so the entire output of this control code is '0a 06 0a 06'.

That was a total of 3 iterations since the #fb (11111011) control code so 5 more bytes are written directly to ram and there is another control code.

So that's really all there is to it. I had a lot more details written up but then when I tried to post it I had been logged out and it was all lost.

Please ask me if you have  any questions about it.

Mauron

Quote from: FinS on February 15, 2011, 05:45:26 PMI added spaces between the iterations in the routine to make them more clear.  I think everyone understands the first 4 bytes pretty well but anyway the first 2 (#0014) are the length of the output in reverse and the next 2 (#3a0b) don't really seem too important as long as they are not equal to #0000. They are not stored anywhere and are not referenced anywhere else in the routine.

#0b3a is the compressed size. If that's 0, the data is not compressed.

I've got a converted decompression routine available, although it could use some rewriting to operate better. The file I linked to was generated through that, so it does work.

esperknight: I'll test my output later. I doubt it works perfectly, as the original routine had some code I haven't adapted to this game.

Edit:  OH YES!

With a couple changes, running the recompressed data through the decompresser matched the first run's output exactly!
Mauron wuz here.

esperknight

And that's what counts :)  When I first wrote my own LZSS compression routine for Comic Party mine didn't match up either which drove me nuts.  Then I realized it didn't matter so long as the file decompresses correctly.  Course now if I want byte exact I use the routine I pasted below or the C one depending on if I'm coding in C++ or C#.

Fins, I agree with you on studying the decompressor to see how to recompress but for this one it's a very common compression algorithm so you can study the original code to learn how to do it luckily.  The pain is recognizing these though as when I first encountered it I didn't recognize it till someone pointed it out.  Course not all compression schemes are something well known but we programmers are lazy so we tend to use what others have done ;)

I recommend to anyone who wants to tackle anything PCE/PC-FX related learn this routine inside and out as you'll see it A LOT.

FinS

Sorry if my comments implied that I thought the decompression scheme needed work. That was not what I meant but I only wanted to help with the compression and I thought a thorough explanation of the routine in Ogre Battle would help any coders who were unfamiliar with it already to share ideas with the compression source code.

esperknight

No worries.  I wrote mine a bit fast.  I just wanted to make sure no one got confused in thinking that this wasn't an LZSS scheme but something different :)

Geiger

Wanted to point out, since you started from my code, that neither the Chrono Trigger nor Final Fantasy VI routines match the original compression output.  I quickly realized while reverse engineering these that so long as the decompressed data was not corrupted, trying to figure out the original compression tool's magic numbers would only be a gigantic time sink.
This is the patent age of new inventions -/- For killing bodies, and for saving souls, -/- All propagated with the best intentions. --Lord Byron

Mauron

Quote from: Geiger on February 16, 2011, 11:06:19 AM
Wanted to point out, since you started from my code, that neither the Chrono Trigger nor Final Fantasy VI routines match the original compression output.  I quickly realized while reverse engineering these that so long as the decompressed data was not corrupted, trying to figure out the original compression tool's magic numbers would only be a gigantic time sink.

Interesting, I hadn't realized they didn't match.

Thanks for publishing your routines, by the way, I found them more helpful than some of the other things I saw.
Mauron wuz here.

FinS

I modified the source code of Haruhiko Okumura to work for Ogre Battle so it matches the compression scheme exactly. Hope this helps.
link

//Ogre Battle: March of the Black Queen - compressor and decompressor  .c

// Greetings Andre Perrot <ovaron@gmx.net> who modified the code for GBA

/*re-adapted for Ogre Battle codec on Snes by Finnegan Shore*/

/**************************************************************
LZSS.C -- A Data Compression Program
(tab = 4 spaces)
***************************************************************
4/6/1989 Haruhiko Okumura
Use, distribute, and modify this program freely.
Please send me your improved versions.
PC-VAN SCIENCE
NIFTY-Serve PAF01022
CompuServe 74050,1022
**************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
typedef unsigned long u32;
struct stat finfo;

#define N 2048 /* size of ring buffer */
#define F    34 /* upper limit for match_length */
#define THRESHOLD 2   /* encode string into position and length
   if match_length is greater than this */
#define NIL N /* index for root of binary search trees */

unsigned long int
textsize = 0, /* text size counter */
codesize = 0, /* code size counter */
printcount = 0; /* counter for reporting progress every 1K bytes */
unsigned char
text_buf[N + F - 1]; /* ring buffer of size N,
with extra F-1 bytes to facilitate string comparison */
int match_position, match_length,  /* of longest match.  These are
set by the InsertNode() procedure. */
lson[N + 1], rson[N + 257], dad[N + 1];  /* left & right children &
parents -- These constitute binary search trees. */
FILE *infile, *outfile;  /* input & output files */

void InitTree(void)  /* initialize trees */
{
int  i;

/* For i = 0 to N - 1, rson[i] and lson[i] will be the right and
   left children of node i.  These nodes need not be initialized.
   Also, dad[i] is the parent of node i.  These are initialized to
   NIL (= N), which stands for 'not used.'
   For i = 0 to 255, rson[N + i + 1] is the root of the tree
   for strings that begin with character i.  These are initialized
   to NIL.  Note there are 256 trees. */

for (i = N + 1; i <= N + 256; i++) rson[i] = NIL;
for (i = 0; i < N; i++) dad[i] = NIL;
}

void InsertNode(int r)
/* Inserts string of length F, text_buf[r..r+F-1], into one of the
   trees (text_buf[r]'th tree) and returns the longest-match position
   and length via the global variables match_position and match_length.
   If match_length = F, then removes the old node in favor of the new
   one, because the old one will be deleted sooner.
   Note r plays double role, as tree node and position in buffer. */
{
int  i, p, cmp;
unsigned char  *key;

cmp = 1;  key = &text_buf[r];  p = N + 1 + key[0];
rson[r] = lson[r] = NIL;  match_length = 0;
for ( ; ; ) {
if (cmp >= 0) {
if (rson[p] != NIL) p = rson[p];
else {  rson[p] = r;  dad[r] = p;  return;  }
} else {
if (lson[p] != NIL) p = lson[p];
else {  lson[p] = r;  dad[r] = p;  return;  }
}
for (i = 1; i < F; i++)
if ((cmp = key[i] - text_buf[p + i]) != 0)  break;
if (i > match_length) {
match_position = p;
if ((match_length = i) >= F)  break;
}
}
dad[r] = dad[p];  lson[r] = lson[p];  rson[r] = rson[p];
dad[lson[p]] = r;  dad[rson[p]] = r;
if (rson[dad[p]] == p) rson[dad[p]] = r;
else                   lson[dad[p]] = r;
dad[p] = NIL;  /* remove p */
}

void DeleteNode(int p)  /* deletes node p from tree */
{
int  q;

if (dad[p] == NIL) return;  /* not in tree */
if (rson[p] == NIL) q = lson[p];
else if (lson[p] == NIL) q = rson[p];
else {
q = lson[p];
if (rson[q] != NIL) {
do {  q = rson[q];  } while (rson[q] != NIL);
rson[dad[q]] = lson[q];  dad[lson[q]] = dad[q];
lson[q] = lson[p];  dad[lson[p]] = q;
}
rson[q] = rson[p];  dad[rson[p]] = q;
}
dad[q] = dad[p];
if (rson[dad[p]] == p) rson[dad[p]] = q;  else lson[dad[p]] = q;
dad[p] = NIL;
}





void Encode(void)
{
int  i, c, len, r, s, last_match_length, code_buf_ptr;
unsigned char  code_buf[17], mask;
u32 header = (finfo.st_size);
unsigned char* tmp = (unsigned char*)&header;
printf("header: %x\n", header );
for(i=0; i<2; i++) putc( tmp[i], outfile );
tmp[i]=codesize;
for(i=0; i<2; i++) putc( tmp[i], outfile );


InitTree();  /* initialize trees */
code_buf[0] = 1;  /* code_buf[1..16] saves eight units of code, and
code_buf[0] works as eight flags, "0" representing that the unit
is an unencoded letter (1 byte), "1" a position-and-length pair
(2 bytes).  Thus, eight units require at most 16 bytes of code. */
code_buf_ptr = mask = 1;
s = 0;
r = N - F; // 4078

for (i = s; i < r; i++) text_buf[i] = 0xff;  /* Clear the buffer with any character that will appear often. */
for (len = 0; len < F && (c = getc(infile)) != EOF; len++)
text_buf[r + len] = c;  /* Read F bytes into the last F bytes of
the buffer */
if ((textsize = len) == 0) return;  /* text of size zero */
for (i = 1; i <= F; i++) InsertNode(r - i);  /* Insert the F strings,
each of which begins with one or more 'space' characters.  Note
the order in which these strings are inserted.  This way,
degenerate trees will be less likely to occur. */
InsertNode(r);  /* Finally, insert the whole string just read.  The
global variables match_length and match_position are set. */


// compression loop

do {
if (match_length > len) match_length = len;  /* match_length
may be spuriously long near the end of text. */

// not compressed
if (match_length <= THRESHOLD) {
match_length = 1;  /* Not long enough match.  Send one byte. */
// original: code_buf[0] |= mask;  /* 'send one byte' flag */
code_buf[0] |= mask;  // flag "compressed" set
code_buf[code_buf_ptr++] = text_buf[r];  /* Send uncoded. */
} else
// compress
{
code_buf[code_buf_ptr++] = (unsigned char)
((((r-match_position-1)>>8)&7^7) |
((match_length - 3)<<3));         // subtract (THRESHOLD+1)

code_buf[code_buf_ptr++] = (unsigned char) ((r-match_position-1) ^ 0xff);
/* Send position and length pair. Note match_length > THRESHOLD. */
}

// mask shift
if ((mask <<= 1) == 0) {  /* Shift mask left one bit. */
for (i = 0; i < code_buf_ptr; i++)  /* Send at most 8 units of */
putc(code_buf[i], outfile) ;     /* code together */
codesize += code_buf_ptr;
code_buf[0] = 0;  code_buf_ptr = mask = 1;

}

last_match_length = match_length;
for (i = 0; i < last_match_length &&
(c = getc(infile)) != EOF; i++) {
DeleteNode(s); /* Delete old strings and */
text_buf[s] = c; /* read new bytes */
if (s < F - 1) text_buf[s + N] = c;  /* If the position is
near the end of buffer, extend the buffer to make
string comparison easier. */
s = (s + 1) & (N - 1);  r = (r + 1) & (N - 1);
/* Since this is a ring buffer, increment the position
   modulo N. */
InsertNode(r); /* Register the string in text_buf[r..r+F-1] */
}
if ((textsize += i) > printcount) {
printf("%12ld\r", textsize);  printcount += 1024;
//getchar();
/* Reports progress each time the textsize exceeds
   multiples of 1024. */
}
while (i++ < last_match_length) { /* After the end of text, */
DeleteNode(s); /* no need to read, but */
s = (s + 1) & (N - 1);  r = (r + 1) & (N - 1);
if (--len) InsertNode(r); /* buffer may not be empty. */
}
} while (len > 0); /* until length of string to be processed is zero */


if (code_buf_ptr > 1) { /* Send remaining code. */
for (i = 0; i < code_buf_ptr; i++) putc(code_buf[i], outfile);
codesize += code_buf_ptr;
}


printf("In : %ld bytes\n", textsize); /* Encoding is done. */
printf("Out: %ld bytes\n", codesize);
printf("Out/In: %.3f\n", (double)codesize / textsize);
}



void Decode(void) /* Just the reverse of Encode(). */
// also fixed to decode in the way bios funktion works
{
int  i, j, k, r, c, z;
unsigned int  flags;
u32 decomp_size; // i´m using decomp_size and cur_size to make sure we dont "decompress" the padding data
u32 cur_size=0; // we added to the compressed data to keep it in 4 byte boundaries

// discard header info
u32 header;
unsigned char* tmp = (unsigned char*)&header;
for(i=0; i<4; i++) tmp[i] = getc(infile);
decomp_size = header&0xffff;
printf("header: %x, decompressed size: %d\n", header&0xffff, decomp_size );

for (i = 0; i < N - F; i++) text_buf[i] = 0;
r = N - F;  flags = z = 7;
for ( ; ; ) {
flags >>= 1;
z++;
if (z == 8) { // read new block flag
if ((c = getc(infile)) == EOF) break;
flags = c;
//getchar();
//printf ("x %X %x %x %x %x\n",r,j,i,(r+j-0x800),c);
z = 0; // reset counter
}
if (!(flags&0x01)) {
if ((i = getc(infile)) == EOF) break;
if ((j = getc(infile)) == EOF) break;
j = 0x800-(j | ((i<<8)&0x700)); // match offset
i = ((i>>3)&0x1f)+THRESHOLD; // match length
for (k = 0; k <= i; k++) {
c = text_buf[(r-j) & (N - 1)];
//getchar();
//printf ("h %X %x %x %x %x %x\n",r,j,i,(r-j-1),c,cur_size);
if(cur_size<decomp_size) putc(c, outfile);  text_buf[r++] = c;  r &= (N - 1); cur_size++;
}
}
else { // flag bit zero => uncompressed
if ((c = getc(infile)) == EOF) break;
if(cur_size<decomp_size) putc(c, outfile);  text_buf[r++] = c;  r &= (N - 1); cur_size++;
}

}
}


int main(int argc, char *argv[])
{
char  *s = argv[1];

if (argc != 4) {
printf("'lzss.exe e file1 file2' encodes file1 into file2.\n"
"'lzss.exe d file1 file2' decodes file1 into file2.\n");
return EXIT_FAILURE;
}
if ((strcmp(argv[1], argv[2])==0)
|| (s = argv[1], s[1] || strpbrk(s, "DEde") == NULL)
|| (s = argv[2], (infile  = fopen(s, "rb")) == NULL)
|| (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) {
printf("??? %s\n", s);  return EXIT_FAILURE;
}
stat( argv[2], &finfo ); // get filesize for header

if (toupper(*argv[1]) == 'E') Encode();  else Decode();

/*
infile  = fopen("test.txt.lz", "rb"); // for debugging in gdb
outfile = fopen("test2", "wb");
stat( "test.txt", &finfo );
Encode();
Decode();
*/
fclose(infile);  fclose(outfile);
return EXIT_SUCCESS;
}