/***********************************************************************
  This file is part of HA, a general purpose file archiver.
  Copyright (C) 1995 Harri Hirvola

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
************************************************************************
	HA HSC method
***********************************************************************/

#include <malloc.h>
#include <stdlib.h>
#include <stdio.h>
#include "ha.h"
#include "haio.h"
#include "acoder.h"
#include "hsc.h"
#include "error.h"

#define IECLIM		32	       /* initial escape counter upper limit */
#define NECLIM		5	       /* no escape expected counter limit */
#define NECTLIM		4	       /* */
#define NECMAX		10	       /* no escape expected counter maximum */
#define MAXCLEN		4	       /* assumed to be 4 in several places */
#define NUMCON		10000	       /* number of contexts to remember */ 
#define NUMCFB		32760	       /* number of frequencies to remember */
#define ESCTH		3	       /* threshold for escape calculation */
#define MAXTVAL		8000	       /* maximum frequency value */
#define RFMINI		4	       /* initial refresh counter value */
#define HTLEN	        16384	       /* length of hash table */
#define NIL		0xffff	       /* NIL pointer in lists */
#define ESC		256	       /* escape symbol */

typedef unsigned char Context[4];

/* model data */
static Context curcon;		      /* current context */
static U16B *ht=NULL;		      /* hash table */
static U16B *hp=NULL;		      /* hash list pointer array */
static Context *con=NULL;	      /* context array */
static unsigned char *cl=NULL;	      /* context length array */
static unsigned char *cc=NULL;	      /* character counts */
static U16B *ft=NULL;		      /* total frequency of context */
static unsigned char *fe=NULL;	      /* frequencys under ESCTH in context */
static U16B *elp=NULL;		      /* expire list previous pointer array */
static U16B *eln=NULL;		      /* expire list next pointer array */
static U16B elf,ell;		      /* first and last of expire list */
static unsigned char *rfm=NULL;	      /* refresh counter array */
static U16B *fa=NULL;		      /* frequency array */
static unsigned char *fc=NULL;	      /* character for frequency array */
static U16B *nb=NULL;		      /* next pointer for frequency array */
static U16B fcfbl;		      /* pointer to free frequency blocks */
static U16B nrel;		      /* context for frequency block release */

/* frequency mask system */
static unsigned char cmask[256];      /* masked characters */
static unsigned char cmstack[256];    /* stack of cmask[] entries to clear */ 
static S16B cmsp;		      /* pointer to cmstack */

/* escape propability modifying system variables */
static unsigned char nec;	      /* counter for no escape expected */
static unsigned char iec[MAXCLEN+1];  /* initial escape counters */

/* update stack variables */
static U16B usp;		      /* stack pointer */
static U16B cps[MAXCLEN+1]; 	      /* context pointers */
static U16B as[MAXCLEN+1];	      /* indexes to frequency array */

/* miscalneous */
static S16B dropcnt;		      /* counter for context len drop */
static unsigned char maxclen;	      /* current maximum length for context */ 
static U16B hrt[HTLEN];		      /* semi random data for hashing */
static U16B hs[MAXCLEN+1]; 	      /* hash stack for context search */ 
static S16B cslen;		      /* length of context to search */
 
/***********************************************************************
	Cleanup routine
***********************************************************************/

void hsc_cleanup(void) {
    
    if (ht!=NULL) free(ht),ht=NULL;
    if (fc!=NULL) free(fc),fc=NULL;
    if (fa!=NULL) free(fa),fa=NULL;
    if (ft!=NULL) free(ft),ft=NULL;
    if (fe!=NULL) free(fe),fe=NULL;
    if (nb!=NULL) free(nb),nb=NULL;
    if (hp!=NULL) free(hp),hp=NULL;
    if (elp!=NULL) free(elp),elp=NULL;
    if (eln!=NULL) free(eln),eln=NULL;
    if (cl!=NULL) free(cl),cl=NULL;
    if (cc!=NULL) free(cc),cc=NULL;
    if (rfm!=NULL) free(rfm),rfm=NULL;
    if (con!=NULL) free(con),con=NULL;
}


/***********************************************************************
	System initialization
***********************************************************************/

static  U16B make_context(unsigned char cl, S16B c);

static void init_model(void) {

    register S16B i;
    S32B z,l,h,t;
    
    ht=malloc(HTLEN*sizeof(*ht));		
    hp=malloc(NUMCON*sizeof(*hp));
    elp=malloc(NUMCON*sizeof(*elp));
    eln=malloc(NUMCON*sizeof(*eln));
    cl=malloc(NUMCON*sizeof(*cl));
    cc=malloc(NUMCON*sizeof(*cc));
    ft=malloc(NUMCON*sizeof(*ft));
    fe=malloc(NUMCON*sizeof(*fe));
    rfm=malloc(NUMCON*sizeof(*rfm));
    con=malloc(NUMCON*sizeof(*con));
    fc=malloc(NUMCFB*sizeof(*fc));
    fa=malloc(NUMCFB*sizeof(*fa));
    nb=malloc(NUMCFB*sizeof(*nb));
    if (hp==NULL || elp==NULL || eln==NULL ||
	cl==NULL || rfm==NULL || con==NULL ||
	cc==NULL || ft==NULL || fe==NULL ||
	fc==NULL || fa==NULL || nb==NULL || ht==NULL) {
	hsc_cleanup();
	error(1,ERR_MEM,"init_model()");
    }
    maxclen=MAXCLEN;		
    iec[0]=(IECLIM>>1);
    for (i=1;i<=MAXCLEN;++i) iec[i]=(IECLIM>>1)-1;
    dropcnt=NUMCON/4;
    nec=0;
    nrel=0;
    hs[0]=0;
    for (i=0;i<HTLEN;++i) ht[i]=NIL;	
    for (i=0;i<NUMCON;++i) {
	eln[i]=i+1;
	elp[i]=i-1;
	cl[i]=0xff;
	nb[i]=NIL;
    }
    elf=0;
    ell=NUMCON-1;
    for (i=NUMCON;i<NUMCFB-1;++i) nb[i]=i+1;
    nb[i]=NIL;
    fcfbl=NUMCON;
    curcon[3]=curcon[2]=curcon[1]=curcon[0]=0;
    cmsp=0;
    for (i=0;i<256;++i) cmask[i]=0;		
    for (z=10,i=0;i<HTLEN;++i) {
	h=z/(2147483647L/16807L);
	l=z%(2147483647L/16807L);		
	if ((t=16807L*l-(2147483647L%16807L)*h)>0) z=t;
	else z=t+2147483647L;
	hrt[i]=(U16B)z&(HTLEN-1);
    }
}

static void init_pack(void) {

    init_model();
    ac_init_encode();
}

static void init_unpack(void) {

    init_model();
    ac_init_decode();
}


/***********************************************************************
	Finite context model
***********************************************************************/

#define HASH(s,l,h)	{				          \
			  h=0;                                    \
			  if (l) h=hrt[s[0]];                     \
			  if (l>1) h=hrt[(s[1]+h)&(HTLEN-1)];     \
			  if (l>2) h=hrt[(s[2]+h)&(HTLEN-1)];     \
			  if (l>3) h=hrt[(s[3]+h)&(HTLEN-1)];     \
			}                                                     

#define move_context(c) curcon[3]=curcon[2],curcon[2]=curcon[1], \
			curcon[1]=curcon[0],curcon[0]=c

static  void release_cfblocks(void) {
	
    register U16B i,j,d;
    
    do {
	do if (++nrel==NUMCON) nrel=0; while (nb[nrel]==NIL);
	for (i=0;i<=usp;++i) if ((cps[i]&0x7fff)==nrel) break;
    } while (i<=usp);	
    for (i=nb[nrel],d=fa[nrel];i!=NIL;i=nb[i]) if (fa[i]<d) d=fa[i];
    ++d;
    if (fa[nrel]<d) {
	for (i=nb[nrel];fa[i]<d && nb[i]!=NIL;i=nb[i]);
	fa[nrel]=fa[i];
	fc[nrel]=fc[i];
	j=nb[i];
	nb[i]=fcfbl;
	fcfbl=nb[nrel];
	if ((nb[nrel]=j)==NIL) {
	    cc[nrel]=0;
	    fe[nrel]=(ft[nrel]=fa[nrel])<ESCTH?1:0;
	    return;
	}
    }
    fe[nrel]=(ft[nrel]=fa[nrel]/=d)<ESCTH?1:0;
    cc[nrel]=0;
    for (j=nrel,i=nb[j];i!=NIL;) {
	if (fa[i]<d) {
	    nb[j]=nb[i];
	    nb[i]=fcfbl;
	    fcfbl=i;
	    i=nb[j];
	}
	else {
	    ++cc[nrel];
	    ft[nrel]+=fa[i]/=d;
	    if (fa[i]<ESCTH) fe[nrel]++;
	    j=i;
	    i=nb[i];
	}
    }
}

static  U16B make_context(unsigned char conlen, S16B c) {

    register S16B i;
    register U16B nc,fp;
    
    nc=ell;
    ell=elp[nc];
    elp[elf]=nc;
    eln[nc]=elf;
    elf=nc;
    if (cl[nc]!=0xff) {
	if (cl[nc]==MAXCLEN && --dropcnt==0) maxclen=MAXCLEN-1;
	HASH(con[nc],cl[nc],i);
	if (ht[i]==nc) ht[i]=hp[nc];
	else {
	    for (i=ht[i];hp[i]!=nc;i=hp[i]);
	    hp[i]=hp[nc];
	}
	if (nb[nc]!=NIL) {
	    for (fp=nb[nc];nb[fp]!=NIL;fp=nb[fp]);
	    nb[fp]=fcfbl;
	    fcfbl=nb[nc];
	}
    }
    nb[nc]=NIL;
    fe[nc]=ft[nc]=fa[nc]=1;
    fc[nc]=c;
    rfm[nc]=RFMINI;
    cc[nc]=0;
    cl[nc]=conlen;
    con[nc][0]=curcon[0];
    con[nc][1]=curcon[1];
    con[nc][2]=curcon[2];
    con[nc][3]=curcon[3];
    HASH(curcon,conlen,i);
    hp[nc]=ht[i];
    ht[i]=nc;
    return nc;
}

static  void el_movefront(U16B cp) {

    if (cp==elf) return;
    if (cp==ell) ell=elp[cp];
    else {
	elp[eln[cp]]=elp[cp];
	eln[elp[cp]]=eln[cp];
    }	
    elp[elf]=cp;
    eln[cp]=elf;
    elf=cp;
}

static void  add_model(S16B c) {

    register U16B i;
    register S16B cp;
    
    while (usp!=0) {
	i=as[--usp];
	cp=cps[usp];
	if (cp&0x8000) {
	    cp&=0x7fff;
	    if (fcfbl==NIL) release_cfblocks();
	    nb[i]=fcfbl;
	    i=nb[i];
	    fcfbl=nb[fcfbl];
	    nb[i]=NIL;
	    fa[i]=1;			
	    fc[i]=c;
	    ++cc[cp];
	    ++fe[cp];
	}
	else if (++fa[i]==ESCTH) --fe[cp];
	if ((fa[i]<<1)<++ft[cp]/(cc[cp]+1)) --rfm[cp];
	else if (rfm[cp]<RFMINI) ++rfm[cp];
	if (!rfm[cp] || ft[cp]>=MAXTVAL) {
	    ++rfm[cp];
	    fe[cp]=ft[cp]=0;
	    for (i=cp;i!=NIL;i=nb[i]) {
		if (fa[i]>1) {
		    ft[cp]+=fa[i]>>=1;
		    if (fa[i]<ESCTH) ++fe[cp];
		}
		else {
		    ++ft[cp];
		    ++fe[cp];
		}
	    }
	}
    }
}

static  U16B find_next(void) {

    register S16B i,k;
    register U16B cp;
    
    for (i=cslen-1;i>=0;--i) {
	k=hs[i];
	for (cp=ht[k];cp!=NIL;cp=hp[cp]) {
	    if (cl[cp]==i) {
		switch (i) {
		  case 4:
		    if (curcon[3]!=con[cp][3]) break;
		  case 3:
		    if (curcon[2]!=con[cp][2]) break;
		  case 2:
		    if (curcon[1]!=con[cp][1]) break;
		  case 1:
		    if (curcon[0]!=con[cp][0]) break;
		  case 0:
		    cslen=i;
		    return cp;
		}
	    }	
	}
    }
    return NIL;
}

static  U16B find_longest(void) {

    hs[1]=hrt[curcon[0]];	
    hs[2]=hrt[(curcon[1]+hs[1])&(HTLEN-1)]; 
    hs[3]=hrt[(curcon[2]+hs[2])&(HTLEN-1)]; 
    hs[4]=hrt[(curcon[3]+hs[3])&(HTLEN-1)]; 
    usp=0;
    while(cmsp) cmask[cmstack[--cmsp]]=0;
    cslen=MAXCLEN+1;
    return find_next();
}

static U16B adj_escape_prob(U16B esc, U16B cp) {

    if (ft[cp]==1) return iec[cl[cp]]>=(IECLIM>>1)?2:1;
    if (cc[cp]==255) return 1;
    if (cc[cp] && ((cc[cp]+1)<<1)>=ft[cp]) {
	esc=(S16B)((S32B)esc*((cc[cp]+1)<<1)/ft[cp]);
	if (cc[cp]+1==ft[cp]) esc+=(cc[cp]+1)>>1;
    }
    return esc?esc:1;
}


static  S16B code_first(U16B cp, S16B c) {

    register U16B i;
    register S16B sum,cf,tot,esc;
    
    sum=cf=0;
    for (i=cp;i!=NIL;i=nb[i]) {
	if (fc[i]==c) {
	    cf=fa[i];
	    as[0]=i;
	    break;
	}
	sum+=fa[i];
    }	
    tot=ft[cp];
    esc=adj_escape_prob(fe[cp],cp);
    if (nec>=NECLIM) {
	if (tot<=NECTLIM && nec==NECMAX) {
	    tot<<=2;
	    sum<<=2;
	    cf<<=2;
	}
	else {
	    tot<<=1;
	    sum<<=1;
	    cf<<=1;
	}
    }
    usp=1;
    if (cf==0) {
	ac_out(tot,tot+esc,tot+esc);
	for (i=cp;i!=NIL;sum=i,i=nb[i]) {
	    cmstack[cmsp++]=fc[i];
	    cmask[fc[i]]=1;
	}
	as[0]=sum;  /* sum holds last i ! */
	nec=0;
	if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
	cps[0]=0x8000|cp; 
	return 0;
    }
    ac_out(sum,sum+cf,tot+esc);
    if (nec<NECMAX) ++nec;
    if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
    cps[0]=cp;
    return 1;
}


static  S16B code_rest(U16B cp, S16B c) {

    register U16B i;
    register S16B sum,cf,tot,esc;
    
    tot=sum=cf=esc=0;
    for (i=cp;i!=NIL;i=nb[i]) {
	if (!cmask[fc[i]]) {
	    if (fa[i]<ESCTH) ++esc;
	    if (cf==0 && fc[i]==c) {
		sum=tot;
		cf=fa[i];
		as[usp]=i;
	    }
	    tot+=fa[i];
	}
    }	
    esc=adj_escape_prob(esc,cp);
    if (cf==0) {
	ac_out(tot,tot+esc,tot+esc);
	for (i=cp;i!=NIL;sum=i,i=nb[i]) {
	    if (!cmask[fc[i]]) {
		cmstack[cmsp++]=fc[i];
		cmask[fc[i]]=1;
	    }
	}
	as[usp]=sum;  /* sum holds last i ! */
	if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
	cps[usp++]=0x8000|cp; 
	return 0;
    }
    ac_out(sum,sum+cf,tot+esc);
    ++nec;   /* must add test used in code_first() if NECMAX<5 ! */
    if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
    cps[usp++]=cp;
    return 1;
}

static  void code_new(S16B c) {
    
    register S16B i;
    register U16B sum,tot;
    
    sum=0;
    tot=257-cmsp;
    for (i=0;i<c;++i) sum+=1-cmask[i];
    ac_out(sum,sum+1,tot);
}

static  S16B decode_first(U16B cp) {
    
    register U16B c;
    register U16B tv;
    register U16B i;
    register S16B sum,tot,esc,cf;
    register unsigned char sv;

    esc=adj_escape_prob(fe[cp],cp);
    tot=ft[cp];
    if (nec>=NECLIM) {
	if (tot<=NECTLIM && nec==NECMAX) sv=2;
	else sv=1;
	tot<<=sv;
	tv=ac_threshold_val(tot+esc)>>sv;
	for (c=cp,sum=0;;c=nb[c]) {
	    if (c==NIL) break;
	    if (sum+fa[c]<=tv) sum+=fa[c];
	    else {
		cf=fa[c]<<sv;
		break;
	    }
	}
	sum<<=sv;
    }
    else {
	tv=ac_threshold_val(tot+esc);
	for (c=cp,sum=0;;c=nb[c]) {
	    if (c==NIL) break;
	    if (sum+fa[c]<=tv) sum+=fa[c];
	    else {
		cf=fa[c];
		break;
	    }
	}
    }
    usp=1;
    if (c!=NIL) {
	ac_in(sum,sum+cf,tot+esc);
	if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
	as[0]=c;
	cps[0]=cp;
	c=fc[c];
	if (nec<NECMAX) ++nec;
    }
    else {
	ac_in(tot,tot+esc,tot+esc);
	if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
	for (i=cp;i!=NIL;sum=i,i=nb[i]) {
	    cmstack[cmsp++]=fc[i];
	    cmask[fc[i]]=1;
	}
	cps[0]=0x8000|cp;
	as[0]=sum;
	c=ESC;
	nec=0;
    }
    return c;
}

static  S16B decode_rest(U16B cp) {

    register U16B c;
    register U16B tv;
    register U16B i;
    register S16B sum,tot,esc,cf;
    
    esc=tot=0;
    for (i=cp;i!=NIL;i=nb[i]) {
	if (!cmask[fc[i]]) {
	    tot+=fa[i];
	    if (fa[i]<ESCTH) ++esc;
	}
    }
    esc=adj_escape_prob(esc,cp);
    tv=ac_threshold_val(tot+esc);
    for (c=cp,sum=0;;c=nb[c]) {
	if (c==NIL) break;
	if (!cmask[fc[c]]) {
	    if (sum+fa[c]<=tv) sum+=fa[c];
	    else {
		cf=fa[c];
		break;
	    }
	}
    }
    if (c!=NIL) {
	ac_in(sum,sum+cf,tot+esc);
	if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
	as[usp]=c;
	cps[usp++]=cp;
	c=fc[c];
	++nec;  /* must add test used in code_first() if NECMAX<5 ! */
    }
    else {
	ac_in(tot,tot+esc,tot+esc);
	if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
	for (i=cp;i!=NIL;sum=i,i=nb[i]) {
	    if (!cmask[fc[i]]) {
		cmstack[cmsp++]=fc[i];
		cmask[fc[i]]=1;
	    }
	}
	cps[usp]=0x8000|cp;
	as[usp++]=sum;		/* sum holds last i !! */
	c=ESC;
    }
    return c;
}

static  S16B decode_new(void) {
    
    register S16B c;
    register U16B tv,sum,tot;
    
    tot=257-cmsp;
    tv=ac_threshold_val(tot);
    for (c=sum=0;c<256;++c) {
	if (cmask[c]) continue;
	if (sum+1<=tv) ++sum;
	else break;
    }
    ac_in(sum,sum+1,tot);	
    return c;
}

#define code_byte(cp,c) (cmsp?code_rest(cp,c):code_first(cp,c))
#define decode_byte(cp) (cmsp?decode_rest(cp):decode_first(cp))

/***********************************************************************
	Encoding
***********************************************************************/

void hsc_pack(void) {

    S16B c;
    U16B cp;
    unsigned char ncmax,ncmin;
    
    init_pack();
    for (;(c=getbyte())>=0;) {
	cp=find_longest();
	ncmin=cp==NIL?0:cl[cp]+1;
	ncmax=maxclen+1;
	for(;;) {
	    if (cp==NIL) {
		code_new(c);
		break;
	    }			
	    if (code_byte(cp,c)) {
		el_movefront(cp);
		break;
	    }		
	    cp=find_next();
	}
	add_model(c);
	while (ncmax>ncmin) make_context(--ncmax,c);
	move_context(c);
    }
    cp=find_longest();
    while (cp!=NIL) {
	code_byte(cp,ESC);
	cp=find_next();
    }
    code_new(ESC);
    ac_end_encode();
    hsc_cleanup();
}

/***********************************************************************
	Decoding
***********************************************************************/

void hsc_unpack(void) {

    S16B c;
    U16B cp;
    unsigned char ncmax,ncmin;
    
    init_unpack();
    for (;;) {
	cp=find_longest(); 
	ncmin=cp==NIL?0:cl[cp]+1;
	ncmax=maxclen+1;
	for(;;) {
	    if (cp==NIL) {
		c=decode_new();
		break;
	    }			
	    if ((c=decode_byte(cp))!=ESC) {
		el_movefront(cp);
		break;
	    }		
	    cp=find_next();
	}
	if (c==ESC) break;
	add_model(c);
	while (ncmax>ncmin) make_context(--ncmax,c);
	putbyte(c);
	move_context(c);
    }
    flush();
    hsc_cleanup();
}

