/*
 *	for new GOGO-no-coda (1999, 2000)
 *	based on lame3.28beta and optimized by shigeo
 */
#define	PEN_LPF
#define	PEN_MS

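/*
 *	NOTE: the original Japanese changelog was damaged by a character-set
 *	conversion; the entries below are a best-effort English reconstruction.
 *
 *	99/08/01
 *	Check whether the 4th argument of count_bit() is non-zero before calling the function
 *		x2.00x
 *	99/08/03
 *	Optimized under the following assumptions
 *	- quantizerStepSize may be an int
 *		(probably still O.K. even if the version changes)
 *	- C rounding rule?
 *		(int)(-1.4)=(int)(-1.5)=(int)(-1.6)=-1
 *		(int)( 1.4)=(int)( 1.5)=(int)( 1.6)= 1
 *	- quantize_xrpow() rewritten in asm (FPU, 3DN)
 *		x1.30x
 *	99/08/09
 *	Optimized calc_noise2() & partly rewritten in asm (FPU)
 *		x1.10x
 *	99/08/10
 *	Optimized calc_noise{1,2}()
 *		x1.05x
 *	99/08/11
 *	calc_pow4P3dual() 3DN version
 *		x0.93x
 *	99/08/14
 *	Optimized under the following assumptions
 *	- every entry of sfBandIndex[] is even
 *	- inside subdivide,
 *		bigvalues_region = 2 * cod_info->big_values; holds
 *	- (for now) cod_info->address{1,2,3} are all even
 *	- begin and end of new_choose_table, ix_max may be assumed to be even
 *	99/08/27
 *	(note unreadable in the damaged original)
 *		calc_pow075() 3DN version
 *		x0.65x
 *      99/10/11
 *      Major changes with reference to lame3.28 & VBR support
 *	99/10/30
 *		Cannot go back to the old code except for stereo, so migrated completely.
 *	00/01/06
 *		Tried to follow lame3.55, but gave up.
 *	00/01/11 Finished for the time being
 */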

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include "musenc.h"
#include "global.h"
#include "loop.h"
#include "huffman.h"
#include "l3bs.h"
#include "haveunit.h"
#include "l3psy.h"

/*
 * Pointers to routines that may be switched depending on the CPU.
 * The ones whose setup_*() routine is visible in this file (ms_convert,
 * calc_pow075, calc_pow4P3dual, count_nz_xr) are selected from the useUNIT
 * capability flags; the remaining ones are presumably assigned the same way
 * by setup code outside this view -- TODO confirm.
 */

static int (*quantize_xrpow)(float xr[576], int ix[576], gr_info *cod_info);
static int (*count_bit)(int ix[576],unsigned int start,unsigned int end,unsigned int table);
static void (*calc_pow4P3dual)(int *ix,float *xr,float *step,float *sum,int n,int next);
static void (*calc_pow075)( float *xr, float *xrpow );
static void (*ms_convert)(float xr[2][576],float xr_org[2][576]);
static int (*count_nz_xr)(float xr[]);
static void (*set_l3_enc_sign)(float xr[], int l3_enc[]);
static int (*new_choose_table)( int ix[576], unsigned int begin, unsigned int end, int *bits );
static int (*calc_runlen_count1)( int *ix, gr_info *cod_info);
int (*ix_max)(int ix[576], unsigned int begin, unsigned int end); /* exported to huffmana.nas */

/*
 * Mid/side control flags; all three are reset and then set by
 * iteration_init() from l3_side->mode_ext and force_ms.
 */
static int convert_mdct, convert_psy, reduce_sidechannel;

/* Forward declaration; used by bin_search_StepSize2() and inner_loop(). */
static int count_bits(int *ix, gr_info *cod_info);

/*
 * Scalefactor band boundary tables.
 * 16 and 24 kHz tables corrected 00/01/20 from tables.c in lame3.60.
 *
 * iteration_init() selects an entry with gl.freq_idx + gl.version * 3, so
 * entries 0-2 are used when gl.version == 0 and entries 3-5 when
 * gl.version == 1.  The first array of each entry is the .l member
 * (pointed to by scalefac_long), the second is the .s member (pointed to by
 * scalefac_short).
 */
static const struct scalefac_struct sfBandIndex[6] =
{
  { /* Table B.2.b: 22.05 kHz */
    {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
    {0,4,8,12,18,24,32,42,56,74,100,132,174,192}
  },
  { /* Table B.2.c: 24 kHz */
    {0,6,12,18,24,30,36,44,54,66,80,96,114,136,162,194,232,278,332,394,464,540,576},
    {0,4,8,12,18,26,36,48,62,80,104,136,180,192}
  },
  { /* Table B.2.a: 16 kHz */
    {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
    {0,4,8,12,18,26,36,48,62,80,104,134,174,192}
  },
  { /* Table B.8.b: 44.1 kHz */
    {0,4,8,12,16,20,24,30,36,44,52,62,74,90,110,134,162,196,238,288,342,418,576},
    {0,4,8,12,16,22,30,40,52,66,84,106,136,192}
  },
  { /* Table B.8.c: 48 kHz */
    {0,4,8,12,16,20,24,30,36,42,50,60,72,88,106,128,156,190,230,276,330,384,576},
    {0,4,8,12,16,22,28,38,50,64,80,100,126,192}
  },
  { /* Table B.8.a: 32 kHz */
    {0,4,8,12,16,20,24,30,36,44,54,66,82,102,126,156,194,240,296,364,448,550,576},
    {0,4,8,12,16,22,30,42,58,78,104,138,180,192}
  }
};

/*
 * Partition table; init_outer_loop() points cod_info->sfbTblSub at its first
 * row.  NOTE(review): presumably the per-partition scalefactor band counts
 * used by the LSF (MPEG-2) scalefactor coding -- confirm against
 * scale_bitcount_lsf().
 */
static const unsigned sfbBlkSub[2][2][4] = {
  {{6, 5, 5, 5}, {9, 9, 9, 9}}, {{11, 10, 0, 0}, {18, 18, 0, 0}}
};

/* Bit reservoir state shared by the Resv*() routines. */
static struct{
	int size;	/* bits currently held in the reservoir */
	int max;	/* upper bound for size, recomputed by ResvFrameBegin() */
} resv;

/*
 * Band boundary tables currently in use.  They start out pointing at
 * sfBandIndex[3] and are re-pointed by iteration_init() on its first call.
 */
const int *scalefac_long  = &sfBandIndex[3].l[0];
const int *scalefac_short = &sfBandIndex[3].s[0];

/* Required variables */
static int OldValue = -30;	/* initial step size guess for bin_search_StepSize2(), which stores its final step size back here */
static int firstcall = 1;	/* cleared by iteration_init() once its one-time setup has run */
static float scalefac_shortR[SFB_SMAX]; /* = 1 / ( scalefac[i+1] - scalefac[i] ) */
extern float scalefac_longR[SFB_LMAX]; /* defined in quantize.nas */
float masking_lower = 1; /* exported for quantize.nas */

/*
 * Frame-start reservoir bookkeeping.
 *
 * Recomputes resv.max as 7680 - frameLength, clamped to [0, limit], where the
 * limit is 4088 when gl.mode_gr == 2 and 2040 otherwise.
 *
 * Returns the bits available to the frame: mean_bits per granule plus the
 * current reservoir contents.
 */
static int ResvFrameBegin(int mean_bits, int frameLength){
	const int two_granules = ( gl.mode_gr == 2 );
	const int cap = two_granules ? 4088 : 2040;
	const int available = ( two_granules ? mean_bits * 2 : mean_bits ) + resv.size;
	int room = 7680 - frameLength;

	if(room > cap){
		room = cap;
	}else if(room < 0){
		room = 0;
	}
	resv.max = room;

	return available;
}

/*
 * Derive the bit budget for one granule from the reservoir state.
 *
 *   *targ_bits  : mean_bits, raised by whatever the reservoir holds above 90%
 *                 of resv.max, or lowered by mean_bits/15.2 when it holds less.
 *   *extra_bits : additional bits that may be borrowed: min(resv.size, 60% of
 *                 resv.max) minus the surplus already added to the target,
 *                 never negative.
 *
 * No 4095-bit clamp is applied here (it was disabled in the original code);
 * on_pe() limits the per-channel total instead.
 */
static void ResvMaxBits2(int mean_bits, int *targ_bits, int *extra_bits){
	int surplus = resv.size - ((resv.max * 9) / 10);
	int target = mean_bits;
	int spare = (resv.max * 6) / 10;

	if(surplus > 0){
		target += surplus;
	}else{
		surplus = 0;
		target -= (int)(mean_bits / 15.2);
	}

	if(spare > resv.size){
		spare = resv.size;
	}
	spare -= surplus;
	if(spare < 0){
		spare = 0;
	}

	*targ_bits = target;
	*extra_bits = spare;
}

/*
 * Credit the reservoir with the bits a channel did not use: its share of
 * mean_bits (half of it when gl.stereo == 2) minus the len bits actually spent.
 */
static void ResvAdjust(int len, int mean_bits){
	const int share = ( gl.stereo == 2 ) ? ( mean_bits >> 1 ) : mean_bits;

	resv.size += share - len;
}

/*
 * Frame-end reservoir bookkeeping.
 *
 * Trims the reservoir to resv.max and then down to a multiple of 8 bits.
 * Returns the number of bits removed that way.
 */
static int ResvFrameEnd(int mean_bits){
	int removed = 0;
	int excess;
	int partial;

	/* ResvAdjust() halves mean_bits per channel; put back the bit lost when it is odd */
	if(gl.stereo == 2 && (mean_bits & 1)){
		resv.size++;
	}

	excess = resv.size - resv.max;
	if(excess > 0){
		removed = excess;
		resv.size = resv.max;
	}

	/* keep only whole bytes in the reservoir */
	partial = resv.size & 7;
	resv.size -= partial;

	return removed + partial;
}

/* Must stay consistent with quantize.nas. */
/* 8192+14=8206 entries cover every quantized value accepted by the callers
 * (they reject results above 8191 + 14). */

#define PRECALC_SIZE 8206 

extern float pow4P3_table[PRECALC_SIZE];

/* Fill pow4P3_table[] with n^(4/3) for n = 0 .. PRECALC_SIZE-1. */
static void calc_noise_init(void){
	float *slot = pow4P3_table;
	int n;

	for( n = 0; n < PRECALC_SIZE; n++ ){
		*slot++ = pow(n, 4.0/3.0);
	}
}

/* Direction of the previous step taken by bin_search_StepSize2(). */
typedef enum {
    BINSEARCH_NONE,	/* no step taken yet */
    BINSEARCH_UP, 	/* step size was increased last time */
    BINSEARCH_DOWN	/* step size was decreased last time */
} binsearchDirection_t;

/*
 * Search for a quantizer step size whose bit count is close to desired_rate.
 *
 * input: xrpow, cod_info  output: ix (and cod_info->quantizerStepSize)
 *
 * The search starts at OldValue and moves in steps of 4.  After the first
 * change of direction the step is halved on every iteration, and the search
 * stops once a candidate has been evaluated with a step of 1 (or the rate
 * matches exactly).  The final step size is remembered in OldValue.
 * Returns the bit count of the last candidate evaluated.
 */
static int bin_search_StepSize2(int desired_rate, int *ix, float *xrpow, gr_info *cod_info){
	binsearchDirection_t last_dir = BINSEARCH_NONE;
	int reversed = 0;	/* set once the search has changed direction */
	int delta = 4;		/* never negative, so it is divided with shifts */
	int step = OldValue;
	int bits;

	for(;;){
		cod_info->quantizerStepSize = step;

		/* avoid FPU overflow for very small step sizes */
		if( step < -128 ){
			bits = 100000;
		}else{
			bits = quantize_xrpow(xrpow, ix, cod_info);
			bits = ( bits <= 8191 + 14 ) ? count_bits(ix,cod_info) : 100000;
		}

		if( delta == 1 ){
			break; /* nothing to adjust anymore */
		}
		if( reversed ){
			delta >>= 1;
		}

		if( bits > desired_rate ){
			/* too many bits: increase the step size */
			if( last_dir == BINSEARCH_DOWN && !reversed ){
				reversed = 1;
				delta >>= 1;
			}
			last_dir = BINSEARCH_UP;
			step += delta;
		}else if( bits < desired_rate ){
			/* bits to spare: decrease the step size */
			if( last_dir == BINSEARCH_UP && !reversed ){
				reversed = 1;
				delta >>= 1;
			}
			last_dir = BINSEARCH_DOWN;
			step -= delta;
		}else{
			break; /* bits == desired_rate; most unlikely to happen */
		}
	}

	OldValue = step;
	return bits;
}

/* convert from L/R <-> Mid/Side */
void ms_convert_3DN(float xr[2][576],float xr_org[2][576]);
void ms_convert_FPU(float xr[2][576],float xr_org[2][576]);
void ms_convert_SSE(float xr[2][576],float xr_org[2][576]);
#ifdef USE_E3DN
void ms_convert_E3DN(float xr[2][576],float xr_org[2][576]);
#endif

/*
 * Choose the ms_convert implementation from the useUNIT capability flags.
 * Priority: E3DN (when compiled in), 3DN, SSE, then the plain FPU version.
 */
void
setup_ms_convert(int useUNIT){
#ifdef USE_E3DN
	if( useUNIT & tE3DN ){
		SETUP_DSP("use:ms_convert_E3DN:\n");
		ms_convert = ms_convert_E3DN;
		return;
	}
#endif
	if( useUNIT & t3DN ){
		SETUP_DSP("use:ms_convert_3DN:\n");
		ms_convert = ms_convert_3DN;
		return;
	}
	if( useUNIT & tSSE ){
		SETUP_DSP("use:ms_convert_SSE:\n");
		ms_convert = ms_convert_SSE;
		return;
	}
	SETUP_DSP("use:ms_convert_FPU:\n");
	ms_convert = ms_convert_FPU;
}

/* 99/09/21 */

/*
 * Store 2^(n/4) in *ret.
 *
 * The integer part of the exponent, floor(n/4), is written directly into the
 * exponent field of an IEEE-754 single, and the fractional part is looked up
 * in a four-entry table.  The result is only meaningful while floor(n/4)
 * stays inside the normal single-precision exponent range (-126 .. 127).
 *
 * The bit pattern is assembled in unsigned arithmetic and read back through
 * a union, which avoids both the left shift of a negative int and the
 * int*-to-float* pointer cast.  Like the rest of this file it assumes a
 * 32-bit int/float and an arithmetic right shift for negative n.
 */
static void pow2i025(int n, float *ret){
	/* frac[i] = 2^(i/4) */
	static const float frac[4]={1,1.18920711498,1.41421356237,1.68179283048};
	union { unsigned int bits; float value; } pow2;
	int whole = n >> 2;

	/* biased exponent, zero mantissa: exactly 2^whole */
	pow2.bits = ((unsigned int)(whole + 127)) << 23;
	*ret = frac[ n & 3 ] * pow2.value;
}

/*
 *	99/08/27 by shigeo
 */

void calc_pow075_3DN( float *xr, float *xrpow );
#ifdef USE_E3DN
void calc_pow075_E3DN( float *xr, float *xrpow );
#endif
void calc_pow075_SSE( float *xr, float *xrpow );
void calc_pow075_NONE( float *xr, float *xrpow );

/*
 * Choose the calc_pow075 implementation from the useUNIT capability flags.
 * Priority: E3DN (when compiled in), 3DN, SSE, then the generic version.
 */
void
setup_calc_pow075(int useUNIT){
#ifdef USE_E3DN
	if( useUNIT & tE3DN ){
		SETUP_DSP("use:calc_pow075_E3DN\n");
		calc_pow075 = calc_pow075_E3DN;
		return;
	}
#endif
	if( useUNIT & t3DN ){
		SETUP_DSP("use:calc_pow075_3DN\n");
		calc_pow075 = calc_pow075_3DN;
		return;
	}
	if( useUNIT & tSSE ){
		SETUP_DSP("use:calc_pow075_SSE\n");
		calc_pow075 = calc_pow075_SSE;
		return;
	}
	SETUP_DSP("use:calc_pow075_NONE\n");
	calc_pow075 = calc_pow075_NONE;		/* 40k clk */
}

/*
 * Switch on preemphasis for a long-block granule.
 *
 * Nothing is done (and 0 is returned) for short blocks, when preflag is
 * already set, or unless distort[0][i] > 0 for every i in 17..20.
 * Otherwise preflag is set, bands 11 and up of xr, xrpow and the matching
 * l3_xmin entries are scaled by the preemphasis gains, and 1 is returned.
 */
static
int
preemphasis(float xr[576], float xrpow[576], ratio_t *l3_xmin,
		int gr, int ch, III_side_info_t *l3_side, float distort[4][CBLIMIT] )
{
	static const float gain_tbl[21] =	/* = sqrt(2) ^ pretab[i] */
	{
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, SQRT2, SQRT2, SQRT2, SQRT2,
		2, 2, 2*SQRT2, 2*SQRT2, 2*SQRT2, 2
	};
#define P 1.29683955463	/* = 2^(3/8) */
	static const float gain34_tbl[21] = /* = gain_tbl[i] ^ 0.75 */
	{
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, P, P, P, P,
		P*P,P*P,P*P*P,P*P*P,P*P*P,P*P
	};
#undef P
	gr_info *gi = &l3_side->gr[gr].ch[ch].tt;
	int band, k;

	if( gi->block_type == SHORT_TYPE ) return 0;
	if( gi->preflag ) return 0;
	/*
	 * Preemphasis is switched on only if the distortion exceeds the
	 * threshold in all of the upper four scalefactor bands.
	 */
	for( band = 17; band <= 20; band++ ){
		if( distort[0][band] <= 0 ) return 0;
	}

	gi->preflag = 1;
	/* bands below 11 have a gain of 1 and are skipped */
	for( band = 11; band < gi->sfb_lmax; band++ ){
		const int first = scalefac_long[ band ];
		const int last  = scalefac_long[ band+1 ];
		float gain   = gain_tbl[band];
		float gain34 = gain34_tbl[band];

		if( gi->scalefac_scale ){
			gain *= gain;
			gain34 *= gain34;
		}
		l3_xmin->l[gr][ch][band] *= gain*gain;
		for( k = first; k < last; k++ ) xr[k] *= gain;
		for( k = first; k < last; k++ ) xrpow[k] *= gain34;
	}
	return 1;
}

/*
 * Amplify every scalefactor band whose distortion is above a threshold.
 *
 * For each selected band the scalefactor is incremented, the allowed
 * distortion in l3_xmin is multiplied by the squared step, and xr / xrpow
 * are multiplied by the step and step^(3/4).  The step is sqrt(2), or 2 when
 * scalefac_scale is set.  Long bands use distort[0][], the three short
 * windows use distort[1..3][].
 *
 * Returns the number of bands amplified (0 when no band is above the
 * threshold).  The iteration argument is not used.
 */
static
int
amp_scalefac_bands( float xr[576], float xrpow[576],
		ratio_t *l3_xmin, III_side_info_t *l3_side,
		III_scalefac_t *scalefac,
		int gr, int ch, int iteration, float distort[4][CBLIMIT])
{
	gr_info *gi = &l3_side->gr[gr].ch[ch].tt;
	D192_3 *xr_s = (D192_3 *) xr;
	D192_3 *xrpow_s = (D192_3 *) xrpow;
	float gain, gain_sq, gain34;
	float limit;
	int band, win, k;
	int amplified = 0;

	if( gi->scalefac_scale ){
		gain = 2;
		gain_sq = 4;
		gain34 = 1.68179283050743; /* = 2^(3/4) */
	}else{
		gain = SQRT2;
		gain_sq = 2;
		gain34 = 1.29683955465101; /* = 2^(3/8) */
	}

	/*
	 * The threshold is 0 unless every band has distortion below masking.
	 * In that case only bands within 95% of the largest
	 * distortion/masking ratio are amplified.
	 */
	limit = -900 / 1.05;
	for( band = 0; band < gi->sfb_lmax; band++ ){
		limit = Max( distort[0][band], limit );
	}
	limit *= 1.05;
	limit = Min( limit, 0 );

	for( band = 0; band < gi->sfb_lmax; band++ ){
		int first, last;
		if( distort[0][band] <= limit ) continue;
		amplified++;
		l3_xmin->l[gr][ch][band] *= gain_sq;
		scalefac->l[gr][ch][band]++;
		first = scalefac_long[band];
		last  = scalefac_long[band+1];
		for( k = first; k < last; k++ ) xr[k] *= gain;
		for( k = first; k < last; k++ ) xrpow[k] *= gain34;
	}

	/*
	 * Note that scfsi is not enabled for frames containing short blocks
	 */

	/* same procedure for the short-block bands, all three windows together */
	limit = -900 / 1.05;
	for( win = 0; win < 3; win++ ){
		for( band = gi->sfb_smax; band < SFB_SMAX - 1; band++ ){
			limit = Max( distort[win+1][band], limit );
		}
	}
	limit *= 1.05;
	limit = Min( limit, 0 );

	for( win = 0; win < 3; win++ ){
		for( band = gi->sfb_smax; band < SFB_SMAX - 1; band++ ){
			int first, last;
			if( distort[win+1][band] <= limit ) continue;
			amplified++;
			l3_xmin->s[gr][ch][band][win] *= gain_sq;
			scalefac->s[gr][ch][band][win]++;
			first = scalefac_short[band];
			last  = scalefac_short[band+1];
			for( k = first; k < last; k++ ) (*xr_s)[k][win] *= gain;
			for( k = first; k < last; k++ ) (*xrpow_s)[k][win] *= gain34;
		}
	}
	return amplified;
} /* end of amp_scalefac_bands */

void calc_pow4P3mono(int *ix,float *xr,float *step,float *sum,int n,int next);

/*
 * Measure the quantization noise of one channel, band by band.
 *
 * For every band the value returned in sum by calc_pow4P3mono() is divided
 * by (band width * allowed distortion) and converted to dB; ratios of 0.001
 * or less are reported as -30.  The dB value is stored in distort[0][] for
 * long bands and distort[1..3][] for the short windows.
 *
 *   *over_noise : dB summed over bands above 0 dB, divided by their count
 *                 when that count exceeds 1
 *   *tot_noise  : dB summed over all bands, divided by the band count when
 *                 that count exceeds 1
 *   *max_noise  : largest dB value seen (starts at -999)
 *
 * Returns the number of bands above 0 dB.
 */
static
int
calc_noise1( float xr[576], int ix[576], gr_info *cod_info,
	float distort[4][CBLIMIT],
	ratio_t *l3_xmin,int gr, int ch, float *over_noise,
	float *tot_noise, float *max_noise )
{
	D192_3 *xr_s = (D192_3 *)xr;
	I192_3 *ix_s = (I192_3 *)ix;
	int band, win;
	int n_over = 0;
	int n_bands = 0;
	float step;

	*over_noise = 0;
	*tot_noise = 0;
	*max_noise = -999;

	pow2i025( cod_info->quantizerStepSize, &step );

	/* long-block bands */
	for( band = 0; band < cod_info->sfb_lmax; band++ ){
		const int first = scalefac_long[ band ];
		const int width = scalefac_long[ band+1 ] - first;
		float err;
		double db;

		calc_pow4P3mono( &ix[first], &xr[first], &step, &err, width, 1 );
		db = err / ( width * l3_xmin->l[gr][ch][band] );
		db = ( db > 0.001 ) ? 10 * log10( db ) : -30;

		distort[0][band] = db;
		if( db > 0 ){
			n_over++;
			*over_noise += db;
		}
		*tot_noise += db;
		*max_noise = Max( *max_noise, db );
		n_bands++;
	}

	/* short-block bands: the three windows are interleaved, hence the stride of 3 */
	for( win = 0; win < 3; win++ ){
		for( band = cod_info->sfb_smax; band < SFB_SMAX - 1; band++ ){
			const int first = scalefac_short[ band ];
			const int width = scalefac_short[ band+1 ] - first;
			float err;
			double db;

			calc_pow4P3mono( &(*ix_s)[first][win], &(*xr_s)[first][win], &step, &err, width, 3 );
			db = err / ( width * l3_xmin->s[gr][ch][band][win] );
			db = ( db > 0.001 ) ? 10 * log10( db ) : -30;

			distort[win+1][band] = db;
			if( db > 0 ){
				n_over++;
				*over_noise += db;
			}
			*tot_noise += db;
			*max_noise = Max( *max_noise, db );
			n_bands++;
		}
	}

	if( n_bands > 1 ) *tot_noise /= n_bands;
	if( n_over > 1 ) *over_noise /= n_over;
	return n_over;
} /* end of calc_noise1 */

/*
 *	calc_noise for dual channel use:
 *	quantization noise is evaluated for both channels at once
 *	(see calc_noise2()).
 */

/*
 *	99/08/09
 *	Optimized on the assumption that stereo == 2.
 */

void calc_pow4P3dual_3DN(int *ix,float *xr,float *step,float *sum,int n,int next);
void calc_pow4P3dual_FPU(int *ix,float *xr,float *step,float *sum,int n,int next);
void calc_pow4P3dual_SSE(int *ix,float *xr,float *step,float *sum,int n,int next);

/*
 * Choose the calc_pow4P3dual implementation from the useUNIT capability
 * flags.  Priority: 3DN, SSE, then the plain FPU version.
 */
void
setup_calc_pow4P3dual(int useUNIT){
	if(useUNIT & t3DN){
		SETUP_DSP("use:calc_pow4P3dual_3DN\n");
		calc_pow4P3dual=calc_pow4P3dual_3DN;
		return;
	}
	if(useUNIT & tSSE){
		SETUP_DSP("use:calc_pow4P3dual_SSE\n");
		calc_pow4P3dual=calc_pow4P3dual_SSE;
		return;
	}
	SETUP_DSP("use:calc_pow4P3dual_FPU\n");
	calc_pow4P3dual=calc_pow4P3dual_FPU;
}

/* This function is only called when convert_psy == 1, so stereo == 2. */

/*
 * Measure the quantization noise of both channels of a granule at once.
 *
 * For every band calc_pow4P3dual() is called with the channel-0 pointers and
 * two-element step/sum arrays; sum[0] and sum[1] are then treated as the
 * noise sums of channel 0 and channel 1.
 * NOTE(review): this presumes calc_pow4P3dual() reaches channel 1 at a fixed
 * offset from the channel-0 pointers -- confirm against the asm.
 *
 * Each sum is divided by (band width * allowed distortion) and converted to
 * dB; ratios of 0.001 or less are reported as -30.  distort[][] receives the
 * larger of the two channels' dB values, so one table serves both channels.
 * Both channels are assumed to have the same block type (channel 0 decides).
 *
 *   over[ch]       : number of bands above 0 dB
 *   over_noise[ch] : dB summed over those bands (not averaged)
 *   tot_noise[ch]  : dB summed over all bands (not averaged)
 *   max_noise[ch]  : largest dB value seen (starts at -999)
 *
 * The short-block path used to store the 0/1 result of "temp > 0" in
 * distort[]; it now stores the dB value, as the long-block path here and
 * calc_noise1() already do, because amp_scalefac_bands() derives a dB
 * threshold from these values.
 */
static
void
calc_noise2( float xr[2][576], int ix[2][576], gr_info *cod_info[2],
		float distort[4][CBLIMIT], ratio_t *l3_xmin,int gr, int over[2], 
		float over_noise[2], float tot_noise[2], float max_noise[2] )
{
	int	start, sfb;
	float sum[2],step[2];
	int bw;
	int ch;

	over_noise[0] = over_noise[1] = 0;
	tot_noise[0] = tot_noise[1] = 0;
	max_noise[0] = max_noise[1] = -999;

	pow2i025( cod_info[0]->quantizerStepSize, &step[0] );
	pow2i025( cod_info[1]->quantizerStepSize, &step[1] );
	over[0] = over[1] = 0;

	if( cod_info[0]->block_type != SHORT_TYPE ){ /* the usual case */
		for ( sfb = 0; sfb < SFB_LMAX-1; sfb++ ){
			float dis_temp[2];
			start = scalefac_long[ sfb ];
			bw = scalefac_long[ sfb+1 ] - start;

			calc_pow4P3dual(&ix[0][start],&xr[0][start],step,sum,bw,1);
			
			for( ch = 0; ch < 2; ch++ ){
				float temp;

				temp = sum[ch] / ( bw * l3_xmin->l[gr][ch][sfb] );
				if( temp > 0.001 ){
					temp = 10 * log10( temp );
				}else{
					temp = -30;
				}
				dis_temp[ch] = temp;
				if( temp > 0 ){
					over[ch]++;
					over_noise[ch] += temp;
				}
				tot_noise[ch] += temp;
				max_noise[ch] = Max( max_noise[ch], temp );
			}
			distort[0][sfb] = Max( dis_temp[0], dis_temp[1] );
		}
		return;
	}
	{
		/* short blocks: the three windows are interleaved, hence the stride of 3 */
		D192_3 *xr_s[2];
		I192_3 *ix_s[2];
		xr_s[0] = (D192_3 *) xr[0];
		xr_s[1] = (D192_3 *) xr[1];
		ix_s[0] = (I192_3 *) ix[0];
		ix_s[1] = (I192_3 *) ix[1];

		for( sfb = 0; sfb < SFB_SMAX - 1; sfb++ ){
			float dis_temp[2];
			int i;
			start = scalefac_short[ sfb ];
			bw = scalefac_short[ sfb+1 ] - start;
			for( i = 0; i < 3; i++ ){
				calc_pow4P3dual(&(*ix_s[0])[start][i],&(*xr_s[0])[start][i], step,sum,bw,3);
				
				for( ch = 0; ch < 2; ch ++ ){
					float temp;
					temp = sum[ch] / ( bw * l3_xmin->s[gr][ch][sfb][i] );
					if( temp > 0.001 ){
						temp = 10 * log10( temp );
					}else{
						temp = -30;
					}

					/* keep the dB value, exactly like the long-block path */
					dis_temp[ch] = temp;
					if( temp > 0 ){
						over[ch]++;
						over_noise[ch] += temp;
					}
					tot_noise[ch] += temp;
					max_noise[ch] = Max( max_noise[ch], temp);
				}
				distort[i+1][sfb] = Max( dis_temp[0], dis_temp[1] );
			}
		}
	}
} /* calc_noise2() */

/* 
compute the ATH for each scalefactor band 
cd range:  0..96db

Input:  3.3kHz signal  32767 amplitude  (3.3kHz is where ATH is smallest = -5db)
longblocks:  sfb=12   en0/bw=-11db    max_en0 = 1.3db
shortblocks: sfb=5           -9db              0db

Input:  1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 (repeated)
longblocks:  amp=1      sfb=12   en0/bw=-103 db      max_en0 = -92db
            amp=32767   sfb=12           -12 db                 -1.4db 

Input:  1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 (repeated)
shortblocks: amp=1      sfb=5   en0/bw= -99                    -86 
            amp=32767   sfb=5           -9  db                  4db 


MAX energy of largest wave at 3.3kHz = 1db
AVE energy of largest wave at 3.3kHz = -11db
Let's take the average:  -5db = maximum signal in sfb=12.  
Dynamic range of CD: 96db.  Therefore the energy of the smallest audible wave 
in sfb=12  = -5  - 96 = -101db = ATH at 3.3kHz.  

ATH formula for this wave: -5db.  To adjust to LAME scaling, we need
ATH = ATH_formula  - 96  (db)
ATH = ATH * 2.5e-10      (ener)

*/
/*
 * Absolute threshold of hearing per scalefactor band, as energy: ATH_l for
 * long blocks, ATH_s for short blocks.  Both are filled by compute_ath(),
 * which iteration_init() calls once, on its first invocation.
 */
float ATH_l[CBLIMIT]; /* exported for quantize.nas */
static float ATH_s[CBLIMIT];

/*
 * Absolute threshold of hearing at frequency f, returned as energy.
 * compute_ath() passes f in kHz; values below 0.02 are raised to 0.02
 * before the formula is evaluated.
 */
static
float
ATHformula(float f)
{
  float level;

  f  = Max(0.02, f);

  /* from Painter & Spanias, 1997 */
  /* minimum: (i=77) 3.3kHz = -5db */
  level = 3.640 * pow(f,-0.8)
        - 6.500 * exp(-0.6*pow(f-3.3,2.0))
        + 0.001 * pow(f,4.0);

  /* shift to the encoder's scale, then convert db to energy */
  level -= 114;
  level = pow( 10, level/10.0 );
  return level;
}
 
/* ATH_{s,l}̏ */
static
void
compute_ath(void)
{
  int sfb,i,start,end;
  float ATH_f;
  float freqkHz = gl.enc_freqHz * 0.001;

  /* last sfb is not used */
  for ( sfb = 0; sfb < SFB_LMAX-1; sfb++ ) {
    start = scalefac_long[ sfb ];
    end   = scalefac_long[ sfb+1 ];
    ATH_l[sfb]=1e38;//1e99;
    for (i=start ; i < end; i++) {
      ATH_f = ATHformula(freqkHz*i/(2*576));
      ATH_l[sfb]=Min(ATH_l[sfb],ATH_f);
    }
  }

  for ( sfb = 0; sfb < SFB_SMAX - 1; sfb++ ){
    start = scalefac_short[ sfb ];
    end   = scalefac_short[ sfb+1 ];
    ATH_s[sfb]=1e38;
    for (i=start ; i < end; i++) {
      ATH_f = ATHformula(freqkHz*i/(2*192));
      ATH_s[sfb]=Min(ATH_s[sfb],ATH_f);
    }
  }
}

/*
 * Distribute the granule's bit budget over the channels according to
 * perceptual entropy.  Called only from iteration_loop.
 *
 * Every channel starts from the ResvMaxBits2() target (halved for stereo)
 * and may take (pe - 750) / 1.4 additional bits, at least 500 for short
 * blocks, limited by what is left of the extra bits and by a total of 4095.
 * Bits taken by one channel are no longer available to the next.
 */
static
void
on_pe( float pe[2], III_side_info_t *l3_side, int targ_bits[2],
		int mean_bits, int gr )
{
	int base_bits, pool_bits;
	int ch;

	ResvMaxBits2( mean_bits, &base_bits, &pool_bits );
	if( gl.stereo == 2 ) base_bits /= 2;

	for( ch = 0; ch < gl.stereo; ch++ ){
		gr_info *gi = &l3_side->gr[gr].ch[ch].tt;
		int bonus = ( pe[ch] - 750 ) * 0.7142857143; /* = 1/1.4 */

		if( gi->block_type == SHORT_TYPE && bonus < 500 ){
			bonus = 500;
		}
		if( bonus < 0 ) bonus = 0;
		if( bonus > pool_bits ) bonus = pool_bits;
		if( bonus > 4095 - base_bits ){
			bonus = 4095 - base_bits;
		}

		targ_bits[ch] = base_bits + bonus;
		pool_bits -= bonus;
	}
}

/*
 * Move bits from the side channel (targ_bits[1]) to the mid channel
 * (targ_bits[0]) depending on the mid/side energy ratio.
 *
 *  ms_ener_ratio = 0:  allocate 66/33 mid/side
 *  ms_ener_ratio =.5:  allocate 50/50 mid/side (nothing is moved)
 *
 * The side channel is never taken below 125 bits, and both targets are
 * finally capped at min(4095, mean_bits / 2 + 1200).
 */
static
void
reduce_side( int targ_bits[2], float ms_ener_ratio, int mean_bits )
{
	float shift;
	int cap;

	shift = 0.5 - ms_ener_ratio;
	if( shift > 0 && targ_bits[1] >= 125 ){
		shift *= (0.33 * 2) * targ_bits[1];
		if( targ_bits[1] - shift > 125 ){
			targ_bits[0] += shift;
			targ_bits[1] -= shift;
		}else{
			/* moving the full amount would leave too little: stop at 125 bits */
#ifdef	PEN_MS
			if( targ_bits[1] >= 125 ){ 
				targ_bits[0] += targ_bits[1] - 125;
				targ_bits[1] = 125;
			}
#else
			targ_bits[0] += targ_bits[1] - 125;
			targ_bits[1] = 125;
#endif
		}
	}

	cap = Min( 4095, mean_bits / 2 + 1200 );
	if( targ_bits[0] > cap ) targ_bits[0] = cap;
	if( targ_bits[1] > cap ) targ_bits[1] = cap;
}

/* stereo == 2, convert_psy == TRUE */

/*
 * Decide whether the current quantization of a channel pair beats the best
 * one found so far.
 *
 * Noise is given in decibels (db) relative to the masking thresholds.
 * The candidate wins when the two channels together have fewer bands over
 * the threshold; when that count is equal it wins when the summed
 * over_noise is strictly smaller.
 * better[ch] receives the verdict AND-ed with notdone[ch].
 */
static
void
quant_compare_dual( int better[2], int notdone[2], int best_over[2],
		float best_over_noise[2], int over[2], float over_noise[2] )
{
	const int cand_over = over[0] + over[1];
	const int prev_over = best_over[0] + best_over[1];
	int improved;

	if( cand_over != prev_over ){
		improved = cand_over < prev_over;
	}else{
		improved = (over_noise[0]+over_noise[1]) < (best_over_noise[0]+best_over_noise[1]);
	}

	better[0] = improved & notdone[0];
	better[1] = improved & notdone[1];
}

#ifdef USE_VBR

/*
 * VBR acceptance test: true when the over count, over_n, tot_n and max_n are
 * all <= 0.  The first four parameters (t0, t3, t2, t1) are accepted but not
 * used by the expansion.
 * Original note (partly lost to encoding damage): the best_* values are all
 * 0, as was the case up to lame3.59.
 */
#define VBR_compare( t0, t3, t2, t1, over, tot_n, over_n, max_n ) \
	( (over) <= 0 && (over_n) <= 0 && (tot_n) <= 0 && (max_n) <= 0 )

#endif /* USE_VBR */

/*
 * Per-frame initialization; called first, before any quantization.
 *
 *  - On the first call only: clears main_data_begin, selects the scalefactor
 *    band tables for the current sampling rate and computes the ATH tables.
 *  - Clears resvDrain and derives convert_mdct / convert_psy /
 *    reduce_sidechannel from mode_ext and force_ms.
 *  - Applies a low-pass to xr_org: either the lowpass1..lowpass2 cosine
 *    roll-off (when lowpass1 > 0) or, when sfb21 is set, a hard cut of the
 *    top scalefactor band(s).
 *  - Sets sfb_lmax / sfb_smax of every granule/channel from its block type.
 */
static
void
iteration_init( float xr_org[2][2][576], III_side_info_t *l3_side )
{
	gr_info *cod_info;
	int ch, gr, i;
	extern float lowpass1, lowpass2;

	if( firstcall ){
		firstcall = 0;
		l3_side->main_data_begin = 0;
		scalefac_long  = &sfBandIndex[ gl.freq_idx + gl.version * 3 ].l[0];
		scalefac_short = &sfBandIndex[ gl.freq_idx + gl.version * 3 ].s[0];
		compute_ath();
	}

	l3_side->resvDrain = 0;

	convert_mdct = 0;
	convert_psy = 0;
	reduce_sidechannel = 0;
	if( l3_side->mode_ext == MPG_MD_MS_LR ){
		convert_mdct = 1;
		convert_psy = 1;
		reduce_sidechannel = 1;
	}
	/* QQQ: this override no longer exists in lame3.55 */
	if( force_ms ){
		convert_mdct = 0;
		convert_psy = 0;
		reduce_sidechannel = 1;
	}

	if( lowpass1 > 0 ){
		/* cosine roll-off between lowpass1 and lowpass2, zero above */
		for( gr = 0; gr < gl.mode_gr; gr++ ){
			for( ch = 0; ch < gl.stereo; ch++ ){
				if( l3_side->gr[gr].ch[ch].tt.block_type == SHORT_TYPE ){
					/* three interleaved windows of 192 lines each */
					int j, start, stop;
					start = lowpass1 * 192;
					stop  = lowpass2 * 192;
					if( start > stop ) 
						stop = start;
					if( stop == start ){
						for( i = start * 3;  i < 192 * 3; i++ )
							xr_org[gr][ch][i] = 0;
					} else {
						for( j = 0; j < 3; j++ ){
							for( i = start;  i < 192; i++ ){
								int i0 = 3*i+j; 
								if (i<=stop) 
									xr_org[gr][ch][i0]*=cos((PI/2)*(i-start)/(stop-start));
								else 
									xr_org[gr][ch][i0] = 0;
							}
						}
					}
				}else{
					/*
					 * NOTE(review): unlike the short-block case these bounds
					 * are kept as float, so they are not truncated to whole
					 * lines.  Left unchanged to preserve the encoder output;
					 * confirm whether int was intended.
					 */
					float start, stop;
					start = lowpass1 * 576;
					stop  = lowpass2 * 576;
					if( start > stop ) 
						stop = start;
					if( start == stop ){
						for( i = start; i < 576; i++ )
							xr_org[gr][ch][i] = 0;
					} else {
						for( i = start; i < 576; i++ ){
							if (i<=stop) 
								xr_org[gr][ch][i] *=  cos((PI/2)*(i-start)/(stop-start));
							else 
								xr_org[gr][ch][i] = 0;
						}
					}
				}
			}
		}
	}else if( sfb21 ){
		/* the sfb21 cut is only needed when no lowpass is configured */
		/* cut everything from 16kHz up */
#ifndef PEN_LPF
		for ( gr = 0; gr < gl.mode_gr; gr++ ){
			int start;
			if( l3_side->gr[gr].ch[0].tt.block_type == 2 ){
				start = scalefac_short[ SFB_SMAX-1 ] * 3;
			}else{
				start = scalefac_long[ SFB_LMAX-1 ];
			}
			for( ch = 0; ch < 2; ch++ ){
				for( i = start; i < 192 * 3; i++ ){
					xr_org[gr][ch][i] = 0;
				}
			}
		}
#else
		/* added by PEN@CL 2000/03/11: cut more bands at low bitrates */
		{
			/* number of additional top bands to drop, per bitrate index; -1 = no cut */
			static const signed char fqtbl_mono_mpeg2[] = {
				3,3,2,1,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
			};
			static const signed char fqtbl_st_mpeg2[] = {
				5,5,5,4,4,3,2,1,0,-1,-1,-1,-1,-1,-1
			};
			static const signed char fqtbl_mono_mpeg1[] = {
				3,3,3,2,1,0,-1,-1,-1,-1,-1,-1,-1,-1,-1
			};
			static const signed char fqtbl_st_mpeg1[] = {
				5,5,5,4,4,3,2,1,1,0,-1,-1,-1,-1,-1,-1
			};
			int		mask_bound;
			int		bindex;

			if( VBR ){
				static	const char vbrtoIndex[] = { 12, 12, 11, 11, 10, 9,  8, 7, 6, 5 };
				bindex =  vbrtoIndex[ VBR_q ] ;
			} else {
				bindex =  l3_side->rate_idx;
			}

			if( gl.version == 0 ){
				/* MPEG2 */
				if( gl.mode == MPG_MD_MONO ){
					mask_bound = fqtbl_mono_mpeg2[ bindex ];
				} else {
					mask_bound = fqtbl_st_mpeg2[ bindex ];
				}
			} else {
				/* MPEG1 */
				if( gl.mode == MPG_MD_MONO ){
					mask_bound = fqtbl_mono_mpeg1[ bindex ];
				} else {
					mask_bound = fqtbl_st_mpeg1[ bindex ];
				}
			}

			if( mask_bound >= 0 ){
				for ( gr = 0; gr < gl.mode_gr; gr++ ){
					int start;
					if( l3_side->gr[gr].ch[0].tt.block_type == 2 ){
						start = scalefac_short[ SFB_SMAX-1 - mask_bound ] * 3;
					}else{
						start = scalefac_long[ SFB_LMAX-1 - mask_bound  ];
					}
					for( i = start; i < 192 * 3; i++ ){
						xr_org[gr][0][i] = xr_org[gr][1][i] = 0;
					}
				}
			}

		}
#endif
	}

	/* inline gr_deco */
	for( gr = 0; gr < gl.mode_gr; gr++ ){
		for( ch = 0; ch < gl.stereo; ch++ ){
			/* go through .tt like every other user in this file, not via a cast */
			cod_info = &l3_side->gr[gr].ch[ch].tt;
			if( cod_info->block_type == SHORT_TYPE ){
				cod_info->sfb_lmax = 0; /* No sb*/
				cod_info->sfb_smax = 0;
			}else{
				/* the last scalefactor band is not used in MPEG 1 */
				cod_info->sfb_lmax = SFB_LMAX - 1;
				cod_info->sfb_smax = SFB_SMAX - 1;    /* No sb */
			}
		}
	}
} /* end of iteration_init */

/*
Calculate the allowed distortion for each scalefactor band,
as determined by the psychoacoustic model.
xmin(sb) = ratio(sb) * en(sb) / bw(sb)
*/

/* l3_xmin is initialized here */

/* Long-block xmin routine; the implementation is selected by setup_calc_xmin_long(). */
static void (*calc_xmin_long)( float *xr, float *l3_xmin, float *ratio );

/* 2300clk on K6-III 350 */
void calc_xmin_long_3DN( float *xr, float *l3_xmin, float *ratio );
//void calc_xmin_long_SSE( float *xr, float *l3_xmin, float *ratio );

/* 4300clk on K6-III 350 */
/*
 * Portable long-block version of the allowed-distortion calculation.
 * For each band: l3_xmin = max(ATH_l, energy * masking_lower *
 * scalefac_longR * ratio).  Lines are summed in pairs, which relies on every
 * long band boundary being even.
 */
static void calc_xmin_long_C( float *xr, float *l3_xmin, float *ratio ){
	int band;
	int line = 0; /* = scalefac_long[0] */

	for( band = 0; band < SFB_LMAX - 1; band++ ){
		const int stop = scalefac_long[ band+1 ];
		float energy = 0;

		while( line < stop ){
			energy += xr[line] * xr[line] + xr[line+1] * xr[line+1];
			line += 2;
		}
		energy = energy * masking_lower * scalefac_longR[band] * ratio[band];
		l3_xmin[band] = Max( ATH_l[band], energy );
	}
}

/*
 * Choose the calc_xmin_long implementation from the useUNIT capability
 * flags: the 3DN version when available, otherwise the C version.
 */
void setup_calc_xmin_long( int useUNIT ){
	if( useUNIT & t3DN ){
		SETUP_DSP("use:calc_xmin_long_3DN\n");
		calc_xmin_long = calc_xmin_long_3DN;
		return;
	}
	SETUP_DSP("use:calc_xmin_long_C\n");
	calc_xmin_long = calc_xmin_long_C;
}

/*
 * Compute the allowed distortion l3_xmin for one granule/channel.
 *
 * Short-block bands from sfb_smax upward are handled here, one value per
 * window: max(ATH_s, ratio * energy * masking_lower / band width).
 * Long-block bands are delegated to calc_xmin_long() when sfb_lmax is
 * non-zero.
 */
static
void
calc_xmin( float xr[576], ratio_t *ratio,
		gr_info *cod_info, ratio_t *l3_xmin, int gr, int ch )
{
	D192_3 *xr_s = (D192_3 *) xr;
	int band, win;
	int line = scalefac_short[cod_info->sfb_smax];

	for( band = cod_info->sfb_smax; band < SFB_SMAX - 1; band++ ){
		const int stop = scalefac_short[ band + 1 ];
		const float scale = masking_lower * scalefac_shortR[band]; /* = masking_lower / band width */
		float energy[3] = { 0, 0, 0 };

		/* rarely reached, so the code is kept small rather than fast */
		for( ; line < stop; line++ ){
			for( win = 0; win < 3; win++ ){
				energy[win] += (*xr_s)[line][win] * (*xr_s)[line][win];
			}
		}
		for( win = 0; win < 3; win++ ){
			l3_xmin->s[gr][ch][band][win] = Max( ATH_s[band], ratio->s[gr][ch][band][win] * energy[win] * scale );
		}
	}

	/* sfb_lmax is either 0 or covers all long bands (see iteration_init) */
	if( cod_info->sfb_lmax ) calc_xmin_long( xr, l3_xmin->l[gr][ch], ratio->l[gr][ch] );
} /* end of calc_min */

/*
 *	Merges init_outer_loop and init_outer_loop_dual of lame3.28.
 *	Initializes l3_xmin, scalefac and the coding fields of cod_info
 *	for one granule/channel.
 */

static
void
init_outer_loop(
	float xr[576], float xr_org[576], ratio_t  *l3_xmin,
	III_scalefac_t *scalefac, int gr, III_side_info_t *l3_side,
	ratio_t *ratio, int ch, int flag)
{
	gr_info *gi = &l3_side->gr[gr].ch[ch].tt;
	int k;

	/* flag selects the spectrum the allowed distortion is computed from:
	 * non-zero uses xr_org, zero uses xr */
	calc_xmin( flag ? xr_org : xr, ratio, gi, l3_xmin, gr, ch );

	/* all scalefactors start at zero */
	memset( &scalefac->l[gr][ch][0], 0, sizeof(int) * SFB_LMAX );
	memset( &scalefac->s[gr][ch][0][0], 0, sizeof(int) * SFB_SMAX * 3 );

	for( k = 0; k < 4; k++ ){
		gi->slen[k] = 0;
	}
	gi->sfbTblSub = &sfbBlkSub[0][0][0];

	gi->quantizerStepSize = 0;

	gi->count1table_select= 0;
	gi->count1            = 0;
	gi->big_values        = 0;
	gi->part2_3_length    = 0;
	gi->scalefac_compress = 0;

	for( k = 0; k < 3; k++ ){
		gi->table_select[k] = 0;
	}
	gi->region0_count     = 0;
	gi->region1_count     = 0;
	gi->preflag           = 0;
	gi->scalefac_scale    = 0;
	gi->part2_length      = 0;
	gi->adr1          = 0;
	gi->adr2          = 0;
	gi->adr3          = 0;
} /* end of init_outer_loop() */

/*
 * Starting from the current cod_info->quantizerStepSize, increase the step
 * size one at a time until the quantized spectrum fits into max_bits.
 * A quantize_xrpow() result above 8191 + 14 is counted as 100000 bits.
 * Returns the bit count of the accepted quantization.
 */
static 
int
inner_loop( float *xrpow, int *l3_enc, int max_bits, gr_info *cod_info )
{
	int bits;

	for(;;){
		bits = quantize_xrpow( xrpow, l3_enc, cod_info );
		bits = ( bits <= 8191 + 14 ) ? count_bits( l3_enc, cod_info ) : 100000;
		if( bits <= max_bits ){
			return bits;
		}
		cod_info->quantizerStepSize++;
	}
}

/* by kei */
/*
 * Count the non-zero values among the 576 spectral lines in xr.
 *
 * A value counts as non-zero when any bit other than the sign bit is set,
 * so +0.0 and -0.0 are skipped while denormals and NaNs are counted.
 * The bits are read through a union rather than an int* cast of the float
 * array, which keeps the access valid under C's aliasing rules.  Like the
 * original it assumes a 32-bit float and unsigned int.
 */
static
int
count_nz_xr_C(float xr[])
{
	int  i;
	int  ct = 0;

	for( i = 0; i < 576; i++ ){
		union { float f; unsigned int u; } bits;
		bits.f = xr[i];
		if( bits.u & 0x7FFFFFFFu ) ct++;	/* fabs(xr[i]) > 0 */
	}
	return ct;
}

int count_nz_xr_3DN(float xr[]);
int count_nz_xr_MMX(float xr[]);
#ifdef USE_E3DN
int count_nz_xr_E3DN(float xr[]);
#endif

/*
 * Choose the count_nz_xr implementation from the useUNIT capability flags.
 * Priority: E3DN (when compiled in), 3DN, MMX, then the C version.
 * There is no SSE variant.
 */
void setup_count_nz_xr(int useUNIT){
#ifdef USE_E3DN
	if( useUNIT & tE3DN ){
		SETUP_DSP("use:count_nz_xr_E3DN\n");
		count_nz_xr = count_nz_xr_E3DN;
		return;
	}
#endif
	if( useUNIT & t3DN ){
		SETUP_DSP("use:count_nz_xr_3DN\n");
		count_nz_xr = count_nz_xr_3DN;
		return;
	}
	if( useUNIT & tMMX ){
		SETUP_DSP("use:count_nz_xr_MMX\n");
		count_nz_xr = count_nz_xr_MMX;
		return;
	}
	SETUP_DSP("use:count_nz_xr_C\n");
	count_nz_xr = count_nz_xr_C;
}

/*
 * Choose scalefac_compress (0..15) for one granule/channel and store the
 * resulting scalefactor size in cod_info->part2_length.
 *
 *	scalefac  scalefactors; s[gr][ch] is read for SHORT_TYPE blocks,
 *	          l[gr][ch] otherwise
 *	cod_info  block_type is read; part2_length and scalefac_compress
 *	          are written
 *
 * Returns 0 when some table entry can hold the largest scalefactors,
 * 2 when none can (part2_length is then left at 10000).
 *
 * All 16 entries are examined and the cheapest valid one is kept.  The
 * previous code returned on the first valid entry, which is not always
 * the cheapest because the cost tables are not monotonic (e.g. entry 4
 * costs more than entry 5).
 */
static
int
scale_bitcount_hi( III_scalefac_t *scalefac, gr_info *cod_info, int gr, int ch )
{
	/* part2 size in bits per scalefac_compress value: first table is used
	 * for SHORT_TYPE blocks, second table for all other block types */
	static const int slen1_tab[16] = {0,18,36,54,54,36,54,72,54,72,90,72,90,108,108,126};
	static const int slen2_tab[16] = {0,10,20,30,33,21,31,41,32,42,52,43,53,63,64,74};
	/* exclusive upper bounds on the scalefactors of the lower / upper
	 * band group for each scalefac_compress value */
	static const int slen1[16] = {1,1,1,1,8,2,2,2,4,4,4,8,8,8,16,16};
	static const int slen2[16] = {1,2,4,8,1,2,4,8,2,4,8,2,4,8,4,8};
	const int *tab;
	int k, max_slen1 = 0, max_slen2 = 0;
	int status = 2;

	if( cod_info->block_type == SHORT_TYPE ){
		int (*fac)[3] = scalefac->s[gr][ch];
		tab = slen1_tab;
		/* bands 0..5 are limited by slen1, bands 6..SFB_SMAX-2 by slen2 */
		for ( k = 0; k < 6; k++ ){
			if( fac[k][0] > max_slen1 ) max_slen1 = fac[k][0];
			if( fac[k][1] > max_slen1 ) max_slen1 = fac[k][1];
			if( fac[k][2] > max_slen1 ) max_slen1 = fac[k][2];
		}
		for( k = 6; k < SFB_SMAX - 1; k++ ){
			if( fac[k][0] > max_slen2 ) max_slen2 = fac[k][0];
			if( fac[k][1] > max_slen2 ) max_slen2 = fac[k][1];
			if( fac[k][2] > max_slen2 ) max_slen2 = fac[k][2];
		}
	}else{
		int *fac = scalefac->l[gr][ch];
		tab = slen2_tab;
		/* bands 0..10 are limited by slen1, bands 11..20 by slen2 */
		for( k = 0; k < 11; k++ ){
			if( fac[k] > max_slen1 ) max_slen1 = fac[k];
		}
		for( k = 11; k < 21; k++ ){
			if( fac[k] > max_slen2 ) max_slen2 = fac[k];
		}
	}

	/* from Takehiro TOMINAGA <tominaga@isoternet.org> 10/99
	 * loop over *all* possible values of scalefac_compress to find the
	 * one which uses the smallest number of bits.  ISO would stop
	 * at first valid index
	 */
	cod_info->part2_length = 10000;
	for( k = 0; k < 16; k++ ){
		if( max_slen1 < slen1[k] && max_slen2 < slen2[k]
			&& cod_info->part2_length > tab[k]
		){
			cod_info->part2_length = tab[k];
			cod_info->scalefac_compress = k;
			status = 0;	/* a usable entry exists; keep looking for a cheaper one */
		}
	}
	return status;
} /* scale_bitcount */

/*
 * Scalefactor bit counting for the four-partition layout described by
 * sfbBlkSub[][][].
 *
 * The largest scalefactor of each of the four partitions is compared with
 * a per-partition limit.  If every partition is within its limit, the
 * function fills in cod_info->sfbTblSub, slen[0..3], scalefac_compress
 * and part2_length.
 *
 * Returns the number of partitions over their limit (0 means success;
 * cod_info is left untouched otherwise).
 */
static
int
scale_bitcount_lsf( III_scalefac_t *scalefac, gr_info *cod_info, int gr, int ch )
{
	/* largest accepted scalefactor per partition, row selected by preflag */
	static const unsigned max_sfacTbl[2][4] = {
		{4, 4, 3, 3}, {3, 2, 0, 0},
	};
	/* field width in bits needed for a given largest scalefactor */
	static const int log2tab[] = {0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
	const unsigned *partition_table;
	int max_sfac[4];
	int part, band, n, total;
	int over = 0;
	unsigned slen1, slen2, slen3, slen4;

	if( cod_info->block_type == SHORT_TYPE ){
		int (*fac)[3] = scalefac->s[gr][ch];
		partition_table = &sfbBlkSub[cod_info->preflag][1][0];
		band = 0;
		for( part = 0; part < 4; part++ ){
			int peak = 0;
			/* the table counts individual windows; a band has three */
			for( n = 0; n < partition_table[part] / 3; n++ ){
				int win;
				for( win = 0; win < 3; win++ ){
					if( fac[band][win] > peak ) peak = fac[band][win];
				}
				band++;
			}
			max_sfac[part] = peak;
		}
	}else{
		int *fac = scalefac->l[gr][ch];
		partition_table = &sfbBlkSub[cod_info->preflag][0][0];
		band = 0;
		for( part = 0; part < 4; part++ ){
			int peak = 0;
			for( n = 0; n < partition_table[part]; n++ ){
				if( fac[band] > peak ) peak = fac[band];
				band++;
			}
			max_sfac[part] = peak;
		}
	}

	for( part = 0; part < 4; part++ ){
		if( max_sfac[part] > max_sfacTbl[cod_info->preflag][part] ) over++;
	}
	if( over ) return over;

	/*
	  Since no bands have been over-amplified, we can set scalefac_compress
	  and slen[] for the formatter
	*/
	cod_info->sfbTblSub = partition_table;
	for( part = 0; part < 4; part++ ){
		cod_info->slen[part] = log2tab[max_sfac[part]];
	}

	/* set scalefac_compress */
	slen1 = cod_info->slen[0];
	slen2 = cod_info->slen[1];
	slen3 = cod_info->slen[2];
	slen4 = cod_info->slen[3];

	if( cod_info->preflag ){ /* == 0 or 1 */
		cod_info->scalefac_compress = 500 + (slen1 * 3) + slen2;
	}else{
		cod_info->scalefac_compress = (((slen1 * 5) + slen2) << 4)
		+ (slen3 << 2)
		+ slen4;
	}

	/* from Takehiro TOMINAGA <tominaga@isoternet.org> 10/99 */
	total = 0;
	for( part = 0; part < 4; part++ ){
		total += cod_info->slen[part] * cod_info->sfbTblSub[part];
	}
	cod_info->part2_length = total;

	return over;
} /* scale_bitcount_lsf */

/* scalefactor bit counter in use; bound by InitLoop() */
static int (*scale_bitcount)(III_scalefac_t *scalefac, gr_info *cod_info, int gr, int ch);

/*
 * Reset the state used by the quantization loops: first-call flag,
 * OldValue, masking_lower, the reciprocal band-width tables, the
 * scale_bitcount variant and the bit-reservoir counters.
 */
void
InitLoop(void)
{
	int band;

	firstcall = 1;
	OldValue = -30;	/* bin_search_StepSize2  */
	masking_lower = 1;
	calc_noise_init();

	/* reciprocal of the distance between consecutive band boundaries */
	band = 0;
	while( band < SFB_SMAX - 1 ){
		scalefac_shortR[band] = 1.0 / ( scalefac_short[band+1] - scalefac_short[band] );
		band++;
	}
	band = 0;
	while( band < SFB_LMAX - 1 ){
		scalefac_longR[band] = 1.0 / ( scalefac_long[band+1] - scalefac_long[band] );
		band++;
	}

	/* two granules per frame use the _hi counter, anything else _lsf */
	if( gl.mode_gr == 2 ){
		scale_bitcount = scale_bitcount_hi;
	}else{
		scale_bitcount = scale_bitcount_lsf;
	}

	resv.size = 0;	/* from old reserv.h */
	resv.max  = 0;
}

/*
 * Report whether every examined scalefactor is nonzero.
 *
 * Long-block scalefactors 0 .. sfb_lmax-1 and short-block scalefactors
 * sfb_smax .. 11 (all three windows) are examined.
 *
 * Returns 1 when none of them is zero, 0 as soon as a zero is found.
 */
static
int
loop_break( III_scalefac_t *scalefac, gr_info *cod_info, int gr, int ch )
{
	int *long_fac = scalefac->l[gr][ch];
	int (*short_fac)[3] = scalefac->s[gr][ch];
	int band, win;

	for( band = 0; band < cod_info->sfb_lmax; band++ ){
		if( long_fac[band] == 0 ) return 0;
	}

	for( band = cod_info->sfb_smax; band < 12; band++ ){
		for( win = 0; win < 3; win++ ){
			if( short_fac[band][win] == 0 ) return 0;
		}
	}
	return 1;
} /* loop_break */

/*
 *	Two-channel variant of outer_loop.  (Translated from a partly garbled
 *	note: this appears to be outer_loop split out for the case
 *	stereo == 2, convert_psy == TRUE.)
 *
 *	Both channels of granule gr are quantized in the same iteration loop.
 *	Caller-visible data written here: xr, l3_enc, l3_xmin (via
 *	init_outer_loop), scalefac->l/s[gr][ch], the two gr_info records in
 *	l3_side->gr[gr], and the bit reservoir (via ResvAdjust).
 *	best_over is a local because it is not exchanged with the caller.
 *
 *	xr             working spectrum, filled from xr_org by ms_convert() or memcpy()
 *	xr_org         source spectrum for both channels
 *	mean_bits      passed to ResvMaxBits2()/ResvAdjust(); also bounds the per-channel target
 *	pe             per-channel values; the larger one sets the extra bit request
 *	ms_ener_ratio  indexed by gr when reduce_sidechannel is set
 */

static
void
outer_loop_dual( float xr[2][576], float xr_org[2][576],
    int mean_bits, ratio_t  *l3_xmin, int l3_enc[2][576],
    III_scalefac_t *scalefac, int gr,
    III_side_info_t *l3_side, ratio_t *ratio, float pe[2], float ms_ener_ratio[2])
{
	int notdone[2]={0,0},count[2];
	int targ_bits[2],real_bits[2];
	int scalesave_l[2][CBLIMIT], scalesave_s[2][CBLIMIT][3];
	int save_preflag[2], save_compress[2];
	float distort[4][CBLIMIT]; /* not used in fast_mode */
	/* raw storage for xrpow with 16 spare bytes so it can be aligned below */
	char __xrpow[2*576*sizeof(float)+16];
	/* round up to a 16-byte boundary; NOTE(review): the pointer is cast through int */
	float (*xrpow)[576] = (float (*)[576])( (int)(__xrpow+15) & -16 ); /* OK */

	/* the *_noise values are not used in fast_mode */
	float max_noise[2];
	float over_noise[2];
	float tot_noise[2];
	float best_max_noise[2];	/* written below but never read in this function */
	float best_over_noise[2];
	float best_tot_noise[2];	/* written below but never read in this function */

	int save_l3_enc[2][576];
	int save_real_bits[2];
	int iteration, ch;
	int best_over[2];
	gr_info save_cod_info[2];
	gr_info *cod_info[2];
	cod_info[0] = &l3_side->gr[gr].ch[0].tt;
	cod_info[1] = &l3_side->gr[gr].ch[1].tt;

	/* fill the working spectrum */
	if( convert_mdct ){
		ms_convert( xr, xr_org );
	}else{
		memcpy(xr, xr_org, sizeof(float) * 2 * 576 );
	}
	init_outer_loop(xr[0], xr_org[0], l3_xmin, scalefac, gr, l3_side, ratio, 0, 1 );
	init_outer_loop(xr[1], xr_org[1], l3_xmin, scalefac, gr, l3_side, ratio, 1, 1 );

	/* a channel with no nonzero coefficient is finished before it starts */
	for( ch = 0; ch < 2; ch++ ){
		int ct;
		ct = count_nz_xr(&xr[ch][0]);
		count[ch] = ct;
		if( ct ){
			best_over[ch] = 100;
			notdone[ch] = 1;
		}else{
			best_over[ch] = notdone[ch] = 0;
			/* when notdone[ch]=0, l3_enc is cleared here */
			memset( l3_enc[ch], 0, sizeof(int) * 576 );
		}
	}
  
	/* decide the bit target of each channel */
	{
		int add_bits[2], bits;
		int tbits, extra_bits;

		ResvMaxBits2(mean_bits, &tbits, &extra_bits);
		targ_bits[0] = targ_bits[1] = tbits / 2;
		bits = 0;
		{
			float temp;
			/* extra request grows with the larger pe above 750 */
			temp = Max( pe[0], pe[1] );
			temp -= 750;
			if( temp < 0 ){
				temp = 0;
			}else{
				temp *= 0.7142857143; /* =1/1.4 */
			}
			/* block_type 2 channels request at least 500 extra bits */
			if( cod_info[0]->block_type == 2 && temp < 500 ){
				add_bits[0] = 500;
			}else{
				add_bits[0] = temp;
			}
			if( cod_info[1]->block_type == 2 && temp < 500 ){
				add_bits[1] = 500;
			}else{
				add_bits[1] = temp;
			}
			bits += add_bits[0] + add_bits[1];
		}
		/* scale the requests down when together they exceed extra_bits */
		for( ch = 0; ch < 2; ch++ ){
			if( bits > extra_bits ){
				add_bits[ch] = (extra_bits*add_bits[ch])/bits;
			}
			targ_bits[ch] += add_bits[ch];
		}
		/* extra_bits goes out of scope right after; these results are not read */
		extra_bits -= add_bits[0];
		extra_bits -= add_bits[1];
	}  
	/* move bits from channel 1 to channel 0, but never take channel 1 below 125 bits */
	if( reduce_sidechannel ){
		float fac;
		fac = 0.5 - ms_ener_ratio[gr];
		if( fac > 0 ){
			fac *= (0.33 * 2) * targ_bits[1];
			if( targ_bits[1] - fac > 125 ){
				targ_bits[0] += fac;
				targ_bits[1] -= fac;
			}
		}
#ifdef	PEN_MS
		else {
			/* fac <= 0: channel 1 keeps only 125 bits, the rest goes to channel 0 */
			if( targ_bits[1] >= 125 ){ 
				targ_bits[0] += targ_bits[1] - 125;
				targ_bits[1] = 125;
			}
		}
#endif
	}

	/* do not give too many bits to a single channel */
	{
		int max_bits;
		max_bits = ( mean_bits > 5791 ) ? 4095 : ( mean_bits / 2 + 1200 );
		if( targ_bits[0] > max_bits )targ_bits[0] = max_bits;
		if( targ_bits[1] > max_bits )targ_bits[1] = max_bits;
	}

	/* BEGIN MAIN LOOP */
	iteration = 0;

	while( notdone[0] || notdone[1] ){
		int bits_found[2];
		int over[2];
		int better[2]; /* not used in fast_mode */
		iteration++;

		if( iteration == 1 ){
			/* compute initial quantization step */
			for( ch = 0; ch < 2; ch++ ){
				if( !notdone[ch] ) continue;
				calc_pow075( xr[ch], xrpow[ch] );
				/* l3_enc is filled in by this call */
				bits_found[ch] = 
					bin_search_StepSize2( targ_bits[ch], l3_enc[ch], xrpow[ch], cod_info[ch] );
			}
		}

		for( ch = 0; ch < 2; ch++ ){
			int huff_bits;
			if( !notdone[ch] )continue;
			/* bits left once part2_length is taken out of the target */
			huff_bits = targ_bits[ch] - cod_info[ch]->part2_length;
			if( huff_bits < 0 ){
				notdone[ch] = 0;
			}else{
				/* on the first pass the bin-search result is reused if it fits */
				if( iteration == 1 && bits_found[ch] <= huff_bits ){
					real_bits[ch] = bits_found[ch];
				}else{
					if( iteration == 1 ){
						cod_info[ch]->quantizerStepSize++;
					}
					real_bits[ch] = inner_loop( xrpow[ch], l3_enc[ch], huff_bits, cod_info[ch] );
				}
			}
		}

		if( fast_mode ){
			over[0] = over[1] = 0;
		}else{
		/* mid/side coefficiets, l/r thresholds */
		/* over, part of distort, and the *_noise values are set here */
			calc_noise2( xr, l3_enc, cod_info,
				distort, l3_xmin,gr,over,over_noise,tot_noise,max_noise);

			if( iteration == 1 ){
				better[0] = better[1] = 1;
			}else{
				/* better[] is set here */
				quant_compare_dual(better,notdone,best_over,best_over_noise,over,over_noise);
			}
		}

		/* save data so we can restore this quantization later */    
		for( ch = 0; ch < 2; ch++ ){
			if( !fast_mode ){
				if( !better[ch] )continue;
				best_over[ch] = over[ch];
				best_over_noise[ch] = over_noise[ch];
				best_tot_noise[ch] = tot_noise[ch];
				best_max_noise[ch] = max_noise[ch];
			}
			if( !notdone[ch] )continue;
		/* CBLIMIT: the array sizes differ by one; the original note says this is apparently not a mistake (?) */
			memcpy( scalesave_l[ch], scalefac->l[gr][ch], CBLIMIT * sizeof(int) );
			memcpy( scalesave_s[ch], scalefac->s[gr][ch], SFB_SMAX * 3 * sizeof(int) );
			save_preflag[ch]  = cod_info[ch]->preflag;
			save_compress[ch] = cod_info[ch]->scalefac_compress;
			memcpy(save_l3_enc[ch],l3_enc[ch],sizeof(l3_enc[ch]));
			memcpy(&save_cod_info[ch],cod_info[ch],sizeof(save_cod_info[ch]));
			save_real_bits[ch] = real_bits[ch];
		}

		/* both channels stop once neither reports any over[] */
		notdone[0] &= over[0] || over[1];	/* notdone[i] ={0,1} */
		notdone[1] &= over[0] || over[1];

		if( !fast_mode ){
			for( ch = 0; ch < 2; ch ++ ){
				if( !notdone[ch] )continue;
				/* amp_scalefac_bands runs only when preemphasis returns 0 */
				if( !preemphasis( xr[ch], xrpow[ch], l3_xmin, gr, ch, l3_side, distort ) ){
					amp_scalefac_bands( xr[ch], xrpow[ch], l3_xmin,
						l3_side, scalefac, gr, ch, iteration, distort );
				}
			}
		}
	/* check to make sure we have not amplified too much */

		for( ch = 0; ch < 2; ch++ ){
			int stat;
			if( !notdone[ch] )continue;

			/* a nonzero result from either check ends this channel */
			stat = loop_break(scalefac, cod_info[ch], gr, ch);
			if(!stat) stat = scale_bitcount(scalefac, cod_info[ch], gr, ch);

			notdone[ch] = !stat;
		}
	} /* done with main iteration */

	/* restore some data */
	for( ch = 0; ch < gl.stereo; ch++ ){
		if( !count[ch] )continue;

		cod_info[ch]->preflag = save_preflag[ch];
		cod_info[ch]->scalefac_compress = save_compress[ch];

		memcpy( scalefac->l[gr][ch], scalesave_l[ch], CBLIMIT * sizeof(int) );
		memcpy( scalefac->s[gr][ch], scalesave_s[ch], SFB_SMAX * 3 * sizeof(int) );
		real_bits[ch] = save_real_bits[ch];

		memcpy(l3_enc[ch],save_l3_enc[ch],sizeof(l3_enc[ch]));   
		memcpy(cod_info[ch],&save_cod_info[ch],sizeof(save_cod_info[ch]));

		/* recompute part2_length for the restored scalefactors */
		scale_bitcount(scalefac, cod_info[ch], gr, ch);
		cod_info[ch]->part2_3_length = cod_info[ch]->part2_length + real_bits[ch];
	}

	/* finish up */
	for( ch = 0; ch < gl.stereo; ch ++ ){
		ResvAdjust(cod_info[ch]->part2_3_length, mean_bits);
		cod_info[ch]->global_gain = cod_info[ch]->quantizerStepSize + 210;
	}
} /* end of outer_loop_dual() */

/*
 *	Quantize one channel of one granule within targ_bits.
 *
 *	l3_enc[gr][ch] is filled in here.
 *	00/01/05 sloppy is not necessarily 0 ( if VBR )
 *	00/01/11 best_noise is used only for VBR
 *
 *	xr             spectrum of this channel (576 values)
 *	targ_bits      bit budget for scalefactors plus Huffman data
 *	best_noise     written only when built with USE_VBR and VBR is set
 *	sloppy         nonzero: no snapshot is saved or restored
 *	ms_ener_ratio  not referenced in this function
 *
 *	Returns best_over (converted to float).
 */

static
float
outer_loop( float xr[576], int targ_bits, float best_noise[4], int sloppy,
		ratio_t  *l3_xmin, int l3_enc[2][2][576],
		III_scalefac_t *scalefac,int gr, III_side_info_t *l3_side,
		ratio_t *ratio, float ms_ener_ratio, int ch )
{
	int iteration;
	int count = 0, bits_found = 0;
	int real_bits = 0;
	int scalesave_l[SFB_LMAX], scalesave_s[SFB_SMAX][3];
	/* 4 spare floats so xrpow can be rounded up to a 16-byte boundary */
	float unaligned_xrpow[576+4];
	/* NOTE(review): the pointer is cast through int */
	float (*xrpow) = (float *)(((int)unaligned_xrpow + 15) & ~15);
	float distort[4][CBLIMIT];
	int save_l3_enc[576];
	int save_real_bits = 0;
	int save_preflag = 0, save_compress = 0;
	int better;
	int over = 0;
	float max_noise = -999;
	float over_noise = 0;
	float tot_noise = 0;

	int best_over = 0;
	float best_over_noise = 0;
	float best_max_noise = 0;	/* only read back in the USE_VBR block at the end */
	float best_tot_noise = 0;	/* only read back in the USE_VBR block at the end */
	gr_info save_cod_info;
	gr_info *cod_info;

	int notdone=1;

	cod_info = &l3_side->gr[gr].ch[ch].tt;
	/* LAME's init_outer_loop and its dual variant are merged here; the last argument selects which (0 = this single-channel case) */
	init_outer_loop(xr, NULL, l3_xmin,scalefac,gr,l3_side,ratio,ch,0); 
	best_over = 100;
	count = count_nz_xr(&xr[0]);
	if( count == 0 ){
		/* nothing to encode */
		best_over = 0;
		notdone = 0;
	/* when notdone=0, l3_enc is cleared here */
		memset( l3_enc[gr][ch], 0, sizeof(int) * 576 );
	}

	/* BEGIN MAIN LOOP */
	iteration = 0;
	while( notdone  ){
		int huff_bits;
		iteration++;

		if( iteration == 1 ){
			calc_pow075( xr, xrpow );
		/* when notdone!=0, l3_enc is filled in by this call */
			bits_found = 
			bin_search_StepSize2( targ_bits, l3_enc[gr][ch], xrpow, cod_info );
		}

	/* inner_loop starts with the initial quantization step computed above
	 * and slowly increases until the bits < huff_bits.
	 * Thus is it important not to start with too large of an inital
	 * quantization step.  Too small is ok, but inner_loop will take longer 
	 */
		huff_bits = targ_bits - cod_info->part2_length;
		if( huff_bits < 0 ){
			notdone = 0;
		}else{
		/* if this is the first iteration, see if we can reuse the quantization
		 * computed in bin_search_StepSize above
		 */

			if( iteration == 1 && bits_found <= huff_bits ){
				real_bits = bits_found;
			}else{
				if( iteration == 1 ){
					cod_info->quantizerStepSize++;
				}
				real_bits = inner_loop( xrpow, l3_enc[gr][ch], huff_bits, cod_info );
			}
		}

		if( notdone ){
			if( fast_mode ){
				/* fast mode skips the noise calculation and accepts the result */
				over = 0;
				better = 1;
			}else{
				over = calc_noise1( xr, l3_enc[gr][ch], cod_info, distort,
				l3_xmin,gr,ch, &over_noise, &tot_noise, &max_noise);

				if( iteration == 1 ){
					better = 1;
				}else{
					/* smaller over wins; equal over is decided by over_noise */
					better = over < best_over || (over == best_over && over_noise < best_over_noise);
				}
			}
		/* save data so we can restore this quantization later */    
			if( better ){
				if( !fast_mode ){
					best_over = over;
					best_max_noise = max_noise;
					best_over_noise = over_noise;
					best_tot_noise = tot_noise;
				}
				if( !sloppy ){
					/* NOTE(review): CBLIMIT ints are copied into an array declared with SFB_LMAX elements */
					memcpy( scalesave_l, scalefac->l[gr][ch], CBLIMIT * sizeof(int) );
					memcpy( scalesave_s, scalefac->s[gr][ch], SFB_SMAX * 3 * sizeof(int) );
					save_preflag  = cod_info->preflag;
					save_compress = cod_info->scalefac_compress;

					memcpy(save_l3_enc,l3_enc[gr][ch],sizeof(l3_enc[gr][ch]));   
					memcpy(&save_cod_info,cod_info,sizeof(save_cod_info));
					save_real_bits = real_bits;
				}
			}
		}

		/* over == 0 ends the search */
		if(!over) notdone = 0;

	/* in sloppy mode, as soon as we know we can do better than targ_noise,
	 * quit.  This is used for the inital VBR bin search.  Turn it off for
	 * final (optimal) quantization */
#ifdef USE_VBR
		/* targ_noise is not declared in this function */
		if( sloppy && notdone ){
			notdone = 
			!VBR_compare((int)targ_noise[0],targ_noise[3],targ_noise[2],
				targ_noise[1],over,tot_noise,over_noise,max_noise);
		}
#endif /* USE_VBR */
		if( notdone && !fast_mode ){
			/* amp_scalefac_bands runs only when preemphasis returns 0; its result decides whether to continue */
			if( !preemphasis(xr,xrpow,l3_xmin,gr,ch,l3_side,distort) ){
				notdone = amp_scalefac_bands( xr, xrpow, l3_xmin,
					l3_side, scalefac, gr, ch, iteration,distort);
			}
		}

		if( notdone ){
			int status;
			/* a nonzero result from either check ends the loop */
			status = loop_break( scalefac, cod_info, gr, ch );
			if(!status) status = scale_bitcount(scalefac, cod_info, gr, ch);

			notdone = !status;
		}
	} /* done with main iteration */

	/* put back the saved snapshot (none is taken in sloppy mode) */
	if( count && !sloppy ){
		cod_info->preflag = save_preflag;
		cod_info->scalefac_compress = save_compress;

		memcpy( scalefac->l[gr][ch], scalesave_l, sizeof(int) * CBLIMIT );
		memcpy( scalefac->s[gr][ch][0], scalesave_s[0], sizeof(int) * 3 * SFB_SMAX );
		real_bits = save_real_bits;
		memcpy( l3_enc[gr][ch], save_l3_enc, sizeof(l3_enc[gr][ch]) );   
		memcpy( cod_info, &save_cod_info, sizeof(save_cod_info) );

		/* recompute part2_length for the restored scalefactors */
		scale_bitcount(scalefac, cod_info, gr, ch);
		cod_info->part2_3_length = cod_info->part2_length + real_bits;
	}

	cod_info->global_gain = cod_info->quantizerStepSize + 210;
#if defined(USE_VBR)
	/* hand the best noise figures back to the VBR search */
	if( VBR ){
		best_noise[0] = best_over;
		best_noise[1] = best_max_noise;
		best_noise[2] = best_over_noise;
		best_noise[3] = best_tot_noise;
	}
#endif
	return best_over;
} /* endof outer_loop() */


/*
 * Give each of the 576 quantized values the sign of its source value.
 *
 * l3_enc[i] is negated when the sign bit of xr[i] is set and l3_enc[i]
 * is positive; zero and already-negative entries are left alone.  The
 * sign bit is read through a union rather than by casting the float
 * pointer to int *, which violated the strict-aliasing rule.
 */
static
void
set_l3_enc_sign_C(float xr[], int l3_enc[])
{
	union { float f; unsigned int u; } bits;
	int i;
	for( i = 0; i < 576; i++ ){
		bits.f = xr[i];
		if( (bits.u & 0x80000000u) && (l3_enc[i] > 0) ) l3_enc[i] = -l3_enc[i];
	}
}

/* CPU-specific variants; not defined in the visible part of this file */
void set_l3_enc_sign_3DN(float xr[], int l3_enc[]);
void set_l3_enc_sign_MMX(float xr[], int l3_enc[]);
#ifdef USE_E3DN
void set_l3_enc_sign_E3DN(float xr[], int l3_enc[]);
#endif

/*
 * Bind the set_l3_enc_sign function pointer according to the feature
 * bits in useUNIT.  Priority: E3DN (only when built with USE_E3DN), 3DN,
 * MMX, then the plain C version.  No SSE or NONE variant is wired in.
 */
void setup_set_l3_enc_sign(int useUNIT){
#ifdef USE_E3DN
	if( useUNIT & tE3DN ){
		SETUP_DSP("use:set_l3_enc_sign_E3DN\n");
		set_l3_enc_sign = set_l3_enc_sign_E3DN;
		return;
	}
#endif
	if( useUNIT & t3DN ){
		SETUP_DSP("use:set_l3_enc_sign_3DN\n");
		set_l3_enc_sign = set_l3_enc_sign_3DN;
		return;
	}
	if( useUNIT & tMMX ){
		SETUP_DSP("use:set_l3_enc_sign_MMX\n");
		set_l3_enc_sign = set_l3_enc_sign_MMX;
		return;
	}
	SETUP_DSP("use:set_l3_enc_sign_C\n");
	set_l3_enc_sign = set_l3_enc_sign_C;
}


/* VBR is not used here */
/*
 *	Quantize every granule and channel of one frame.
 *
 *	Data written here:
 *	l3_enc, l3_xmin, parts of l3_side, scalefac
 *
 *	pe             per-granule values handed to outer_loop_dual() / on_pe()
 *	ms_ener_ratio  per-granule values, used when reduce_sidechannel is set
 *	xr_org         input spectrum [gr][ch][576]
 */

void
iteration_loop( float pe[][2], float ms_ener_ratio[2],
	float xr_org[2][2][576], ratio_t *ratio,
		III_side_info_t *l3_side, int l3_enc[2][2][576],
		III_scalefac_t *scalefac )
{
	ratio_t l3_xmin;
	int bitsPerFrame;
	int mean_bits;
	int ch, gr;
	/* 4 spare floats so xr can be rounded up to a 16-byte boundary */
	float unaligned_xr[2*2*576+4];
	/* NOTE(review): the pointer is cast through int */
	float (*xr)[2][576] = (float (*)[2][576])(((int)unaligned_xr + 15) & ~15);

	iteration_init( xr_org, l3_side );

	getframebits(&bitsPerFrame,&mean_bits,l3_side->rate_idx,l3_side->padding);
	ResvFrameBegin( mean_bits, bitsPerFrame );

	for( gr = 0; gr < gl.mode_gr; gr++ ){
/* when convert_psy is set, stereo == 2 */
		if( convert_psy ){
	/* dual channel version can quantize Mid/Side channels with L/R
	 * maskings (by constantly reconstructing L/R data).  Used before we
	 * we had proper mid/side maskings. */
	   outer_loop_dual( xr[gr], xr_org[gr], mean_bits,
			&l3_xmin,l3_enc[gr], scalefac,gr, l3_side, ratio, pe[gr], ms_ener_ratio);
		}else{
			int targ_bits[2];
			/* copy data to be quantized into xr */
			if( convert_mdct ){
				ms_convert( xr[gr], xr_org[gr] );
			}else{
				/* No copy is made (the former memcpy of xr_org[gr] into xr[gr] is
				 * disabled): xr is re-pointed at xr_org, so from here on xr[..]
				 * refers to the caller's buffer.
				 * NOTE(review): anything outer_loop changes through xr then lands
				 * in xr_org. */
				xr = xr_org;
			}
			on_pe( pe[gr], l3_side, targ_bits, mean_bits, gr );
			if( reduce_sidechannel ){
				reduce_side( targ_bits, ms_ener_ratio[gr], mean_bits );
			}

			for( ch = 0; ch < gl.stereo; ch++ ){
				gr_info *cod_info;
				/* best_noise = NULL, sloppy = 0 */
				outer_loop( xr[gr][ch], targ_bits[ch], NULL, 0, &l3_xmin,l3_enc, 
				scalefac,gr, l3_side, ratio, ms_ener_ratio[gr],ch);
				cod_info = &l3_side->gr[gr].ch[ch].tt;
				ResvAdjust(cod_info->part2_3_length, mean_bits);
			}
		}
	}

	/* set the sign of l3_enc */
	for( gr = 0; gr < gl.mode_gr; gr++ ){
		for( ch =  0; ch < gl.stereo; ch++ ){
			set_l3_enc_sign(&xr[gr][ch][0], &l3_enc[gr][ch][0]);
		}
	}
	l3_side->resvDrain = ResvFrameEnd(mean_bits);
} /* end of iteration_loop */

#ifdef USE_VBR

/*
 * VBR quantization of one frame.
 *
 * 1. For each rate index in [VBR_min_rate_idx, VBR_max_rate_idx] record
 *    what ResvFrameBegin() reports as available (frameBits[]).
 * 2. For each granule/channel, bisect on the bit budget with outer_loop()
 *    in sloppy mode until VBR_compare() accepts the noise (save_bits[][]).
 * 3. Pick the lowest rate index whose frameBits[] holds the total, scale
 *    the budgets down if even the chosen one is too small, and run
 *    outer_loop() once more (non-sloppy) with the final budgets.
 *
 * Written here: l3_enc, scalefac, l3_side (rate_idx, gr_info records,
 * resvDrain), the global masking_lower, and the bit reservoir.
 */
void
VBR_iteration_loop( float pe[2][2], float ms_ener_ratio[2],
		float xr_org[2][2][576], ratio_t *ratio,
		III_side_info_t *l3_side, int l3_enc[2][2][576],
		III_scalefac_t *scalefac )
{
	ratio_t l3_xmin;
	int mean_bits;
	int ch, gr;
	int frameBits[15];
	int min_mean_bits = 0;
	float noise[4];
	/* 4 spare floats each so the buffers can be rounded up to 16 bytes */
	float __xr_save[576+4];
	float *xr_save = (float *)(( (int)__xr_save + 15 ) & -16 );
	float masking_lower_db;
	int save_bits[2][2], used_bits=0, bits;
	int idx;

	float unaligned_xr[2*2*576+4];
	float (*xr)[2][576] = (float (*)[2][576])(( (int)unaligned_xr + 15) & -16);

	iteration_init( xr_org, l3_side );

	/*******************************************************************
	 * how many bits are available for each bitrate?
	 *******************************************************************/

	for( idx = VBR_min_rate_idx; idx <= VBR_max_rate_idx; idx++ ){
		mean_bits = mean_bits_table[idx][l3_side->padding];
		if( idx == VBR_min_rate_idx && gl.stereo == 2 ){
			min_mean_bits = mean_bits >>1;
		}
		frameBits[idx]= ResvFrameBegin( mean_bits, bitsPerFrame_table[idx][l3_side->padding]);
	}

	l3_side->rate_idx = VBR_max_rate_idx;

	/*******************************************************************
	 * how many bits would we use of it?
	 *******************************************************************/

	for( gr = 0; gr < gl.mode_gr; gr++){
		int num_chan = gl.stereo;
		/* determine quality based on mid channel only */
		if( reduce_sidechannel ) num_chan = 1;

		/* copy data to be quantized into xr */
		if( convert_mdct ){
			ms_convert(xr[gr],xr_org[gr]);
		}else{
			memcpy(xr[gr],xr_org[gr],sizeof(float)*2*576);
		}

		for( ch = 0; ch < num_chan; ch++){
			gr_info *cod_info;
			int dbits, this_bits, min_bits, max_bits;
		/******************************************************************
		 * find smallest number of bits for an allowable quantization
		 ******************************************************************/
			/* keep a pristine copy; every trial below starts from it */
			memcpy(xr_save,xr[gr][ch],sizeof(float)*576);
			cod_info = &l3_side->gr[gr].ch[ch].tt;
			min_bits = Max(125,min_mean_bits);
			if( cod_info->block_type == SHORT_TYPE ){
				min_bits += Max(1100,pe[gr][ch]);
				min_bits = Min(min_bits,1800);
			}

			max_bits = 1200 + frameBits[VBR_max_rate_idx]/(gl.stereo*gl.mode_gr);
			max_bits = Min(max_bits, 2500);
			max_bits = Max(max_bits, min_bits);

		/* in the case we will not find any better, we allocate max_bits */
			save_bits[gr][ch] = max_bits;

			dbits = ( max_bits - min_bits ) >>2;
			this_bits = ( max_bits + min_bits ) >>1;
		/* bin search to within +/- 10 bits of optimal */
			do{
				int better;
				float fac;
				masking_lower_db = -10 + 2 * VBR_q;
				fac = 2.526315789e-3 * this_bits - 6.315789474;
				masking_lower_db += fac;
				masking_lower = pow(10.0, masking_lower_db * 0.1);
	/* VBR will look for a quantization which has better values
	 * then those specified below.*/
				memcpy(xr[gr][ch],xr_save,sizeof(float)*576);
				outer_loop( xr[gr][ch], this_bits, noise, 1,&l3_xmin,
				l3_enc, scalefac,gr, l3_side, ratio, ms_ener_ratio[gr], ch);

				better=VBR_compare((int)targ_noise[0],targ_noise[3],targ_noise[2],
				      targ_noise[1],(int)noise[0],noise[3],noise[2],noise[1]);

				if( better ){
					save_bits[gr][ch] = this_bits;
					this_bits -= dbits;
				}else{
					this_bits += dbits;
				}
				dbits >>=1;
			}while( dbits > 10 );
			used_bits += save_bits[gr][ch];
		} /* ch */
	} /* gr */

	if( reduce_sidechannel ){
		/* number of bits needed was found for MID channel above.  Use formula
		 * (fixed bitrate code) to set the side channel bits */
		for( gr = 0; gr < gl.mode_gr; gr++ ){
			float fac = 0.33 - 0.66 * ms_ener_ratio[gr];
			save_bits[gr][1] = ( 1 - fac ) / ( 1 + fac ) * save_bits[gr][0];
			used_bits += save_bits[gr][1];
		}
	}

	/******************************************************************
	 * find lowest bitrate able to hold used bits
	 ******************************************************************/
	idx = VBR_min_rate_idx;
	for( ; idx < VBR_max_rate_idx; idx++ ){
		if( used_bits <= frameBits[idx] ) break;
	}
	l3_side->rate_idx = idx;
	assert( idx <= VBR_max_rate_idx );

	/*******************************************************************
	 * calculate quantization for this bitrate
	 *******************************************************************/

	mean_bits = mean_bits_table[l3_side->rate_idx][l3_side->padding];
	bits = ResvFrameBegin( mean_bits, bitsPerFrame_table[l3_side->rate_idx][l3_side->padding]);

	/* repartion available bits in same proportion */
	if( used_bits > bits ){
		for( gr = 0; gr < gl.mode_gr; gr++ ){
			for( ch = 0; ch < gl.stereo; ch++){
				save_bits[gr][ch]=(save_bits[gr][ch]*frameBits[l3_side->rate_idx])/used_bits;
			}
		}
		/* Re-total the scaled budgets.  Without this used_bits keeps the
		 * pre-scaling sum and the assertion below fails on exactly the
		 * path that needed the repartition. */
		used_bits = 0;
		for( gr = 0; gr < gl.mode_gr; gr++ ){
			for( ch = 0; ch < gl.stereo; ch++){
				used_bits += save_bits[gr][ch];
			}
		}
	}
	assert(used_bits <= bits);

	for( gr = 0; gr < gl.mode_gr; gr++ ){
	/* copy data to be quantized into xr */
		if( convert_mdct ){
			ms_convert(xr[gr],xr_org[gr]);
		}else{
			memcpy(xr[gr],xr_org[gr],sizeof(float)*2*576);
		}
		for( ch = 0; ch < gl.stereo; ch++ ){
			/* final pass: sloppy = 0, so the best quantization is restored */
			outer_loop( xr[gr][ch], save_bits[gr][ch], noise, 0,
			&l3_xmin,l3_enc, scalefac,gr, l3_side, ratio, ms_ener_ratio[gr], ch);
		}
	}
	/* update reservoir status after FINAL quantization/bitrate */
	for(gr = 0; gr < gl.mode_gr; gr++){
		ResvAdjust(l3_side->gr[gr].ch[0].tt.part2_3_length, mean_bits);
		if(gl.stereo == 2) ResvAdjust(l3_side->gr[gr].ch[1].tt.part2_3_length, mean_bits);
	}

	/* set the sign of l3_enc */
	for( gr = 0; gr < gl.mode_gr; gr++ ){
		set_l3_enc_sign(&xr[gr][0][0], &l3_enc[gr][0][0]);
		if( gl.stereo == 2 ) set_l3_enc_sign(&xr[gr][1][0], &l3_enc[gr][1][0]);
	}
	l3_side->resvDrain = ResvFrameEnd(mean_bits);
} /* end of VBR_iteration_loop */
#endif /* USE_VBR */

/*
 *	00/01/08
 *	(Partly garbled note; it appears to say that the quantizers now call
 *	ix_max() themselves, with the result then tested as follows.)
 *	if( ret > 8191 + 14 ) return 100000; else return 0;
 */

/* CPU-specific variants; not defined in the visible part of this file */
int quantize_xrpow_FPU(float xr[576],int ix[576],gr_info *cod_info);
int quantize_xrpow_3DN(float xr[576],int ix[576],gr_info *cod_info);
int quantize_xrpow_SSE(float xr[576],int ix[576],gr_info *cod_info);
#ifdef USE_E3DN
int quantize_xrpow_E3DN(float xr[576],int ix[576],gr_info *cod_info);
#endif

/*
 * Bind the quantize_xrpow function pointer according to the feature bits
 * in useUNIT.  Priority: E3DN (only when built with USE_E3DN), 3DN, SSE,
 * then the FPU version.
 */
void setup_quantize_xrpow(int useUNIT){
#ifdef USE_E3DN
	if( useUNIT & tE3DN ){
		SETUP_DSP("use:quantize_xrpow_E3DN\n");
		quantize_xrpow=quantize_xrpow_E3DN;
		return;
	}
#endif
	if( useUNIT & t3DN ){
		SETUP_DSP("use:quantize_xrpow_3DN\n");
		quantize_xrpow=quantize_xrpow_3DN;
		return;
	}
	if( useUNIT & tSSE ){
		SETUP_DSP("use:quantize_xrpow_SSE\n");
		quantize_xrpow=quantize_xrpow_SSE;
		return;
	}
	SETUP_DSP("use:quantize_xrpow_FPU\n");
	quantize_xrpow=quantize_xrpow_FPU;
}

/* CPU-specific variants; not defined in the visible part of this file */
int ix_max_3DN( int ix[576], unsigned int begin, unsigned int end );
int ix_max_MMX( int ix[576], unsigned int begin, unsigned int end );
int ix_max_SSE( int ix[576], unsigned int begin, unsigned int end );
int ix_max_NONE( int ix[576], unsigned int begin, unsigned int end );
#ifdef USE_E3DN
int ix_max_E3DN( int ix[576], unsigned int begin, unsigned int end );
#endif

/*
 * Largest value in ix[begin .. end-1], with a floor of 0: an empty range
 * or a range holding only negative values yields 0.
 */
int ix_max_C( int ix[576], unsigned int begin, unsigned int end ){
	int peak = 0;
	unsigned int pos;

	for( pos = begin; pos < end; pos++ ){
		if( ix[pos] > peak ) peak = ix[pos];
	}
	return peak;
}

/*
 * Bind the ix_max function pointer according to the feature bits in
 * useUNIT.  Priority: E3DN (only when built with USE_E3DN), 3DN, SSE,
 * MMX, then ix_max_NONE.
 */
void
setup_ix_max(int useUNIT)
{
#ifdef USE_E3DN
	if( useUNIT & tE3DN ){
		SETUP_DSP("use:ix_max_E3DN\n");
		ix_max=ix_max_E3DN;
		return;
	}
#endif
	if( useUNIT & t3DN ){
		SETUP_DSP("use:ix_max_3DN\n");
		ix_max=ix_max_3DN;
		return;
	}
	if( useUNIT & tSSE ){
		SETUP_DSP("use:ix_max_SSE\n");
		ix_max=ix_max_SSE;
		return;
	}
	if( useUNIT & tMMX ){
		SETUP_DSP("use:ix_max_MMX\n");
		ix_max=ix_max_MMX;
		return;
	}
	SETUP_DSP("use:ix_max_NONE\n");
	ix_max=ix_max_NONE;
}

/* The following are helpers for count_bits() */

/* exported to huffmana.nas */
/* NOTE(review): the table is declared static, so it has no external
 * linkage by name; confirm how the assembly side gets at it. */
/*
 * Lookup from a maximum quantized value (index 0..31) to a Huffman table
 * number; used directly for max <= 15 and through choose_table() for
 * max < 32.  The first row covers indices 0..15, the second 16..31.
 */
static const int choose_table_ptn[]={
/* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 */
   0, 1, 2, 5, 7, 7,10,10,13,13,13,13,13,13,13,13,
  16,17,17,18,18,18,18,19,19,19,19,19,19,19,19,20};

/*
 * Table choice for large maxima (used by choose_table() for max >= 32).
 *
 * Scans ht[15..31] and returns the first index whose linmax is at least
 * max - 15.  Returns 0 when no table qualifies.
 */
static
int
choose_tableH( int max )
{
	int  i;

	max -= 15;
	for( i = 15; i < 32; i++ ){
		if( ht[i].linmax >= max ) return i;
	}
	return 0;	/* here if max >= 8192 (ht[24].linmax) cf. huffcode.tbl */
}
/*
 * Table number for a maximum value: table lookup below 32, search above.
 * The argument and the whole expansion are parenthesized so the macro can
 * be used inside larger expressions; the argument is evaluated more than
 * once, so pass a plain variable.
 */
#define choose_table( max ) ( ( (max) < 32 ) ? choose_table_ptn[(max)] : choose_tableH( (max) ) )

/*
 *	99/08/01
 *	count_bit()̒u
 *	99/08/14
 *	begin<end肵Ă悢
 *	99/12/23
 *	Ԃl̎̂bitss𑫂
 *	begin,end͋
 */

static int new_choose_table_NONE( int ix[576], unsigned int begin, unsigned int end, int *bits )
{
	int i, max;
	max = ix_max( ix, begin, end );

	if( max <= 15 ){	/* 8 1500clk */
		int sum;
		int choice = 0;
		if( max == 0 ) return 0;
	/* try tables with no linbits */
		choice = choose_table_ptn[max];

		/* K choice != 0 */
		sum = count_bit( ix, begin, end, choice );
		switch( choice ){
		case 2:
			max = count_bit( ix, begin, end, 3 );
			if( max <= sum ){
				sum = max;
				choice = 3;
			}
			break;

		case 5:
			max = count_bit( ix, begin, end, 6 );
			if( max <= sum ){
				sum = max;
				choice = 6;
			}
			break;

		case 7:
			max = count_bit( ix, begin, end, 8 );
			if( max <= sum ){
				sum = max;
				choice = 8;
			}
			max = count_bit( ix, begin, end, 9 );
			if( max <= sum ){
				sum = max;
				choice = 9;
			}
			break;

		case 10:
			max = count_bit( ix, begin, end, 11 );
			if( max <= sum ){
				sum = max;
				choice = 11;
			}
			max = count_bit( ix, begin, end, 12 );
			if( max <= sum ){
				sum = max;
				choice = 12;
			}
			break;

		case 13:
			max = count_bit( ix, begin, end, 15 );
			if( max <= sum ){
				sum = max;
				choice = 15;
			}
			break;
		default:
			break;
		}
		*bits += sum;
		return choice;
	}else{	/* 2 800clk */
		int sum[2];
		int choice[2];
		choice[0] = 0;
		choice[1] = 0;

	/* try tables with linbits */
		max -= 15;	// max  16ȏゾB max >= 1

		for( i = 16; i < 24; i++ ){
			if( ht[i].linmax >= max ){
				choice[0] = i;
			break;
			}
		}
		for( i = 24; i < 32; i++ ){
			if( ht[i].linmax >= max ){
				choice[1] = i;
				break;
			}
		}
		if( choice[0] ){
			sum[0] = count_bit( ix, begin, end, choice[0] );
		}else{
			sum[0] = 0;
		}
		if( choice[1] ){
			sum[1] = count_bit( ix, begin, end, choice[1] );
		}else{
			sum[1] = 0;
		}
		if( sum[1] < sum[0] ){
			sum[0] = sum[1];
			choice[0] = choice[1];
		}
		if( choice[0] ){
			*bits += sum[0];
		}
		return choice[0];
	}
}

/*
 * Choose table_select[0] and table_select[1] for the window-interleaved
 * layout (ix[line*3 + window]) and return the Huffman bit count.
 *
 * table_select[0] serves bands starting below line 12, table_select[1]
 * the remaining bands; table_select[2] is set to 0.
 * (A garbled original note suggests this path is rarely taken.)
 */
static int bigv_bitcount( int ix[576], gr_info *cod_info ){
	/*
	  Within each scalefactor band, data is given for successive
	  time windows, beginning with window 0 and ending with window 2.
	  Within each window, the quantized values are then arranged in
	  order of increasing frequency...
	  */
	int bits = 0;
	int band, win, pos;
	int first, last;
	int peak_lo = 0, peak_hi = 0;
	I192_3 *ix_s = (I192_3 *) &ix[0];

	cod_info->table_select[0] = 0;
	cod_info->table_select[1] = 0;
	cod_info->table_select[2] = 0;

	/* pass 1: largest value in each of the two regions */
	for( band = 0; band < 13; band++ ){
		int *peak;
		first = scalefac_short[ band ];
		last  = scalefac_short[ band+1 ];
		peak  = ( first < 12 ) ? &peak_lo : &peak_hi;
		for( win = 0; win < 3; win++ ){
			for( pos = first; pos < last; pos += 2 ){
				int a = ix[ (pos * 3) + win ];
				int b = ix[ ((pos + 1) * 3) + win ];
				if( a > *peak ) *peak = a;
				if( b > *peak ) *peak = b;
			}
		}
	}
	cod_info->table_select[0] = choose_table(peak_lo);
	cod_info->table_select[1] = choose_table(peak_hi);

	/* pass 2: add up the code length of every value pair; a region whose
	 * table is 0 contributes nothing */
	for( band = 0; band < 13; band++ ){
		unsigned tableindex;

		first = scalefac_short[ band ];
		last  = scalefac_short[ band+1 ];

		if( first < 12 ){
			tableindex = cod_info->table_select[0];
		}else{
			tableindex = cod_info->table_select[1];
		}
		if( !tableindex ) continue;

		for( win = 0; win < 3; win++ ){
			for( pos = first; pos < last; pos += 2 ){
				unsigned int code, ext;
				int cbits, xbits;
				int x = (*ix_s)[pos][win];
				int y = (*ix_s)[pos + 1][win];
				bits += HuffmanCode( tableindex, x, y, &code, &ext, &cbits, &xbits );
			}
		}
	}
	return bits;
}

/* variants; not defined in the visible part of this file */
int count_bit_NONE(int ix[576],unsigned int start,unsigned int end,unsigned int table);
int count_bit_MMX(int ix[576],unsigned int start,unsigned int end,unsigned int table);

/*
 * Bind the count_bit function pointer.  The MMX version is used only when
 * tMMX is set and the tAMD + tFAMILY6 + tSPC1 combination is not; every
 * other case gets count_bit_NONE.
 */
void setup_count_bit(int useUNIT){
	int amd_f6_spc1 = (useUNIT & tAMD) && (useUNIT & tFAMILY6) && (useUNIT & tSPC1);

	if( !amd_f6_spc1 && (useUNIT & tMMX) ){
		SETUP_DSP("use:count_bit_MMX\n");
		count_bit = count_bit_MMX;
	}else{
		SETUP_DSP("use:count_bit_NONE\n");
		count_bit = count_bit_NONE;
	}
}

/* Candidate implementations for the new_choose_table hook. */
int new_choose_table_MMX( int ix[576], unsigned int begin, unsigned int end, int *bits );
static int new_choose_table_NONE( int ix[576], unsigned int begin, unsigned int end, int *bits );

/*
 * Bind the file-local new_choose_table function pointer to one
 * implementation, chosen from the capability bits in useUNIT.
 *
 * The MMX routine is used when tMMX is set, unless tAMD, tFAMILY6 and tSPC1
 * are all set, in which case the generic routine is forced.
 *
 * NOTE(review): the original Japanese remark here is encoding-damaged; it
 * appears to say that on K7 the NONE version is faster in benchmark mode
 * while the MMX version is faster in real encoding -- confirm before
 * relying on it.
 */
void setup_new_choose_table(int useUNIT)
{
	int use_mmx = (useUNIT & tMMX) != 0;

	/* The AMD/FAMILY6/SPC1 combination overrides the MMX choice. */
	if( (useUNIT & tAMD) && (useUNIT & tFAMILY6) && (useUNIT & tSPC1) ){
		use_mmx = 0;
	}

	if( use_mmx ){
		SETUP_DSP("use:new_choose_table_MMX\n");
		new_choose_table = new_choose_table_MMX;
	}else{
		SETUP_DSP("use:new_choose_table_NONE\n");
		new_choose_table = new_choose_table_NONE;
	}
}

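/*
 *	00/01/16: following lame3.55, calc_runlen and count1_bitcount were merged.
 *	Speed-wise this made no real difference.
 *	(Translated from an encoding-damaged Japanese comment; wording approximate.)
 */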
/* Candidate implementations for the calc_runlen_count1 hook (defined outside this block). */
int calc_runlen_count1_NONE( int *ix, gr_info *cod_info );
int calc_runlen_count1_CMOV( int *ix, gr_info *cod_info );

/*
 * Bind the file-local calc_runlen_count1 function pointer: the CMOV variant
 * when useUNIT has tCMOV set, the generic variant otherwise.
 */
void setup_calc_runlen_count1(int useUNIT)
{
	if( !(useUNIT & tCMOV) ){
		SETUP_DSP("use:calc_runlen_count1_NONE\n");
		calc_runlen_count1 = calc_runlen_count1_NONE;
		return;
	}

	SETUP_DSP("use:calc_runlen_count1_CMOV\n");
	calc_runlen_count1 = calc_runlen_count1_CMOV;
}

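/*
 *	(Translated from an encoding-damaged Japanese comment; wording approximate,
 *	 lines marked (?) could not be recovered with confidence.)
 *	max=ix_max(); is only used to decide whether the value is too large,
 *	so the scan could stop part-way, or perhaps be omitted altogether.
 *	98/08/09	apparently this has to be checked at the PEN stage (?)
 *	total 11000clk
 *	8191 + 14 must not be changed
 *	00/01/05 split up (subject unreadable)
 *	without MMX the Takehiro routine appears to be the faster one (?)
 *	00/01/18 non MMX version also reworked (?)
 *	00/03/03 subdivide bug fix & (rest unreadable)
 */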

/*
 * Fill in the region layout (region0_count, region1_count, adr1..adr3) and
 * the Huffman table_select[] entries of *cod_info for the quantized values
 * in ix, and return the resulting bit count.
 *
 * Short blocks get a fixed layout and are counted by bigv_bitcount().
 * For every other block type, calc_runlen_count1() supplies the initial bit
 * total (it is expected to have set cod_info->big_values -- TODO confirm),
 * then up to three big-value regions are handed to new_choose_table(), which
 * returns the table index and receives a pointer to the running total.
 */
static
int
count_bits( int  *ix, gr_info *cod_info)
{
	/* Region split lookup, indexed by the scalefactor band found below. */
	static const int region0[23]={0,0,0,0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6};
	static const int region1[23]={0,0,0,0,0,1,1,1,2,2,3,3,4,4,4,5,5,6,6,6,7,7,7};
	int total_bits;
	int big_end;	/* 2 * big_values: end of the big-value area */
	int first_end;	/* end of the first region: the smaller of adr1 and adr2 */

	if( cod_info->block_type == SHORT_TYPE ){
		/* Fixed layout for short blocks (values taken over from subdivide). */
		cod_info->big_values = 288;
		cod_info->count1 = 0;
		cod_info->count1table_select = 1;
		cod_info->region0_count =  8;
		cod_info->region1_count =  36;
		cod_info->adr1 = 36;
		cod_info->adr2 = 576;
		cod_info->adr3 = 0;
		return bigv_bitcount(ix,cod_info);
	}

	/* The bulk of the time is spent in this call according to the original note ("99%"). */
	total_bits = calc_runlen_count1( ix, cod_info );
	big_end = cod_info->big_values * 2;

	if( cod_info->big_values == 0 ){
		cod_info->region0_count = 0;
		cod_info->region1_count = 0;
		/*
		 * adr1..adr3 are deliberately left untouched; they are not
		 * necessarily 0 here.
		 * NOTE(review): the table selection below still reads adr1/adr2
		 * in this case -- confirm that the retained values are intended.
		 */
	}else if( cod_info->window_switching_flag ){
		cod_info->region0_count = 7;
		cod_info->region1_count = 13;
		cod_info->adr1 = scalefac_long[8];
		cod_info->adr2 = big_end;
		cod_info->adr3 = 0;
	}else{
		int band;
		int r0, r1;

		/* First band index (starting at 1) whose boundary is >= big_end. */
		for( band = 1; scalefac_long[band] < big_end; band++ ){
		}

		r0 = region0[band];
		r1 = region1[band];

		cod_info->region0_count = r0;
		cod_info->region1_count = r1;
		cod_info->adr1 = scalefac_long[ r0 + 1 ];
		cod_info->adr2 = scalefac_long[ r0 + r1 + 2 ];
		cod_info->adr3 = big_end;
	}

	/* Regions that end up empty keep table 0. */
	cod_info->table_select[0] = 0;
	cod_info->table_select[1] = 0;
	cod_info->table_select[2] = 0;

	first_end = Min( cod_info->adr1, cod_info->adr2 );
	if( first_end > 0 ){
		cod_info->table_select[0] = new_choose_table( ix, 0, first_end, &total_bits );
	}
	if( cod_info->adr2 > cod_info->adr1 ){
		cod_info->table_select[1] = new_choose_table( ix, cod_info->adr1, cod_info->adr2, &total_bits );
	}
	if( big_end > cod_info->adr2 ){
		cod_info->table_select[2] = new_choose_table( ix, cod_info->adr2, big_end, &total_bits );
	}

	return total_bits;
}
