/*
Copyright (c) 2002,2003 Anatoliy Kuznetsov.

Permission is hereby granted, free of charge, to any person 
obtaining a copy of this software and associated documentation 
files (the "Software"), to deal in the Software without restriction, 
including without limitation the rights to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell copies of the Software, 
and to permit persons to whom the Software is furnished to do so, 
subject to the following conditions:

The above copyright notice and this permission notice shall be included 
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
OTHER DEALINGS IN THE SOFTWARE.
*/

#include <bitset>
#include <iostream>
#include <time.h>

// No intermix FP with integer SSE in this program
//#define BM_SET_MMX_GUARD
//#define BMSSE2OPT


//#define BM_DISBALE_BIT_IN_PTR
#include "bm.h"
#include "bmalgo.h"

#include <math.h>

using namespace std;

// Number of bits in every bit set exercised by the benchmarks in this file.
const unsigned BSIZE = 150000000;
// Iteration count for the timed loops (several loops run REPEATS/10 instead).
const unsigned int REPEATS = 300;

// Fixed-size STL bitset of BSIZE bits; the "STL" side of each comparison.
typedef  bitset<BSIZE>  test_bitset;


// Scope timer: records clock() on construction and, on destruction, prints
//
//     <test_name> ; <elapsed clocks>;<elapsed seconds>;<operations per second>
//
// followed by a newline. The operations-per-second field is omitted when
// repeats is 0 or when the measured duration is 0 (the elapsed time was below
// the clock() resolution), so the report never contains "inf".
//
// test_name - label printed first; the pointer is stored, not copied, so the
//             string must outlive the timer. A null pointer prints as "".
// repeats   - number of operations performed while the timer is alive.
class TimeTaker
{
public:

    TimeTaker(const char* test_name, unsigned repeats)
        : test_name_(test_name ? test_name : ""),
          start_(clock()),
          finish_(0),
          repeats_(repeats)
    {
    }

    ~TimeTaker()
    {
        finish_ = clock();
        const clock_t elapsed_clocks = finish_ - start_;
        const double duration = (double)elapsed_clocks / CLOCKS_PER_SEC;

        std::cout << test_name_ << " ; " << elapsed_clocks << ";" << duration << ";";
        // Guard the division: a zero duration would otherwise print "inf".
        if (repeats_ && duration > 0)
        {
            const double ops_per_sec = (double)repeats_ / duration;
            std::cout << ops_per_sec;
        }
        std::cout << std::endl;
    }

private:
    // Not copyable: a copy would emit a second report for the same scope.
    TimeTaker(const TimeTaker&);
    TimeTaker& operator=(const TimeTaker&);

private:
    const char*  test_name_;
    clock_t      start_;
    clock_t      finish_;
    unsigned     repeats_;
};

// BitMagic bit-vector with default template arguments; the "BM" side of each
// comparison.
typedef bm::bvector<> bvect;


// Writes set_flag into every fill_factor-th bit of [min, max) in both
// containers, so the two end up with identical updates.
//
// bset, bvect  - STL bitset and BM bit-vector to modify
// min, max     - half-open range of bit indexes to visit
// fill_factor  - distance between visited bits; 0 is treated as 1 because a
//                zero step would never advance the loop
// set_flag     - value stored in each visited bit (defaults to true)
void SimpleFillSets(test_bitset& bset, 
                     bvect& bvect,
                       unsigned min, 
                       unsigned max,
                       unsigned fill_factor,
                       bool set_flag=true)
{
    const unsigned step = fill_factor ? fill_factor : 1;

    for (unsigned i = min; i < max; i += step)
    {
        bset[i] = set_flag;
        bvect[i] = set_flag;
    } // for i
}


//
// Interval filling.
// 111........111111........111111..........11111111.......1111111...
//
// Writes set_flag into the same pseudo-random pattern of bits in both
// containers. Each round of the outer loop:
//   1. writes a solid run of rand() % (10 * fill_factor) bits starting at
//      the current position (the length is re-drawn until the run ends
//      below max);
//   2. skips a gap of rand() % 10 bits;
//   3. writes every third bit for up to 334 steps (k = 0, 3, ... < 1000),
//      stopping early at max.
//
// bset, bvect  - STL bitset and BM bit-vector to modify
// min, max     - half-open range of bit indexes to cover
// fill_factor  - scales the maximum run length; 0 is replaced by a non-zero
//                rand() % 10 value
// set_flag     - value stored in every visited bit (defaults to true)
//
// rand() is never seeded here, so the pattern depends on the caller's seed.
//

void FillSetsIntervals(test_bitset& bset, 
                       bvect& bvect,
                       unsigned min, 
                       unsigned max,
                       unsigned fill_factor,
                       bool set_flag=true)
{
    while(fill_factor==0)
    {
        fill_factor=rand()%10;
    }

    unsigned i, j;
    unsigned factor = 10 * fill_factor;
    for (i = min; i < max; ++i)
    {
        unsigned len, end; 

        // Draw a run length that keeps the run inside [i, max).
        do
        {
            len = rand() % factor;
            end = i+len;
            
        } while (end >= max);

        // Solid run.
        for (j = i; j < end; ++j)
        {
            bset[j] = set_flag;
            bvect[j] = set_flag;
        } // j

        i = end;

        // Random gap.
        len = rand() % 10;
        i+=len;

        // Strided run: every third bit. The index here is i; the original
        // clear branch used the stale j and so cleared the wrong bit.
        for (unsigned k = 0; k < 1000 && i < max; k+=3, i+=3)
        {
            bset[i] = set_flag;
            bvect[i] = set_flag;
        }

    } // for i

}


// Baseline memory-bandwidth measurements over two buffers of BSIZE/32
// unsigned words each:
//   - "Memory ADD transfer test.": REPEATS passes of m1[j] += m2[j],
//     manually unrolled four words at a time;
//   - "memcpy transfer test.":     REPEATS memcpy() calls from m2 to m1.
// Both buffers are zero-initialized so the ADD loop never reads
// indeterminate values.
void MemCpyTest()
{
    const unsigned words = BSIZE/32;

    unsigned* m1 = new unsigned[words]();
    unsigned* m2 = new unsigned[words]();
    
    unsigned int i,j;
    {
    TimeTaker tt("Memory ADD transfer test.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        // j + 3 < words keeps the unrolled body in bounds even if the word
        // count is not a multiple of four.
        for (j = 0; j + 3 < words; j+=4)
        {
            m1[j+0] += m2[j+0];
            m1[j+1] += m2[j+1];
            m1[j+2] += m2[j+2];
            m1[j+3] += m2[j+3];
        }
    }
    }
    
    {
    TimeTaker tt("memcpy transfer test.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        memcpy(m1, m2, words * sizeof(unsigned));
    }
    }
    
    delete [] m1;
    delete [] m2;
}


void BitCountTest()
{
    {
    bvect*  bv = new bvect();
    test_bitset*  bset = new test_bitset();
    unsigned value = 0;

    FillSetsIntervals(*bset, *bv, 0, BSIZE, 10);

    {
    TimeTaker tt("BitCount. Random bitvector. BM.", REPEATS);
    for (unsigned i = 0; i < REPEATS; ++i)
    {    
        value+=bv->count();
    }
    }
    volatile unsigned* p = &value;
    unsigned c1 = *p;
    c1 = value = 0;


    {
    TimeTaker tt("BitCount. Random bitvector. STL.", REPEATS);
    for (unsigned i = 0; i < REPEATS; ++i)
    {    
        value += bset->count();
    }
    }

    c1 = *p;
    c1 = value = 0;

    delete bset;
    delete bv;

    }
}


// Times REPEATS calls of count() on a sparse bit set (every 2500th bit of
// BSIZE set):
//   1. BM bit-vector as filled;
//   2. STL bitset;
//   3. BM bit-vector again after optimize() (labelled "GAP" in the report).
// The containers are released only after the last timer has reported, so
// deallocation is not charged to the third measurement.
void BitCountSparseTest()
{
    bvect*  bv = new bvect();
    test_bitset*  bset = new test_bitset();
    unsigned value = 0, c1;
    // Sums are read back through a volatile pointer after the timed loops.
    volatile unsigned* p = &value;

    SimpleFillSets(*bset, *bv, 0, BSIZE, 2500);

    {
        TimeTaker tt("BitCount. Sparse bitset. BM.", REPEATS);
        for (unsigned i = 0; i < REPEATS; ++i)
        {    
            value += bv->count();
        }
    }

    {
        TimeTaker tt("BitCount. Sparse bitset. STL.", REPEATS);
        for (unsigned int i = 0; i < REPEATS; ++i)
        {    
            value += bset->count();
        }
    }

    c1 = *p;
    value = c1 = 0;

    bv->optimize();

    {
        TimeTaker tt("BitCount. GAP Sparse bitset. BM.", REPEATS);
        for (unsigned i = 0; i < REPEATS; ++i)
        {    
            value += bv->count();
        }
    }
    c1 = *p;
    value = c1 = 0;

    delete bv;
    delete bset;
}



// Comparison benchmarks:
//   1. REPEATS calls of bvector::compare() on two identically filled
//      vectors (every 10th bit set);
//   2. a cross-check of bm::wordcmp() against bm::wordcmp0() over
//      REPEATS * 50000 word pairs; the program exits with status 1 on the
//      first disagreement;
//   3. separate timings of wordcmp0() and wordcmp() over the same pairs.
// About one in ten arr1 words is forced to 0; every arr2 word is assigned,
// so no comparison reads an uninitialized value.
void BitCompareTest()
{
    {
    bvect*  bv1 = new bvect();
    bvect*  bv2 = new bvect();
    // Needed only because SimpleFillSets() fills a bitset alongside the
    // bit-vector.
    test_bitset*  bset = new test_bitset();
    int value = 0;

    SimpleFillSets(*bset, *bv1, 0, BSIZE, 10);
    SimpleFillSets(*bset, *bv2, 0, BSIZE, 10);

    {
    TimeTaker tt("BitCompare. Random bitvector. BM.", REPEATS);
    for (unsigned int i = 0; i < REPEATS; ++i)
    {    
        value+=bv1->compare(*bv2);
    }
    }

    delete bset;
    delete bv1;
    delete bv2;

    }


    unsigned cnt = REPEATS * 50000;
    unsigned* arr1 = new unsigned[cnt];
    unsigned* arr2 = new unsigned[cnt];

    unsigned i;
    for (i = 0; i < cnt; ++i)
    {
        if ((rand() % 10) == 0)
        {
            arr1[i] = 0;
        }
        else 
        {
            arr1[i] = rand();
        }
        // Assigned on both paths; the zero path used to leave this word
        // uninitialized.
        arr2[i] = rand();
    }

    {
    TimeTaker tt("wordcmp complex. Random words comparison. BM.", cnt);

    for (i = 0; i < cnt; ++i)
    {    
        int res2 = bm::wordcmp(arr1[i], arr2[i]);
        int res = bm::wordcmp0(arr1[i], arr2[i]);

        if (res != res2)
        {
            cerr << "Incorrect result ! " << arr1[i] 
                 << "<=>" << arr2[i] << " res=" << res <<
                 endl;
            exit(1);
        }
    }
    }

    // Accumulator for the two timed loops below; its value is not reported.
    int c = 0;

    {
    TimeTaker tt("wordcmp0. Random words comparison. BM.", cnt);
    for (i = 0; i < cnt; ++i)
    {    
        c += bm::wordcmp0(arr1[i], arr2[i]);
    }
    }


    {
    TimeTaker tt("wordcmp. Random words comparison. BM.", cnt);
    for (i = 0; i < cnt; ++i)
    {    
        c += bm::wordcmp(arr1[i], arr2[i]);
    }
    }

    c = 0;

    delete [] arr1;
    delete [] arr2;
}

// Times two ways of walking the set bits of an interval-filled BM
// bit-vector, REPEATS/10 full traversals each:
//   - "Enumerator. BM.":  bvect::enumerator from first() up to end();
//   - "get_next(). BM.":  get_first() followed by get_next() until it
//                         returns 0.
// Each timer is given the REPEATS/10 iterations its loop actually runs, and
// the containers are freed after the last timer has reported.
void EnumeratorTest()
{
    bvect*  bv = new bvect();
    test_bitset*  bset = new test_bitset();
    unsigned value = 0;

    FillSetsIntervals(*bset, *bv, 0, BSIZE, 10);

    // Results are unused. NOTE(review): presumably a warm-up pass over both
    // containers before timing - confirm before removing.
    unsigned cnt1 = bv->count();
    unsigned cnt2 = bset->count();

    
    unsigned i;

    {
    TimeTaker tt("Enumerator. BM.", REPEATS/10);
    for (i = 0; i < REPEATS/10; ++i)
    {    
        bvect::enumerator en = bv->first();
        bvect::enumerator bend = bv->end();

        while (en < bend)
        {
            value = *en;
            ++en;
        }
    }
    }


    // -----------------------------------------------

    {
    TimeTaker tt("get_next(). BM.", REPEATS/10);
    for (i = 0; i < REPEATS/10; ++i)
    {
        if (bv->count())
        {
            unsigned pos = bv->get_first();
            do
            {
                pos = bv->get_next(pos);
            } while ( pos );
        }
    }
    }

    delete bset;
    delete bv;
}


// Same traversal benchmarks as EnumeratorTest(), on a sparse bit-vector
// (every 2500th bit of BSIZE set). The pair of timings runs twice: once on
// the vector as filled, then again after optimize().
// Each timer is given the REPEATS/10 iterations its loop actually runs.
// optimize() and the "Testing optimized vectors." banner run only between
// the two passes, not after the second.
void EnumeratorTestGAP()
{
    bvect*  bv = new bvect();
    test_bitset*  bset = new test_bitset();
    unsigned i;
    unsigned value = 0;

    SimpleFillSets(*bset, *bv, 0, BSIZE, 2500);

    for (int k = 0; k < 2; ++k)
    {

    {
    TimeTaker tt("Sparse bvector (enumerator). BM.", REPEATS/10);
    for (i = 0; i < REPEATS/10; ++i)
    {    
        bvect::enumerator en = bv->first();
        bvect::enumerator bend = bv->end();

        while (en < bend)
        {
            value = *en;
            ++en;
        }
    }

    }

    // -----------------------------------------------

    {
    TimeTaker tt("Sparse bvector (get_next). BM.", REPEATS/10);

    for (i = 0; i < REPEATS/10; ++i)
    {
        if (bv->count())
        {
            unsigned pos = bv->get_first();
            do
            {
                pos = bv->get_next(pos);
            } while ( pos );
        }
    }

    }

    if (k == 0)
    {
        // Prepare the second pass.
        bv->optimize();
        cout << "Testing optimized vectors." << endl;
    }

    }

    delete bv;
    delete bset;
    // -----------------------------------------------

}

// Times REPEATS/10 whole-set flip() calls on a sparse bit set (every 2500th
// bit of BSIZE set), first for the BM bit-vector and then for the STL
// bitset. Both timers are given the REPEATS/10 iterations actually run.
void InvertTest()
{
    bvect*  bv = new bvect();
    test_bitset*  bset = new test_bitset();
    unsigned i;

    SimpleFillSets(*bset, *bv, 0, BSIZE, 2500);
    {
    TimeTaker tt("Invert bvector. BM.", REPEATS/10);
    for (i = 0; i < REPEATS/10; ++i)
    {
        bv->flip();    
    }
    }

    {
    TimeTaker tt("Invert bvector. STL.", REPEATS/10);
    for (i = 0; i < REPEATS/10; ++i)
    {
        bset->flip();    
    }
    }

    delete bv;
    delete bset;
}


// Times REPEATS in-place AND operations (a &= b), first on two BM
// bit-vectors and then on two STL bitsets. All four containers have every
// 100th bit of BSIZE set, so the STL operands mirror the BM operands.
void AndTest()
{
    bvect*  bv1 = new bvect();
    test_bitset*  bset1 = new test_bitset();
    test_bitset*  bset2 = new test_bitset();
    bvect*  bv2 = new bvect();
    unsigned i;

    SimpleFillSets(*bset1, *bv1, 0, BSIZE, 100);
    // Fill bset2 here; the original passed bset1 again and left bset2 empty.
    SimpleFillSets(*bset2, *bv2, 0, BSIZE, 100);
    {
    TimeTaker tt("AND bvector test. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        *bv1 &= *bv2;
    }
    }

    {
    TimeTaker tt("AND bvector test. STL.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        *bset1 &= *bset2;
    }
    }

    delete bv1;
    delete bv2;

    delete bset1;
    delete bset2;
}


void SubTest()
{
    bvect*  bv1 = new bvect();
    test_bitset*  bset1 = new test_bitset();
    bvect*  bv2 = new bvect();
    unsigned i;
    unsigned value = 0;

    SimpleFillSets(*bset1, *bv1, 0, BSIZE, 100);
    SimpleFillSets(*bset1, *bv2, 0, BSIZE, 100);
    delete bset1;

    {
    TimeTaker tt("SUB bvector test. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        *bv1 -= *bv2;
    }
    }
    delete bv1;
    delete bv2;
}


void XorCountTest()
{
    bvect*  bv1 = new bvect();
    bvect*  bv2 = new bvect();
    test_bitset*  bset1 = new test_bitset();
    test_bitset*  bset2 = new test_bitset();
    unsigned i;

    SimpleFillSets(*bset1, *bv1, 0, BSIZE, 400);
    SimpleFillSets(*bset2, *bv2, 0, BSIZE, 500);

    unsigned count1 = 0;
    unsigned count2 = 0;
    unsigned test_count = 0;

    {
    bvect bv_tmp;
    TimeTaker tt("XOR COUNT bvector test with TEMP vector. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bv_tmp.clear(false);
        bv_tmp |= *bv1;
        bv_tmp ^= *bv2;
        count1 += bv_tmp.count();
    }
    }


    {
    test_bitset*  bset_tmp = new test_bitset();
    TimeTaker tt("XOR COUNT bvector test with TEMP vector. STL.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bset_tmp->reset();
        *bset_tmp |= *bset1;
        *bset_tmp ^= *bset2;
        test_count += bset_tmp->count();
    }
    }


    {
    TimeTaker tt("XOR COUNT bvector test. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        count2 += bm::count_xor(*bv1, *bv2);
    }
    }

    
    if (count1 != count2)
    {
        cout << "Check failed !" << endl;
        cout << count1 << " " << count2 << " " << test_count << endl;
        exit(1);
    }
    count1 = count2 = 0;
    
    // -----------------------------------------
    cout << "One optimized vector" << endl;
    bv2->optimize();
    //bv2->stat();
    {
    bvect bv_tmp;
    TimeTaker tt("XOR COUNT bvector test with TEMP vector. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bv_tmp.clear(false);
        bv_tmp |= *bv1;
        bv_tmp ^= *bv2;
        count1 += bv_tmp.count();
    }
    }

    {
    TimeTaker tt("XOR COUNT bvector test. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        count2 += bm::count_xor(*bv1, *bv2);
    }
    }

    if (count1 != count2)
    {
        cout << "Check failed !" << endl;
        exit(1);
    }
    count1 = count2 = 0;

    // -----------------------------------------
    cout << "Both vectors optimized" << endl;
    bv1->optimize();
    //bv1->stat();
    {
    bvect bv_tmp;
    TimeTaker tt("XOR COUNT bvector test with TEMP vector. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bv_tmp.clear(false);
        bv_tmp |= *bv1;
        bv_tmp ^= *bv2;
        count1 += bv_tmp.count();
    }
    }

    {
    TimeTaker tt("XOR COUNT bvector test. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        count2 += bm::count_xor(*bv1, *bv2);
    }
    }
    if (count1 != count2)
    {
        cout << "Check failed !" << endl;
        exit(1);
    }
    count1 = count2 = 0;


    delete bv1;
    delete bv2;
    
    delete bset1;
    delete bset2;    
}


void TI_MetricTest()
{
    bvect*  bv1 = new bvect();
    bvect*  bv2 = new bvect();
    test_bitset*  bset1 = new test_bitset();
    test_bitset*  bset2 = new test_bitset();
    unsigned i;

    SimpleFillSets(*bset1, *bv1, 0, BSIZE, 500);
    SimpleFillSets(*bset2, *bv2, 0, BSIZE, 250);

    unsigned count1 = 0;
    unsigned count2 = 0;
    unsigned countA=0, countB=0, test_countA=0, test_countB=0;
    unsigned test_count = 0;
    double ti1=0, ti2=0, test_ti=0;

    {
    TimeTaker tt("Tversky Index bvector test vector. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        count1 = bm::count_and(*bv1, *bv2);
        
        countA = bm::count_sub(*bv1, *bv2);
        countB = bm::count_sub(*bv2, *bv1);
        
        ti1 = double(count1) / double(0.4*countA + 0.5*countB + count1);
    }
    }


    {
    test_bitset*  bset_tmp = new test_bitset();
    double test_dice = 0;
    TimeTaker tt("Dice bvector test with TEMP vector. STL.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bset_tmp->reset();
        *bset_tmp |= *bset1;
        *bset_tmp &= *bset2;
        test_count += bset_tmp->count();
        
        test_countA += bset1->count();
        test_countB += bset2->count();
        
        test_countA += bset1->count();
        test_countB += bset2->count();
        
        test_dice += double(2*test_count) / double(test_countA + test_countB);
    }
    }


    {
    bm::distance_metric_descriptor dmd[3];
    dmd[0].metric = bm::COUNT_AND;
    dmd[1].metric = bm::COUNT_SUB_AB;
    dmd[2].metric = bm::COUNT_SUB_BA;    
    
    TimeTaker tt("Tversky Index bvector test (pipeline). BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bm::distance_operation(*bv1, *bv2, &dmd[0], (&dmd[0])+3);
                
        ti2 = double(dmd[0].result) / double(0.4*dmd[1].result + 0.5*dmd[2].result + dmd[0].result);
        
        dmd[0].result = dmd[1].result = dmd[2].result = 0;
    }
    }

    
    if (fabs(ti2 - ti1) > 0.1)
    {
        cout << "Check failed ! error=" << fabs(ti2 - ti1) << endl;
        cout << ti1 << " " << ti2 << endl;
        exit(1);
    }
    count1 = count2 = 0;

    // -----------------------------------------
    cout << "One optimized vector" << endl;
    bv2->optimize();
    bv1->count(); // trying to fool the CPU cache

    
    {
    TimeTaker tt("Dice metric bvector test. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        count1 = bm::count_and(*bv1, *bv2);
        
        countA = bm::count_sub(*bv1, *bv2);
        countB = bm::count_sub(*bv2, *bv1);
        
        ti1 = double(count1) / double(0.4*countA + 0.5*countB + count1);
    }
    }



    {
    bm::distance_metric_descriptor dmd[3];
    dmd[0].metric = bm::COUNT_AND;
    dmd[1].metric = bm::COUNT_SUB_AB;
    dmd[2].metric = bm::COUNT_SUB_BA;    
    
    TimeTaker tt("Tversky Index bvector test (pipeline). BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bm::distance_operation(*bv1, *bv2, &dmd[0], (&dmd[0])+3);
                
        ti2 = double(dmd[0].result) / double(0.4*dmd[1].result + 0.5*dmd[2].result + dmd[0].result);
        
        dmd[0].result = dmd[1].result = dmd[2].result = 0;
    }
    }


    if (fabs(ti2 - ti1) > 0.1)
    {
        cout << "Check failed !" << endl;
        cout << ti1 << " " << ti2 << endl;
        exit(1);
    }
    count1 = count2 = 0;
    count1 = count2 = 0;

    // -----------------------------------------
    cout << "Both vectors optimized" << endl;
    bv1->optimize();

    {
    TimeTaker tt("Tversky index bvector test. BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        count1 = bm::count_and(*bv1, *bv2);
        
        countA = bm::count_sub(*bv1, *bv2);
        countB = bm::count_sub(*bv2, *bv1);
        
        ti1 = double(count1) / double(0.4*countA + 0.5*countB + count1);
    }
    }

    {
    bm::distance_metric_descriptor dmd[3];
    dmd[0].metric = bm::COUNT_AND;
    dmd[1].metric = bm::COUNT_SUB_AB;
    dmd[2].metric = bm::COUNT_SUB_BA;    
    
    TimeTaker tt("Tversky Index bvector test (pipeline). BM.", REPEATS);
    for (i = 0; i < REPEATS; ++i)
    {
        bm::distance_operation(*bv1, *bv2, &dmd[0], (&dmd[0])+3);
                
        ti2 = double(dmd[0].result) / double(0.4*dmd[1].result + 0.5*dmd[2].result + dmd[0].result);
        
        dmd[0].result = dmd[1].result = dmd[2].result = 0;
    }
    }

    if (fabs(ti2 - ti1) > 0.1)
    {
        cout << "Check failed !" << endl;
        cout << ti1 << " " << ti2 << endl;
        exit(1);
    }


    delete bv1;
    delete bv2;
    
    delete bset1;
    delete bset2;    
}


// Runs every benchmark once, in a fixed order, and returns 0.
int main(void)
{
    typedef void (*BenchmarkFn)();

    // Execution order of the suite.
    static const BenchmarkFn benchmarks[] =
    {
        MemCpyTest,
        BitCountTest,
        BitCountSparseTest,
        BitCompareTest,
        EnumeratorTest,
        EnumeratorTestGAP,
        AndTest,
        SubTest,
        InvertTest,
        XorCountTest,
        TI_MetricTest
    };
    const unsigned total = sizeof(benchmarks) / sizeof(benchmarks[0]);

    for (unsigned idx = 0; idx < total; ++idx)
    {
        benchmarks[idx]();
    }

    return 0;
}
