/*---------------------------------------------------------------
 * File:	kgram.h
 *
 * Author:	Liu Yang
 * Date:	Aug 2, 2009
 *
 * TO IMPLEMENT MULTI-BYTE MATCHING
 * --------------------------------------------------------------
 *  $Log: kgram.h,v $
 *  Revision 1.7  2010/09/27 14:21:06  lyangru
 *  final synchronization
 *
 *  Revision 1.1  2009/09/02 01:57:00  lyangru
 *  startup
 *
 * */
#include <fstream>
#include <map>
#include <set>
#include <vector>
#include <string>
#include <iostream>

/* NOTE(review): no include guard or #pragma once is visible in this header;
 * confirm and add one if it can be included more than once per translation
 * unit. */

/* Build-time switches: DEBUG is left disabled here, TRACE_SIMU is enabled.
 * NOTE(review): neither macro is referenced in this header -- presumably they
 * gate debug output / simulation tracing in the implementation; confirm there. */
//#define DEBUG
#define TRACE_SIMU

/* NOTE(review): presumably the number of distinct input byte values (input
 * symbols are unsigned char in this header); it is not referenced here, so
 * confirm at its points of use. */
#define ALPHABET_SIZE	256

/* NOTE(review): a using-directive at header scope is visible to every file
 * that includes this header; other files may depend on it, so check them
 * before removing it. */
using namespace std;


/* NFA that can be driven one byte at a time or by k-grams (multi-byte
 * symbols), plus the tables used for alphabet reduction. */
struct kgram_nfa {

    /* ================= automaton data ================= */

    unsigned int num_states;
    std::set<unsigned int> starts;
    unsigned char kgram_size;
    /* how many digram transitions there are */
    unsigned int num_di_trans;

    /* Regular transition table of the NFA. */
    std::multimap<unsigned char, unsigned int> *trans_tab;

    /* K-gram transition table of the NFA.  Each map is keyed by the k-gram
     * string; in the mapped pair, the first member is the next set of states
     * and the second points at the set of accept ids (if there are any). */
    std::vector< std::map< std::string,
                           std::pair< std::set<unsigned int>,
                                      std::set<unsigned int>* > > > ktrans_tab;
    unsigned int **acc_ids;
    /* signature ids, kept for Rezwana's use */
    std::multimap<unsigned int, unsigned int> accept_ids;

    /* ================= alphabet reduction data ================= */

    std::vector<std::string> digrams;
    std::set< std::set<unsigned int> > target_sets;
    /* k-gram -> unsigned int mapping */
    std::map<std::string, unsigned int> class_c;
    /* Reduced k-gram transition table. */
    std::map< unsigned int,
              std::pair< std::set<unsigned int>,
                         std::set<unsigned int>* > > *rdt_ktrans_tab;
    unsigned int num_rdt_trans;

    /* ================= table construction ================= */

    /* fill_trans_tab
     * Reads the regular transition table from an NFA file whose epsilon
     * transitions have already been removed.
     *   Pre:  f_nfa names a text representation of an NFA.
     *   Post: trans_tab, num_states and starts are filled out. */
    void fill_trans_tab(char *f_nfa);

    /* fill_rdt_trans_tab
     * Reads the alphabet-reduced transition table from a text file that was
     * generated from a regular NFA.
     *   Pre:  f names a text representation of the reduced transitions.
     *   Post: rdt_ktrans_tab is filled out. */
    void fill_rdt_trans_tab(char *f);

    /* construct_digram_trans2
     * Builds a transition table that takes digrams as input.
     *   Pre:  trans_tab, num_states and starts are filled out.
     *   Post: ktrans_tab is filled out. */
    void construct_digram_trans2();

    /* kgram_alphabet_gen
     * Generates the alphabet for k-grams. */
    void kgram_alphabet_gen(unsigned char k);

    /* construct_multi_byte_trans / construct_trigram_trans
     * Build a transition table that takes k-grams as input. */
    void construct_multi_byte_trans(unsigned char kgram_size);
    void construct_trigram_trans();

    /* ================= transition functions ================= */

    /* delta (single symbol)
     * Transition function of the NFA.
     *   Pre:  state is the current state, sym is an input symbol.
     *   Post: s_next receives the next set of states. */
    void delta(unsigned int state, unsigned char sym, std::set<unsigned int>& s_next);

    /* delta (k-gram)
     * Transition function of the NFA for k-grams.
     *   Pre:  cur_st denotes the current set of states (frontiers);
     *         kgram is a k-gram string.
     *   Post: next_st receives the next set of states; if one or more
     *         accepting states are missed, their signature ids are put in
     *         sig_ids. */
    void delta(unsigned int cur_st, unsigned char *kgram, unsigned char len,
               std::set<unsigned int> &next_st, std::set<unsigned int> &sig_ids);

    /* ================= simulation ================= */

    /* simulate2 / simulate
     * NFA simulation with k-grams as input.
     *   Pre:  buf is the stream to be matched; len is the length of the
     *         stream in the buffer; kgram_size is how many bytes are fetched
     *         each time.
     *   Post: offsets and signature ids are reported if matches are
     *         detected. */
    void simulate2(const unsigned char *buf, unsigned int len, unsigned char kgram_size);
    void simulate(std::string buf, unsigned char kgram_size);

    /* output_kgram
     * Writes the k-gram transition table to a text file.
     *   Pre:  the k-gram transition table is filled out.
     *   Post: the table has been written to the text file f_kgram. */
    void output_kgram(char *f_kgram);

    /* ================= alphabet reduction ================= */

    /* init_alphabet_map
     * Maps all k-grams to 0. */
    void init_alphabet_map(unsigned char k);
    void alphabet_reduction();
    void alphabet_reduction_im();

    /* rdt_trans_tab_gen
     * Generates the transition table over the reduced alphabet.
     *   Pre:  rdt_ktrans_tab is empty and ktrans_tab is ready.
     *   Post: rdt_ktrans_tab is filled out. */
    void rdt_trans_tab_gen();
    void dump_rdt_transitions(char *f);
    void rdt_simulate(const unsigned char *buf, unsigned int len);

    /* rdt_simulate2 is more efficient than rdt_simulate because the latter
     * maps digrams on the fly, which is more time-consuming. */
    void rdt_simulate2(unsigned int *buf, unsigned int len);

    /* load_kgram_mapping
     * Loads the k-gram mapping table from a file.
     *   Pre:  class_c is not filled out; f names a file containing the
     *         k-gram mapping table.
     *   Post: class_c is filled out. */
    void load_kgram_mapping(char *f);

};

/* ---------------------NON-MEMBER FUNCTIONS------------------- */
/* init_kgram_covered
 * Sets kgram_covered to false. */
void init_kgram_covered(std::map<std::string, bool>& kg_covered);

/* init_class_covered
 * Sets class_covered to false. */
void init_class_covered(std::map<std::string, bool>& cl_covered);

/* init_remap
 * Maps all k-grams to 0. */
void init_remap(std::map<std::string, unsigned int>& rm);

