/*---------------------------------------------------------------------------
 * File:	nfa-multimap.cc
 *
 * Author:	Liu Yang
 * Date:	Jun 14, 2009
 *
 *------------------------------------------------------------------------
 * $Log: nfa-multimap.cc,v $
 * Revision 1.23  2010/09/27 14:21:07  lyangru
 * final synchronization
 *
 * Revision 1.1  2009/09/02 01:57:00  lyangru
 * startup
 *
 * Revision 1.7  2009/06/22 13:40:09  lyangru
 * Modified the combine, out_to_file, accept_or_not member functions after adding start
 * */

#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <set>
#include <queue>
#include <fstream>
#include <cstdlib>
#include <string.h>
#include "nfa-multimap.h"
//#include "clock.h"

//#define TRACE_SIMUL

//#define TRACE_EPS_ELIM
//#define TRACE_STARTS
//#define TRACE_ALPHA_REDUCTION

using namespace std;
using std::ifstream;

/*
 * Default constructor: creates an empty automaton.
 * Every counter starts at zero and the automaton is flagged as not yet
 * epsilon-eliminated; the containers keep their default (empty) state.
 */
multimap_nfa::multimap_nfa() {
  eps_eliminated = false;
  num_starts = 0;
  num_transitions = 0;
  num_accept_ids = 0;
  num_states = 0;
}

/*
 * Build an NFA from an in-memory transition table.
 *
 *   n_states      - number of states; transitions gets one entry per state
 *   s_start       - start state, inserted into `starts`
 *   a_states      - accepting states, copied into `accepting_states`
 *   trans         - n_transitions rows; row i is read as
 *                   { trans[i][0] = source state,
 *                     trans[i][1] = symbol,
 *                     trans[i][2] = destination state }
 *   n_transitions - number of rows in trans
 *
 * The transitions are inserted straight into the per-state multimaps; the
 * previous implementation built them through a malloc'ed table of heap
 * multimaps that was never released.
 *
 * Exits with status 1 if a row names a source state outside [0, n_states).
 */
multimap_nfa::multimap_nfa(int n_states, int s_start, vector<int> a_states, int **trans, int n_transitions) {
  num_states = n_states;
  num_transitions = n_transitions;
  /* members the original left uninitialised on this path */
  num_accept_ids = 0;
  num_starts = 1; /* NOTE(review): one start state is registered below -- confirm intended meaning of num_starts */
  eps_eliminated = false;

  starts.insert(s_start);
  cout << "constructor num_states" << num_states << endl; 
  accepting_states = a_states;

  /* one (initially empty) transition multimap per state */
  if (num_states > 0)
    transitions.resize(num_states);

  for (int i = 0; i < num_transitions; i++) {
    const int from = trans[i][0];
    if ((from < 0) || (from >= num_states)) {
      cerr << "invalid source state " << from << " in transition " << i << endl;
      exit(1);
    }
    transitions[from].insert(pair<int, int>(trans[i][1], trans[i][2]));
  }
}

/* Format of an NFA transition file
 * --------------------------------
 * 1. number of states 
 * 2. number of (acc_state_no, accept_id) pairs
 *    acc_state_no accept_id
 *    ...
 * 3. number of accepting states, followed by the accepting states themselves
 * 4. start state no
 * 5. number of transitions
 *    transition
 *    ....
 * */
/*
multimap_nfa::multimap_nfa(char *nfile, bool eps_el) {
  ifstream nfa_file;
  int i, num_accepting_states, tmps, s_start, j;
  int **T_transitions;
  multimap<int, int> **p_state_entry;
  int tmp_st, tmp_acc_id, tmp_loc_s;
  multimap<int, int>::iterator ita;
 
  nfa_file.open(nfile);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }

  // read the number of states and num of (acc_state_no, accept_id) pairs 
  nfa_file >> num_states >> num_accept_ids;
  for (i = 0; i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<int, int>(tmp_st, tmp_acc_id));
    // fill out the actual start states for epsilon elimination use 
    if (i == 0) {
      all_starts.insert(2);
      all_starts.insert(tmp_st + 1 + 2);
      real_starts.push_back(2);
      real_starts.push_back(tmp_st + 1 + 2);
    }
    else if (i < num_accept_ids - 1) {
      real_starts.push_back(tmp_st + 1 + 2 + 2);
      added_starts.push_back(tmp_st + 1);
      added_acc_sts.push_back(tmp_st + 2);
      all_starts.insert(tmp_st + 1 + 2 + 2);
      all_starts.insert(tmp_st + 1);
    }
    // fill the added starts to all-starts for epsilon elimination use 
    else { // skip the first one 
      all_starts.insert(tmp_st + 1);
      added_starts.push_back(tmp_st + 1);
      // collect added accepting states 
      added_acc_sts.push_back(tmp_st + 2);
    }
  }
  
  // construct a map used for epsilon elimination 
  if (num_accept_ids > 1) {
    eps_starts_map.insert(pair<int, int>(real_starts[0], added_starts[0]));
    eps_starts_map.insert(pair<int, int>(real_starts[1], added_starts[0]));
    for (j = 2; j < real_starts.size(); j++) {
      eps_starts_map.insert(pair<int, int>(real_starts[j], added_starts[j-1]));
    }
    for (j = 0; j < added_starts.size() - 1; j++) {
      eps_starts_map.insert(pair<int, int>(added_starts[j], added_starts[j+1]));
    }
  }


  if (num_accept_ids > 2) { 
    ita = accept_ids.begin();
    ita++;
    local_states_num_ranges.insert(pair<int, int>(0, ita->first + 1));
#ifdef TRACE_STARTS
  cout << "local range start: " << 0 << " end: " << ita->first + 1 << endl;
#endif
    tmp_loc_s = ita->first;
    ita++;
    while (ita != accept_ids.end()) {
      local_states_num_ranges.insert(pair<int, int>(tmp_loc_s + 1, ita->first + 1));
#ifdef TRACE_STARTS
  cout << "local range start: " << tmp_loc_s + 1 << " end: " << ita->first + 1 << endl;
#endif
      tmp_loc_s = ita->first;
      ita++;
    } 
  } 

  nfa_file >> num_accepting_states;
  for (i = 0; i < num_accepting_states; i++) {
    nfa_file >> tmps;
    accepting_states.push_back(tmps);
  }   

  // read start state and the number of transitions 
  if (!eps_el) {
    nfa_file >> s_start >> num_transitions;
    starts.insert(s_start);
    eps_eliminated = false;
  } else if (eps_el) {
    nfa_file >> num_starts;
    for (i = 0; i < num_starts; i++) {
      nfa_file >> tmps;
      starts.insert(tmps);
    }
    nfa_file >> num_transitions;
  } else {
    cerr << "invalid flag" << endl;
    exit(1);
  }

  // allocate space for transition table 
  T_transitions = (int **)malloc(num_transitions * sizeof(int *));
  if (T_transitions == NULL)
    perror("out of memory"), exit(1);
  
  for (i = 0; i < num_transitions; i++) {
    T_transitions[i] = (int *)malloc(NUM_COLUMN * sizeof(int));
    if (T_transitions[i] == NULL)
      perror("out of memory"), exit(1);
  }

  // read the transitions 
  for (i = 0; i < num_transitions; i++) 
    nfa_file >> T_transitions[i][0] >> T_transitions[i][1] >> T_transitions[i][2];
  
  nfa_file.close();  

  // fill the transitions into multimap 
  p_state_entry = (multimap<int, int> **)malloc(num_states * sizeof(multimap<int, int> *));
  for (i = 0; i < num_states; i++) 
    p_state_entry[i] = new multimap<int, int>();

  for (i = 0; i < num_transitions; i++) {
    p_state_entry[T_transitions[i][0]]->insert(pair<int, int>(T_transitions[i][1], T_transitions[i][2]));
  }

  // put multimaps to the vector of transitions 
  transitions.resize(num_states);
  for (i = 0; i < num_states; i++)
    //transitions.push_back(*p_state_entry[i]);
    transitions[i] = *p_state_entry[i];

  // find out the starts: this is a correction for a previous bug 
  queue<int> q_starts;
  set<int> reachable_starts;
  set<int>::iterator its;
  q_starts.push(s_start);
  while (!q_starts.empty()) {
    find_reachable_starts(q_starts.front(), reachable_starts);
    q_starts.pop();
    if (!reachable_starts.empty()) {
      starts.insert(reachable_starts.begin(), reachable_starts.end());
      for (its = reachable_starts.begin(); its != reachable_starts.end(); its++) {
        q_starts.push(*its);
      }
      reachable_starts.clear();
    }
  }

  for (i = 0; i < num_transitions; i++)
    free(T_transitions[i]);

  free(T_transitions);
  
  for (i = 0; i < num_states; i++)
    free(p_state_entry[i]);

  free(p_state_entry);
}
*/

/*
 * Build an NFA from the transition file `nfile`.
 *
 * The file is read in this order: the number of states; the number of
 * (acc_state_no, accept_id) pairs followed by the pairs; the number of
 * accepting states followed by the states; the start information; the
 * number of transitions; one "source symbol destination" triple per
 * transition.
 *
 * eps_el selects the layout of the start information:
 *   false - a single start state number
 *   true  - a count of start states followed by that many state numbers
 *
 * While the accept-id pairs are read, all_starts, real_starts, added_starts,
 * added_acc_sts, eps_starts_map and local_states_num_ranges are filled from
 * the accept state numbers using fixed offsets.
 * NOTE(review): those offsets presumably mirror the state numbering produced
 * by combine() -- confirm against the tool that writes these files.
 *
 * After loading, every state reachable from a start state through epsilon
 * transitions (symbol -1) is added to `starts`.
 *
 * Exits with status 1 if the file cannot be opened, the state count is
 * negative, a transition cannot be read, or a transition names a source
 * state outside [0, num_states).
 */
multimap_nfa::multimap_nfa(char *nfile, bool eps_el) {
  ifstream nfa_file;
  int i, num_accepting_states, tmps;
  int tmp_st, tmp_acc_id, tmp_loc_s;
  multimap<int, int>::iterator ita;
 
  nfa_file.open(nfile);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }

  /* read the number of states and num of (acc_state_no, accept_id) pairs */
  nfa_file >> num_states >> num_accept_ids;
  for (i = 0; i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<int, int>(tmp_st, tmp_acc_id));
    /* fill out the actual start states for epsilon elimination use */
    if (i == 0) {
      all_starts.insert(2);
      all_starts.insert(tmp_st + 1 + 2);
      real_starts.push_back(2);
      real_starts.push_back(tmp_st + 1 + 2);
    }
    else if (i < num_accept_ids - 1) {
      real_starts.push_back(tmp_st + 1 + 2 + 2);
      added_starts.push_back(tmp_st + 1);
      added_acc_sts.push_back(tmp_st + 2);
      all_starts.insert(tmp_st + 1 + 2 + 2);
      all_starts.insert(tmp_st + 1);
    }
    /* last pair: only an added start and an added accepting state */
    else {
      all_starts.insert(tmp_st + 1);
      added_starts.push_back(tmp_st + 1);
      added_acc_sts.push_back(tmp_st + 2);
    }
  }
  
  /* construct a map used for epsilon elimination */
  if (num_accept_ids > 1) {
    eps_starts_map.insert(pair<int, int>(real_starts[0], added_starts[0]));
    eps_starts_map.insert(pair<int, int>(real_starts[1], added_starts[0]));
    for (size_t k = 2; k < real_starts.size(); k++) {
      eps_starts_map.insert(pair<int, int>(real_starts[k], added_starts[k-1]));
    }
    /* written as k + 1 < size() so an empty vector cannot wrap around */
    for (size_t k = 0; k + 1 < added_starts.size(); k++) {
      eps_starts_map.insert(pair<int, int>(added_starts[k], added_starts[k+1]));
    }
  }

  /* build local state number ranges for individual NFA's; the first
   * accept id is skipped on purpose (ranges start at the second entry) */
  if (num_accept_ids > 2) {
    ita = accept_ids.begin();
    ita++;
    local_states_num_ranges.insert(pair<int, int>(0, ita->first + 1));
#ifdef TRACE_STARTS
  cout << "local range start: " << 0 << " end: " << ita->first + 1 << endl;
#endif
    tmp_loc_s = ita->first;
    ita++;
    while (ita != accept_ids.end()) {
      local_states_num_ranges.insert(pair<int, int>(tmp_loc_s + 1, ita->first + 1));
#ifdef TRACE_STARTS
  cout << "local range start: " << tmp_loc_s + 1 << " end: " << ita->first + 1 << endl;
#endif
      tmp_loc_s = ita->first;
      ita++;
    } /* this will cause one more redundant range, which is harmless */
  } 

  /* read number of accepting states and the accepting state array */
  nfa_file >> num_accepting_states;
  for (i = 0; i < num_accepting_states; i++) {
    nfa_file >> tmps;
    accepting_states.push_back(tmps);
  }   

  /* read start state(s) and the number of transitions */
  if (!eps_el) {
    int s_start;
    nfa_file >> s_start >> num_transitions;
    starts.insert(s_start);
    num_starts = 1; /* NOTE(review): previously left uninitialised on this path */
  } else {
    nfa_file >> num_starts;
    for (i = 0; i < num_starts; i++) {
      nfa_file >> tmps;
      starts.insert(tmps);
    }
    nfa_file >> num_transitions;
  }
  /* NOTE(review): previously only assigned (to false) when !eps_el and left
   * uninitialised otherwise; assumes eps_el means "file is already
   * epsilon-eliminated" -- confirm. */
  eps_eliminated = eps_el;

#ifdef TRACE_EPS_ELIM
  cout << "Number of transitions " << num_transitions << endl;
#endif

  if (num_states < 0) {
    cerr << "invalid number of states " << num_states << endl;
    exit(1);
  }

  /* read the transitions directly into the per-state multimaps */
  transitions.resize(num_states);
  for (i = 0; i < num_transitions; i++) {
    int from, symbol, to;
#ifdef TRACE_EPS_ELIM
  if ((i % 100000) == 0) {
    cout << "Read "<< i << "th transition ..." << endl;
  }
#endif
    if (!(nfa_file >> from >> symbol >> to)) {
      cerr << "could not read transition " << i << endl;
      exit(1);
    }
    if ((from < 0) || (from >= num_states)) {
      cerr << "invalid source state " << from << " in transition " << i << endl;
      exit(1);
    }
    transitions[from].insert(pair<int, int>(symbol, to));
  }
  
  nfa_file.close();  

  /* Extend `starts` with everything reachable through epsilon transitions.
   * The worklist is seeded with all start states read above (the old code
   * used s_start, which is never assigned when eps_el is true), and a state
   * is queued only the first time it enters `starts`, so epsilon cycles
   * terminate. */
  set<int> pending(starts);
  set<int> reachable_starts;
  set<int>::iterator its;
  while (!pending.empty()) {
    set<int>::iterator head = pending.begin();
    const int st = *head;
    pending.erase(head);
#ifdef TRACE_EPS_ELIM
  cout << "Size of q_starts " << pending.size() << endl;
#endif
    if ((st < 0) || (st >= num_states))
      continue; /* no transition entry exists for such a state */
    find_reachable_starts(st, reachable_starts);
    for (its = reachable_starts.begin(); its != reachable_starts.end(); its++) {
      if (starts.insert(*its).second)
        pending.insert(*its);
    }
    reachable_starts.clear();
  }
#ifdef TRACE_EPS_ELIM
  cout << "Number of starts " << starts.size() << endl;
#endif
}


void multimap_nfa::find_reachable_starts(int st, set<int> &rs) {
  multimap<int, int>::iterator it;
  pair<multimap<int, int>::iterator, multimap<int, int>::iterator> ret;
  ret = transitions[st].equal_range(-1);
  for (it = ret.first; it != ret.second; ++it) {
#ifdef TRACE_EPS_ELIM
  //cout << "find_reachable_starts " << it->second << endl;
#endif
    rs.insert(it->second);
  }
}

/*
 * Print the transition table to stdout: a "state[i]" header line for each
 * state followed by whatever display_map() prints for that state's multimap.
 */
void multimap_nfa::display_transitions() {
  unsigned int state = 0;
  while (state < transitions.size()) {
    cout << "state[" << state << "]" <<endl;
    display_map(transitions[state]);
    ++state;
  }
}

void multimap_nfa::delta(int state, int symbol, set<int>& s_next) {
  multimap<int, int>::iterator it;
  pair< multimap<int,int>::iterator, multimap<int,int>::iterator > ret;
  if ((state < 0) || (state >= num_states))
    perror("invalid state"), exit(1);  

  ret = transitions[state].equal_range(symbol);
  for (it = ret.first; it != ret.second; ++it) {
    s_next.insert(it->second);
  }
 
}


/*
 * Merge the NFA `x` into this one as an alternation.
 *
 * Resulting state numbering:
 *   0                                    new start state
 *   1 .. n1                              states of this NFA, shifted by 1
 *   n1 + 1 .. n1 + n2                    states of x, shifted by n1 + 1
 *   n1 + n2 + 1 (== num_states - 1)      new, single accepting state
 * where n1 / n2 are the state counts of this NFA / x before the call.
 *
 * Epsilon transitions (symbol -1) are added from state 0 to the (shifted)
 * first start state of each operand, and from every old accepting state of
 * either operand to the new accepting state.
 *
 * Changes from the previous implementation:
 *   - no heap-allocated temporary multimaps (they were never deleted);
 *   - the epsilon edge of this NFA's old accepting states is now inserted
 *     next to their existing outgoing transitions instead of replacing
 *     them, matching both the handling of x's accepting states and the
 *     num_transitions arithmetic below.
 *
 * NOTE(review): only *starts.begin() of each operand is linked from state 0,
 * and the entries already in `starts` are kept without renumbering;
 * accept_ids is not touched either. Confirm that callers rely on this.
 */
void multimap_nfa::combine(multimap_nfa *x) {
  const int start1 = *starts.begin();
  const int start2 = *(x->starts.begin());
  const int n_states_1 = num_states;
  const int n_states_2 = x->num_states;
  const vector<int> old_accepting_states = accepting_states;

  /* the number of states after combination */
  num_states += x->num_states + 2;

  /* the number of transitions after combination: both operands, one epsilon
   * edge per old accepting state, and two epsilon edges out of state 0 */
  num_transitions += x->num_transitions + accepting_states.size() + x->accepting_states.size() + 2;

  /* the new accepting state is the last state */
  const int new_accept = num_states - 1;
  accepting_states.clear();
  accepting_states.push_back(new_accept);

  transitions.resize(num_states);

  /* shift the transitions of the first nfa (current nfa) by 1; walk
   * backwards so no entry is overwritten before it has been copied */
  for (int i = n_states_1 - 1; i >= 0; i--)
    transitions[i+1] = transitions[i];

  /* state 0 becomes the new start: epsilon edge to the first nfa's start */
  transitions[0].clear();
  transitions[0].insert(pair<int, int>(-1, start1 + 1));
  starts.insert(0);  

  /* rename the states of the first nfa: offset the state number by 1 */  
  for (int i = 1; i <= n_states_1; i++) 
    offset_state(transitions[i], 1);

  /* add epsilon transition from state 0 to the start state of nfa2 */  
  transitions[0].insert(pair<int, int>(-1, n_states_1 + start2 + 1));

  /* epsilon transitions from accepting states of nfa1 to the new accepting
   * state; done after offset_state() so the new target is not shifted */
  for (size_t i = 0; i < old_accepting_states.size(); i++)
    transitions[old_accepting_states[i] + 1].insert(pair<int, int>(-1, new_accept));

  /* copy the transition table of the second nfa and offset its state
   * numbers by n_states_1 + 1 (x itself is left unmodified) */
  for (int i = 0; i < n_states_2; i++) {
    transitions[n_states_1 + 1 + i] = x->transitions[i];
    offset_state(transitions[n_states_1 + 1 + i], n_states_1 + 1);
  }
 
  /* epsilon transitions from accepting states of nfa2 to the new accepting state */
  for (size_t i = 0; i < x->accepting_states.size(); i++)
    transitions[x->accepting_states[i] + n_states_1 + 1].insert(pair<int, int>(-1, new_accept));
}


void multimap_nfa::out_to_file(char *f) {
  ofstream out_file;
  unsigned int i;
  int j;
  unsigned starts_size = 0;
  multimap<int, int>::iterator it, ait;
  set<int>::iterator it2;

  out_file.open(f);
  out_file << num_states << endl;
  out_file << num_accept_ids << endl;
  /* output the (acc_state_no, accept_id) pairs */
  ait = accept_ids.begin();
  while (ait != accept_ids.end()) {
    out_file << ait->first << " " << ait->second << endl;
    ait++;
  }
  /* output the accepting states */
  out_file << accepting_states.size();
  for (i = 0; i < accepting_states.size(); i++) {
    out_file << " "<< accepting_states[i];
  }
  out_file << endl;

  /* output start states and num of transitions */
  /* find real starts first */
  /*for (it2 = starts.begin(); it2 != starts.end(); it2++) {
    if (transitions[*it2].empty())
      starts.erase(it2);
  }*/

  for (it2 = starts.begin(); it2 != starts.end(); it2++) {
    if (!transitions[*it2].empty()) {
      starts_size++;
    }
  }
  out_file << starts_size << " "; /* output the number of starts first */
  for (it2 = starts.begin(); it2 != starts.end(); it2++) {
    if (!transitions[*it2].empty()) {	// only output real start states, this will make the number of output starts not equal to the starts.size()
      out_file << *it2 << " ";
    }
  }
  out_file << endl;  
  out_file << num_transitions << endl;

  for (j = 0; j < num_states - 1; j++) {
    it = transitions[j].begin();
    while (it != transitions[j].end()) { 
      out_file << j << " " << it->first << " " << it->second << endl; 
      it++;
    }
  }

  out_file.close();
}

void multimap_nfa::out_to_file(char *f, int st_offset) {
  ofstream out_file;
  unsigned int i;
  int j;
  unsigned starts_size = 0;
  multimap<int, int>::iterator it, ait;
  set<int>::iterator it2;

  out_file.open(f, ios::out | ios::app);
  //out_file << num_states << endl;
  //out_file << num_accept_ids << endl;
  /* output the (acc_state_no, accept_id) pairs */
  /*ait = accept_ids.begin();
  while (ait != accept_ids.end()) {
    out_file << ait->first + st_offset << " " << ait->second << endl;
    ait++;
  }*/
  /* output the accepting states */
  /*out_file << accepting_states.size();
  for (i = 0; i < accepting_states.size(); i++) {
    out_file << " "<< accepting_states[i] + st_offset;
  }
  out_file << endl;*/

/*  for (it2 = starts.begin(); it2 != starts.end(); it2++) {
    if (!transitions[*it2].empty()) {
      starts_size++;
    }
  }
  out_file << starts_size << " "; 
  for (it2 = starts.begin(); it2 != starts.end(); it2++) {
    if (!transitions[*it2].empty()) {
      out_file << *it2 + st_offset << " ";
    }
  }
  out_file << endl;
  out_file << num_transitions << endl; */

  for (j = 0; j < num_states - 1; j++) {
    it = transitions[j].begin();
    while (it != transitions[j].end()) { 
      out_file << j + st_offset << " " << it->first << " " << it->second + st_offset << endl;
      it++;
    }
  }

  out_file.close();
}

bool multimap_nfa::accept_or_not(const char *str) {
  unsigned int i;
  unsigned int slen = strlen(str);
  set<int> next_frontiers;
  set<int>::iterator it;

  /* intial frontiers only include the start state */
  frontiers = starts;  

  for (i = 0; i < slen; i++) {
    it = frontiers.begin();
    while (it != frontiers.end()) {
      delta(*it, (int)str[i], next_frontiers);
      it++;
    }
    frontiers.clear();
    frontiers = next_frontiers;
    next_frontiers.clear();
    /* go to the next symbol */	
  }
  if (is_accepting(frontiers))
    return true;
  return false;
}

/* accept_or_not for stream of characters */
bool multimap_nfa::accept_or_not_stream(const char *str) {
  unsigned int i, match_count = 0;
  unsigned int slen = strlen(str);
  set<int> next_frontiers;
  set<int>::iterator it;
  vector<int> offsets;
  /* intial frontiers only include the start states */
  frontiers = starts;  

  for (i = 0; i < slen; i++) {
    it = frontiers.begin();
    while (it != frontiers.end()) {
      delta(*it, (int)str[i], next_frontiers);
      it++;
    }
    if (is_accepting(next_frontiers)) {
    //if (is_accepting_state(next_frontiers)) {
      cout << "at offset " << i << endl;
      offsets.push_back(i); 
      match_count++;
    }

    if (next_frontiers.empty()) {
#ifdef DEBUG
  cout << "empty frontiers" << endl;
#endif
      /*frontiers.clear();*/
      frontiers = starts;
    } else {
      /*frontiers.clear(); */
      frontiers = next_frontiers;
      next_frontiers.clear();
    }
    /* go to the next symbol */	
  }
#ifdef DEBUG
  cout << "match_count " << match_count << endl;
  for (i = 0; i < offsets.size(); i++)
    cout << offsets[i] << endl;
  cout << endl;
#endif
  if (match_count) 
    return true;

  return false;
}

/*
 * Return true if at least one state in `s` has an entry in accept_ids.
 *
 * Silent counterpart of is_accepting(): same membership test, no output.
 * The previous implementation had the comparison inverted and returned true
 * as soon as it met a state WITHOUT an accept_ids entry.
 */
bool multimap_nfa::is_accepting_state(set<int> s) {
  for (set<int>::iterator its = s.begin(); its != s.end(); ++its) {
    if (accept_ids.find(*its) != accept_ids.end())
      return true;
  }
  return false;
}

/*
 * Report which signatures are matched by the state set `s`.
 * For every accept_ids entry whose key is a state in `s`, prints
 * "signature [<accept_id>] matched " to stdout. Returns true if at least
 * one such entry exists.
 */
bool multimap_nfa::is_accepting(set<int> s) {
  typedef multimap<int, int>::iterator id_iter;

  bool any_match = false;
  for (set<int>::iterator st = s.begin(); st != s.end(); ++st) {
    const pair<id_iter, id_iter> ids = accept_ids.equal_range(*st);
    for (id_iter hit = ids.first; hit != ids.second; ++hit) {
	cout << "signature [" << hit->second << "] matched ";
	any_match = true;
    }
  }
  return any_match;
}


/*
 * Remove epsilon transitions (symbol -1) one at a time.
 *
 * For every epsilon transition i -> e:
 *   - if i is a start state, e becomes a start state as well;
 *   - every transition (j, sym) -> i gets a sibling (j, sym) -> e, unless
 *     is_member() reports that it already exists;
 *   - the epsilon transition itself is erased.
 * num_transitions is recomputed from the table at the end.
 *
 * Fix: the previous loop erased the current element and then incremented
 * the erased iterator, which is undefined behaviour. The iterator is now
 * advanced before the element is erased (erase(it++)); multimap insertions
 * performed meanwhile do not invalidate it.
 */
void multimap_nfa::epsilon_elimination() {
  multimap<int, int>::iterator it, it2, it3;
  multimap<int, int> tmp_state;

  for (int i = 0; i < num_states; i++) {
    it = transitions[i].begin();
    while (it != transitions[i].end()) {
      if (it->first != -1) {
        ++it;
        continue;
      }

      /* found an epsilon transition eps_start -> eps_end */
      const int eps_start = i;
      const int eps_end = it->second;

      /* if i is a start state, add the next state to start set */
      if (starts.find(i) != starts.end())
        starts.insert(eps_end);

      /* redirect a copy of every transition that ends in eps_start */
      for (int j = 0; j < num_states; j++) {
        for (it2 = transitions[j].begin(); it2 != transitions[j].end(); it2++) {
          if (it2->second == eps_start)
            tmp_state.insert(pair<int, int>(it2->first, eps_end));
        }
        /* copy the entries in tmp_state to transitions[j]; done after the
         * scan so transitions[j] is not modified while it is iterated */
        for (it3 = tmp_state.begin(); it3 != tmp_state.end(); it3++) {
          if (!is_member(transitions[j], it3->first, it3->second))
            transitions[j].insert(pair<int, int>(it3->first, it3->second));
        }
        tmp_state.clear();
      }

      /* remove this epsilon transition; `it` moves on before the erase */
      transitions[i].erase(it++);
    }
  }

  /* update num_transitions */
  int n_trans = 0;
  for (int i = 0; i < num_states; i++)
    n_trans += transitions[i].size();
  num_transitions = n_trans;
}

/*
 * Epsilon elimination that handles all epsilon transitions of a state in
 * one pass over the table.
 *
 * For each state i:
 *   1. every epsilon transition i -> e is erased (num_transitions is
 *      decremented) and e is remembered; if i is a start state, e is added
 *      to `starts`;
 *   2. if any were found, every transition (j, sym) -> i gets a sibling
 *      (j, sym) -> e for each remembered e, unless is_member() reports it
 *      already exists (num_transitions is incremented per insertion).
 *
 * Fix: the previous loop erased the current element and then incremented
 * the erased iterator, which is undefined behaviour. The iterator is now
 * advanced before the element is erased (erase(it++)).
 */
void multimap_nfa::epsilon_elimination_improved() {
  multimap<int, int>::iterator it, it2, it3;
  multimap<int, int> tmp_state;
  vector<int> next_eps_states;
  vector<int>::iterator itr;

  for (int i = 0; i < num_states; i++) {
#ifdef DEBUG 
    if ((i % 1000) == 0)
      cout << "At state " << i << " for epsilon eliminations ..." << endl;
#endif    
    next_eps_states.clear();

    /* collect and remove the epsilon transitions leaving i */
    it = transitions[i].begin();
    while (it != transitions[i].end()) {
      if (it->first != -1) {
        ++it;
        continue;
      }
      /* if i is a start state, add the next state to start set */
      if (starts.find(i) != starts.end()) 
        starts.insert(it->second);
#ifdef TRACE_STARTS
  set<int>::iterator its = starts.begin();
  cout << "set of starts ---- state " << i << endl; 
  cout << "it->first " << it->first << "it->second " << it->second << endl;
  while (its != starts.end()) {
    cout << *its << " ";
    its++;
  }
  cout << endl; 
#endif
      next_eps_states.push_back(it->second);
      transitions[i].erase(it++); /* `it` moves on before the erase */
      num_transitions--;
    }

    if (next_eps_states.empty())
      continue;

    /* i was the source of the epsilon transitions collected above */
    const int eps_start = i;
    for (int j = 0; j < num_states; j++) {
      for (it2 = transitions[j].begin(); it2 != transitions[j].end(); it2++) {
        if (it2->second == eps_start) {
          for (itr = next_eps_states.begin(); itr != next_eps_states.end(); itr++)
            tmp_state.insert(pair<int, int>(it2->first, *itr));
        }
      }
      /* copy the entries in tmp_state to transitions[j]; done after the
       * scan so transitions[j] is not modified while it is iterated */
      for (it3 = tmp_state.begin(); it3 != tmp_state.end(); it3++) {
        if (!is_member(transitions[j], it3->first, it3->second)) {
          /* is_member() can be improved by using equal_range() */
          transitions[j].insert(pair<int, int>(it3->first, it3->second));
          num_transitions++;
        }
      }
      tmp_state.clear();  
    }
  }
}

/*------VOID NOW--------*/
void multimap_nfa::epsilon_elimination_im2() {
  unsigned int i, j;
  multimap<int, int>::iterator it, it2, it3, it4;
  int eps_start, eps_end, n_trans;
  multimap<int, int> tmp_state, eps_from_st;
  vector<int> next_eps_states;
  vector<int>::iterator itr, it5;
  set<int> eps_reachable_s;
  
  for (i = 0; i < num_states; i++) {
    if ((i % 1000) == 0)
      cout << "At state " << i << " for epsilon eliminations ..." << endl;
#ifdef DEBUG 
    if ((i % 1000) == 0)
      cout << "At state " << i << " for epsilon eliminations ..." << endl;
#endif    
    next_eps_states.clear();
    /* find out all reachable states from i through epsilon transition */
    for (it = transitions[i].begin(); it != transitions[i].end(); it++) {
      if (it->first == -1) { /* found a epsilon transition */
        eps_start = i;
	/* if i is a start state, add the next state to start set */
	//if (starts.find(i) != starts.end()) {
	if (all_starts.find(i) != all_starts.end()) {
	  starts.insert(it->second);
	  /* also insert the eps reachable states of it->second */
	  get_local_eps_reachable(it->second, eps_reachable_s);
          starts.insert(eps_reachable_s.begin(), eps_reachable_s.end());
  	  eps_reachable_s.clear();
	  //cout << "starts insert " << it->second << endl;
	}
#ifdef TRACE_STARTS
  set<int>::iterator its = starts.begin();
  cout << "set of starts ---- state " << i << endl; 
  cout << "it->first " << it->first << "it->second " << it->second << endl;
  while (its != starts.end()) {
    cout << *its << " ";
    its++;
  }
  cout << endl; 
#endif
	next_eps_states.push_back(it->second);
 	transitions[i].erase(it);
	num_transitions--;
      }
    }
#ifdef DEBUG
  cout << "next_eps_states --------" << endl;
  for (itr = next_eps_states.begin(); itr != next_eps_states.end(); itr++)
    cout << *itr << " ";
  cout << endl;
#endif
    if (!next_eps_states.empty()) {
      if (is_start(i)) {
	j = get_eps_to_start(i);
#ifdef DEBUG
  cout << "after get_eps_to_start(), j = " << j << endl;
#endif
        if (j != -1) {
	  for (itr = next_eps_states.begin(); itr != next_eps_states.end(); itr++)
	    tmp_state.insert(pair<int, int>(-1, *itr));
	/* copy the entries in tmp_state to transitions[j] */
	  for (it3 = tmp_state.begin(); it3 != tmp_state.end(); it3++) {
  	    transitions[j].insert(pair<int, int>(it3->first, it3->second));
	    num_transitions++;
	  }
          /* clear tmp_state */
	  tmp_state.clear();
	} /*endif j!=-1*/
      } else if (is_real_accept(i)) { /* real accepting state, not added */
#ifdef DEBUG
  cout << i << " is an accepting state" << endl;
#endif
	/* locally search */
	eps_from_st.clear();
	local_search_eps_from(i, eps_from_st);
	for (it4 = eps_from_st.begin(); it4 != eps_from_st.end(); it4++) {
	  for (it5 = added_acc_sts.begin(); it5 != added_acc_sts.end(); it5++) {
	    if (i < *it5) {
	      transitions[it4->first].insert(pair<int, int>(it4->second, *it5));
	      num_transitions++;
	    }
	  }
	}
      } else { /* internal node */
	/* locally search */
	eps_from_st.clear();
	local_search_eps_from(i, eps_from_st);
#ifdef DEBUG
  cout << i << " is an internal node" << endl;
  cout << "eps_from_st -----" << endl;
  for (it = eps_from_st.begin(); it != eps_from_st.end(); it++)
    cout << it->first << "->" << it->second << endl;
#endif
	for (it4 = eps_from_st.begin(); it4 != eps_from_st.end(); it4++) {
	  for (itr = next_eps_states.begin(); itr != next_eps_states.end(); itr++) {
	    /* check duplicity if possible */
	    transitions[it4->first].insert(pair<int, int>(it4->second, *itr));
	    num_transitions++;
	  }
	}
      }

    } /* endif */
  }
}

/* This is a modified version of im2 which has a start bug */
void multimap_nfa::epsilon_elimination_im3() {
  unsigned int i;
  int j;
  multimap<int, int>::iterator it, it2, it3, it4, it6;
  int eps_end, n_trans;
  multimap<int, int> tmp_state, eps_from_st;
  vector<int> next_eps_states;
  vector<int>::iterator itr, it5;
  set<int> eps_reachable_s;
  pair<multimap<int, int>::iterator, multimap<int, int>::iterator> ret;
  int dup = 0;
  
  for (i = 0; i < num_states; i++) {
    if ((i % 1000) == 0)
      cout << "At state " << i << " for epsilon eliminations ..." << endl;
    
    next_eps_states.clear();
    /* find out all reachable states from i through epsilon transition */
    for (it = transitions[i].begin(); it != transitions[i].end(); it++) {
      if (it->first == -1) { /* found a epsilon transition */
	next_eps_states.push_back(it->second);
 	transitions[i].erase(it);
	num_transitions--;
      }
    }

    if (!next_eps_states.empty()) {
      if (is_real_accept(i)) { /* real accepting state, not added */
	/* locally search */
	eps_from_st.clear();
	local_search_eps_from(i, eps_from_st);
	for (it4 = eps_from_st.begin(); it4 != eps_from_st.end(); it4++) {
	  for (it5 = added_acc_sts.begin(); it5 != added_acc_sts.end(); it5++) {
	    if (i < *it5) {
	      transitions[it4->first].insert(pair<int, int>(it4->second, *it5));
	      num_transitions++;
	    }
	  }
	}
      } else { /* internal node */
	/* locally search */
	eps_from_st.clear();
	local_search_eps_from(i, eps_from_st);
	for (it4 = eps_from_st.begin(); it4 != eps_from_st.end(); it4++) {
	  for (itr = next_eps_states.begin(); itr != next_eps_states.end(); itr++) {
	    /* check duplicity if possible */
	    ret = transitions[it4->first].equal_range(it4->second);
	    dup = 0;
	    for (it6 = ret.first; it6 != ret.second; ++it6) {
	      if (*itr == it6->second) {
		dup = 1;
		break;
	      }
	    }
	    if (dup == 0) {
	      transitions[it4->first].insert(pair<int, int>(it4->second, *itr));
	      num_transitions++;
	    }
	  }
	}
      }

    } /* endif */
  }
}

/*
 * Return the first entry of added_starts that is strictly greater than st,
 * scanning from the front, or -1 when no such entry exists.
 * The scan stops at the first greater entry, so the result is the closest
 * one only if added_starts is in ascending order (not checked here).
 */
int multimap_nfa::get_closest_added_start(int st) {
  unsigned int i = 0;
  /* the bounds test must come first so added_starts[i] is never read
   * past the end of the container */
  while ((i < added_starts.size()) && (st >= added_starts[i])) {
    i++;
  }
  cout << "i = " << i << endl;
  cout << "num of added starts " << added_starts.size() << endl;
  if (i < added_starts.size())
    return added_starts[i];
  else
    return -1;
}

/* Tell whether state s is a member of the starts container. */
bool multimap_nfa::is_start(int s) {
  return starts.find(s) != starts.end();
}

bool multimap_nfa::is_real_accept(int s) {
  if (accept_ids.find(s) != accept_ids.end()) {
#ifdef DEBUG
  cout << "is_real_accept() return true" << endl;
#endif
    return true;
  }
#ifdef DEBUG
  cout << "is_real_accept() return false" << endl;
#endif
  return false;
}

/*
 * Look up state s in eps_starts_map.
 * Returns the value mapped to s (the one find() yields), or -1 when s has
 * no entry.
 */
int multimap_nfa::get_eps_to_start(int s) {
  multimap<int, int>::iterator pos = eps_starts_map.find(s);
  if (pos == eps_starts_map.end())
    return -1;
  return pos->second;
}

/*
 * Collect the transitions that enter state s from states in the same local
 * range as s (the range reported by get_local_range()).
 *
 * For every transition (sym -> s) owned by a state src of that range, the
 * pair (src, sym) is added to states_sym. Every symbol is reported, not
 * only epsilon. Nothing is done when s is an added accepting state.
 */
void multimap_nfa::local_search_eps_from(int s, multimap<int, int>& states_sym) {
  int range_first, range_last;

  /* no need to do search for added accepting states */
  if (is_added_acc_state(s))
    return;

  get_local_range(s, range_first, range_last);
#ifdef DEBUG
  cout << "local_search_eps_from ----" << endl;
  cout << "start = " << range_first << " end = " << range_last << endl;
#endif

  for (int src = range_first; src <= range_last; ++src) {
    multimap<int, int>::iterator edge = transitions[src].begin();
    while (edge != transitions[src].end()) {
      if (edge->second == s)
        states_sym.insert(pair<int, int>(src, edge->first));
      ++edge;
    }
  }
}

/*
 * Add to eps_r every state that s reaches through a single epsilon
 * transition (symbol -1). Nothing is done when s is an added accepting
 * state or an added start state.
 */
void multimap_nfa::get_local_eps_reachable(int s, set<int>& eps_r) {
  int start, end;
  multimap<int, int>::iterator it;

  if (is_added_acc_state(s) || is_added_start_state(s))
    return;

  /* The range itself is not needed, but the call is kept because
   * get_local_range() terminates the program when s is in no range. */
  get_local_range(s, start, end);

  /* only the transitions of s matter, so there is no need to walk the
   * whole local range looking for index s */
  for (it = transitions[s].begin(); it != transitions[s].end(); it++) {
    if (it->first == -1)	/* find a epsilon transition */
      eps_r.insert(it->second);
  }
}

/*
 * Tell whether s is one of the states stored in added_acc_sts.
 * Linear scan; performance can be improved if added_acc_sts is a set.
 */
bool multimap_nfa::is_added_acc_state(int s) {
  vector<int>::iterator pos = added_acc_sts.begin();
  while (pos != added_acc_sts.end()) {
    if (*pos == s)
      return true;
    ++pos;
  }
  return false;
}

/*
 * Tell whether s is one of the states stored in added_starts.
 * Linear scan; performance can be improved if added_starts is a set.
 */
bool multimap_nfa::is_added_start_state(int s) {
  bool found = false;
  unsigned int idx = 0;

  while (!found && idx < added_starts.size()) {
    found = (added_starts[idx] == s);
    idx++;
  }
  return found;
}

/*
 * Find the first entry (first, second) of local_states_num_ranges with
 * first <= s <= second and return its bounds through start and end.
 * When no entry contains s, an error is written to stderr and the program
 * exits with status 1.
 */
void multimap_nfa::get_local_range(const int s, int& start, int& end) {
  multimap<int, int>::iterator range = local_states_num_ranges.begin();

  while (range != local_states_num_ranges.end()) {
    const bool below = (s < range->first);
    const bool above = (s > range->second);
    if (!below && !above) {
      start = range->first;
      end = range->second;
      return;
    }
    ++range;
  }

  cerr << "get_local_range: not found for state " << s << endl;
  exit(1);
}

/*
 * Tell whether the multimap holds the exact entry (k, val).
 * Only the entries stored under key k are inspected, so the lookup is
 * logarithmic in the map size plus the number of entries with key k.
 */
bool is_member(multimap<int, int> mm, int k, int val) {
  pair<multimap<int, int>::iterator, multimap<int, int>::iterator> range = mm.equal_range(k);
  multimap<int, int>::iterator it;

  for (it = range.first; it != range.second; ++it) {
    if (it->second == val)
      return true;
  }
  return false;
}

/*-------------- Non-member functions ---------------*/
/*
 * Print every (symbol, state) entry of p on stdout, one entry per line,
 * in the iteration order of the multimap.
 */
void display_map(multimap<int, int> p) {
  for (multimap<int, int>::iterator entry = p.begin(); entry != p.end(); ++entry) {
    const int symbol = entry->first;
    const int state = entry->second;
    cout << "symbol = " << symbol << "-->" << " state[" << state << "]" << endl;
  }
}

/*
 * Add offset to the target state of every entry of p, in place.
 * The keys (symbols) are left untouched.
 */
void offset_state(multimap<int, int>& p, int offset) {
  for (multimap<int, int>::iterator entry = p.begin(); entry != p.end(); ++entry) {
    /* entry->first is a symbol, entry->second is the state to shift */
    entry->second = entry->second + offset;
  }
}

/* unused now */
void regex_combination(const char *refile, char *obj_regex) {
  ifstream src_re;
  char tmp_line[LINE_LEN];
  char tmp_re[LINE_LEN];

  memset(tmp_line, 0, LINE_LEN);
  memset(tmp_re, 0, LINE_LEN);

  src_re.open(refile);
  if (!src_re) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }

  /* read the regex per line */
  while (src_re.getline(tmp_line, LINE_LEN)) {
    /* remove the / at the ends of a regex */
    remove_slash(tmp_re, tmp_line);
    /* add | to object regex before concatenation */
    add_bar(obj_regex);
    strcat(obj_regex, tmp_re);
    memset(tmp_line, 0, LINE_LEN);
    memset(tmp_re, 0, LINE_LEN);
  }

  /* remove the beginning bar */
  if (obj_regex[0] == '|') {
    memmove(obj_regex, obj_regex + 1, strlen(obj_regex) - 1);
    obj_regex[strlen(obj_regex) - 1] = '\0';
  }
  
}

/* unused now */
/*
 * Copy src_str into dst_str without its enclosing slashes: one leading '/'
 * and one trailing '/' are dropped when present.
 * dst_str must be able to hold strlen(src_str) + 1 bytes.
 */
void remove_slash(char *dst_str, const char *src_str) {
  size_t len;

  if (src_str[0] == '/')
    strcpy(dst_str, src_str + 1);
  else
    strcpy(dst_str, src_str);

  /* an empty result ("" or "/") has no last character to inspect;
   * without this test len - 1 would wrap around */
  len = strlen(dst_str);
  if (len > 0 && dst_str[len - 1] == '/') {
#ifdef DEBUG
  cout << "dst_str" << dst_str << endl;
#endif
    dst_str[len - 1] = '\0';
#ifdef DEBUG
  cout << "dst_str" << dst_str << endl;
#endif
  }
}

/* unused now */
/*
 * Append a '|' to the NUL-terminated string s, in place.
 * s must have room for one more character. The terminator is rewritten
 * explicitly so the result does not depend on the buffer being zero-filled.
 */
void add_bar(char *s) {
  size_t slen;
  slen = strlen(s);
  s[slen] = '|';
  s[slen + 1] = '\0';
}

/*
 * Write num_str test strings derived from src to the file outfile.
 *
 * File layout: the first line is num_str, the second line is src itself,
 * and each of the remaining num_str - 1 lines is src with a random string
 * (characters drawn from alphabet, length below strlen(alphabet)) inserted
 * at the front, at offset pos_to_insert, or at the end, chosen at random.
 *
 * pos_to_insert is clamped to the length of src. The program exits with
 * status -1 when the output file cannot be opened or when the generated
 * strings could not fit in the MAX_STRING_LEN work buffers.
 * The random generator is re-seeded from the current time on every call.
 */
void generate_test_strings(const char *src, const char *alphabet, size_t pos_to_insert, size_t num_str, char *outfile) {
  FILE *f;
  size_t src_len, alph_len, i, len_to_insert;
  char tmp_str[MAX_STRING_LEN], str_to_insert[MAX_STRING_LEN];
  int where_to_add;
  /* get the length of source string */
  src_len = strlen(src);
  /* get the length of alphabet string */
  alph_len = strlen(alphabet);

  /* the longest line is src plus (alph_len - 1) inserted characters plus
   * the terminator; refuse inputs that would overflow the buffers */
  if (src_len + (alph_len > 0 ? alph_len : 1) > MAX_STRING_LEN) {
    printf("%s Source string and alphabet are too long!", __FUNCTION__);
    exit(-1);
  }
  /* an insert position past the end of src would read out of bounds */
  if (pos_to_insert > src_len)
    pos_to_insert = src_len;

  f = fopen(outfile, "w");
  if (f == NULL) {
    printf("%s Failed to open a file to write!", __FUNCTION__);
    exit(-1);
  }

  /* the first line says how many strings will be generated */
  fprintf(f, "%lu\n", (unsigned long)num_str);
  /* followed by the generated string */
  fprintf(f, "%s\n", src);

  srand(time(0));
  for (i = 1; i < num_str; i++) {
    where_to_add = rand() % 3;
    /* an empty alphabet yields an empty insertion instead of a modulo
     * by zero */
    len_to_insert = (alph_len > 0) ? (rand() % alph_len) : 0;
    memset(str_to_insert, 0, MAX_STRING_LEN);
    rand_string(alphabet, len_to_insert, str_to_insert);

    memset(tmp_str, 0, MAX_STRING_LEN);
    switch (where_to_add) {
      /* insert to the front of the src string */
      case 0:
        memcpy(tmp_str, str_to_insert, len_to_insert);
        strcpy(tmp_str + len_to_insert, src);
        break;
      /* insert to the mid of the src string */
      case 1:
        memcpy(tmp_str, src, pos_to_insert);
        memcpy(tmp_str + pos_to_insert, str_to_insert, len_to_insert);
        strcpy(tmp_str + pos_to_insert + len_to_insert, src + pos_to_insert);
        break;
      /* append to the src string */
      case 2:
        memcpy(tmp_str, src, src_len);
        strcpy(tmp_str + src_len, str_to_insert);
        break;
      default:
        printf("unknow position to add\n");
        break;
    }
    /* output the tmp_str */
    fprintf(f, "%s\n", tmp_str);
  }

  /* flush and release the output file */
  fclose(f);
}

/*
 * Fill obj_str with len characters drawn at random (rand()) from alphabet
 * and terminate it. obj_str must hold at least len + 1 bytes.
 * With an empty alphabet there is nothing to draw from, so obj_str becomes
 * the empty string.
 */
void rand_string(const char *alphabet, const size_t len, char *obj_str) {
  size_t i, alph_len;

  alph_len = strlen(alphabet);
  if (alph_len == 0) {
    /* avoids rand() % 0 */
    obj_str[0] = '\0';
    return;
  }

  for (i = 0; i < len; i++)
    obj_str[i] = alphabet[rand() % alph_len];
  obj_str[len] = '\0';
}

/*
 * Return the user CPU time consumed so far by the calling process, in
 * milliseconds, as reported by getrusage(RUSAGE_SELF).
 */
int cputime()
{
  struct rusage usage;

  getrusage(RUSAGE_SELF, &usage);
  /* seconds and microseconds are both scaled to milliseconds */
  const struct timeval& user_time = usage.ru_utime;
  return user_time.tv_sec * 1000 + user_time.tv_usec / 1000;
}

/*
 * Build the DFA lookup tables from an in-memory automaton.
 *
 * trans_tab[state][symbol] receives the first state reported by
 * nfa.delta() for that pair; acc[state] receives a -1 terminated array of
 * the accept ids of the state, or 0 when the state has none. num_states
 * and start (the first element of nfa.starts) are copied as well.
 * Always returns true.
 *
 * Entries without a transition are zero-initialised so that simulate()
 * never indexes the table with an indeterminate value.
 * NOTE(review): confirm that state 0 is the intended target for missing
 * transitions.
 */
bool dfa_trans_t::fill_trans_tab(multimap_nfa& nfa) {
  unsigned int i, j, n_acc;
  set<int> s_next;
  pair< multimap<int,int>::iterator, multimap<int,int>::iterator > ret;
  multimap<int, int>::iterator itv;

  this->trans_tab = new unsigned int *[nfa.num_states];

  for (i = 0; i < nfa.num_states; i++)
    this->trans_tab[i] = new unsigned int[ALPHABET_SIZE]();  /* () zero-fills the row */

  /* fill out the table */
  for (i = 0; i < nfa.num_states; i++) {
    for (j = 0; j < ALPHABET_SIZE; j++) {
      nfa.delta((int)i, (int)j, s_next);
      if (!s_next.empty())
        /* note: in a DFA the next set of states only contains ONE state */
        this->trans_tab[i][j] = *s_next.begin();
      s_next.clear();
    }
  }

  /* fill out the accepting id's table */
  this->acc = new int *[nfa.num_states];

  for (i = 0; i < nfa.num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = nfa.accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */
      for (itv = ret.first; itv != ret.second; ++itv) {
        n_acc++;
      }
      this->acc[i] = new int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        this->acc[i][j++] = itv->second;
      }
      /* -1 marks the end of the id list */
      this->acc[i][j] = -1;
    } else {
      this->acc[i] = 0;
    }
  }

  this->num_states = nfa.num_states;
  this->start = *(nfa.starts.begin());

  return true;
}

/*
 * Build the DFA lookup tables from a transition file.
 *
 * The file is read in this order: number of states, number of
 * (state, accept id) pairs and the pairs, number of accepting states and
 * the states (read and discarded), the start state, the number of
 * transitions and the (state, symbol, next state) triples.
 *
 * trans_tab[state][symbol] receives the next state; acc[state] receives a
 * -1 terminated array of accept ids, or 0 when the state has none.
 * The program exits with status 1 when the file cannot be opened, when it
 * is truncated or malformed, or when a transition refers to a state or a
 * symbol outside the tables.
 *
 * Entries without a transition are zero-initialised.
 * NOTE(review): confirm that state 0 is the intended target for missing
 * transitions.
 */
void dfa_trans_t::fill_trans_tab(char *f_dfa) {
  unsigned int num_accept_ids, num_accept_states, num_transitions;
  unsigned int tmp_st, tmp_acc_id, tmps;
  multimap<unsigned int, unsigned int> accept_ids;
  unsigned int cur_state, next_state, sym, n_acc;
  unsigned int i, j;
  pair< multimap<unsigned int,unsigned int>::iterator, multimap<unsigned int,unsigned int>::iterator > ret;
  multimap<unsigned int, unsigned int>::iterator itv;

  ifstream nfa_file;
  nfa_file.open(f_dfa);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }

  /* read num of states and num of accept ids (signature ids) */
  nfa_file >> this->num_states >> num_accept_ids;
  for (i = 0; nfa_file && i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<unsigned int, unsigned int>(tmp_st, tmp_acc_id));
  }

  /* read the accepting states (only skipped over) */
  nfa_file >> num_accept_states;
  for (i = 0; nfa_file && i < num_accept_states; i++) {
    nfa_file >> tmps;
  }

  /* read the start state */
  nfa_file >> this->start;

  /* read transitions and put them in a lookup-table */
  nfa_file >> num_transitions;
  if (!nfa_file) {
    cerr << "Malformed header in DFA file" << endl;
    exit(1);
  }

  this->trans_tab = new unsigned int *[num_states];

  for (i = 0; i < num_states; i++)
    this->trans_tab[i] = new unsigned int[ALPHABET_SIZE]();  /* () zero-fills the row */

  for (i = 0; i < num_transitions; i++) {
    /* read one transition */
    if (!(nfa_file >> cur_state >> sym >> next_state)) {
      cerr << "Malformed transition in DFA file" << endl;
      exit(1);
    }
    /* the values index the tables, so reject anything out of range */
    if (cur_state >= num_states || next_state >= num_states || sym >= ALPHABET_SIZE) {
      cerr << "Transition out of range in DFA file" << endl;
      exit(1);
    }
    trans_tab[cur_state][sym] = next_state;
  }
  nfa_file.close();

  /* fill out the accepting id's table */
  this->acc = new int *[num_states];

  for (i = 0; i < num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */
      for (itv = ret.first; itv != ret.second; ++itv) {
        n_acc++;
      }
      this->acc[i] = new int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        this->acc[i][j++] = itv->second;
      }
      /* -1 marks the end of the id list */
      this->acc[i][j] = -1;
    } else {
      this->acc[i] = 0;
    }
  }

}

/*
 * Run the DFA over the len bytes of buf, starting from this->start.
 * After each byte, one "Matched at offset <i>: <id>" line is printed for
 * every accept id of the state that was reached.
 */
void dfa_trans_t::simulate(const unsigned char *buf, unsigned int len) const {
  unsigned int state = this->start;

  for (unsigned int pos = 0; pos < len; ++pos) {
    state = this->trans_tab[state][buf[pos]];

    /* states without accept ids have a null entry */
    if (!this->acc[state])
      continue;

    /* the id list of a state is terminated by -1 */
    for (unsigned int k = 0; this->acc[state][k] != -1; ++k) {
      printf("Matched at offset %u: ", pos);
      printf("%d\n", this->acc[state][k]);
    }
  }
}

/*
 * Build the NFA lookup tables from an in-memory automaton.
 *
 * trans_tab[state][symbol] receives the set of states reported by
 * nfa.delta(); acc_ids[state] receives a -1 terminated array of accept
 * ids, or 0 when the state has none. num_states and the start states are
 * copied from nfa. An estimate of the table size is printed on stdout.
 * Always returns true.
 */
bool nfa_trans_set_t::fill_trans_tab(multimap_nfa& nfa) {
  unsigned int i, j, n_acc, trans_tab_sz = 0;
  set<int> s_next;
  set<int>::iterator it;
  pair< multimap<int,int>::iterator, multimap<int,int>::iterator > ret;
  multimap<int, int>::iterator itv;

  /* record the size first: the measurement loop below depends on it */
  this->num_states = nfa.num_states;

  this->trans_tab = new set<unsigned int> *[nfa.num_states];

  for (i = 0; i < nfa.num_states; i++)
    this->trans_tab[i] = new set<unsigned int>[ALPHABET_SIZE];

  /* fill out the table */
  for (i = 0; i < nfa.num_states; i++) {
    for (j = 0; j < ALPHABET_SIZE; j++) {
      nfa.delta((int)i, (int)j, s_next);
      for (it = s_next.begin(); it != s_next.end(); it++) {
        this->trans_tab[i][j].insert((unsigned int)*it);
      }
      s_next.clear();
    }
  }

  /* measure the size of transition table */
  for (i = 0; i < nfa.num_states; i++) {
    for (j = 0; j < ALPHABET_SIZE; j++) {
      trans_tab_sz += sizeof(trans_tab[i][j]);
      if (!trans_tab[i][j].empty())
        trans_tab_sz += trans_tab[i][j].size() * sizeof(unsigned int);
    }
  }
  cout << "Memory usage of trans tab " << trans_tab_sz << " bytes" << endl;

  /* fill out the accepting id's table */
  this->acc_ids = new int *[nfa.num_states];

  for (i = 0; i < nfa.num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = nfa.accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */
      for (itv = ret.first; itv != ret.second; ++itv) {
        n_acc++;
      }
      this->acc_ids[i] = new int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        this->acc_ids[i][j++] = itv->second;
      }
      /* -1 marks the end of the id list */
      this->acc_ids[i][j] = -1;
    } else {
      this->acc_ids[i] = 0;
    }
  }

  for (it = nfa.starts.begin(); it != nfa.starts.end(); it++)
    this->starts.insert(*it);

  return true;

}

void nfa_trans_set_t::fill_trans_tab(char *f_nfa) {
  unsigned int num_states, num_accept_ids, num_accept_states, num_starts, num_transitions;
  unsigned int tmp_st, tmp_acc_id, tmps;
  multimap<unsigned int, unsigned int> accept_ids;
  vector<unsigned int> accepting_states;
  pair< multimap<unsigned int, unsigned int>::iterator, multimap<unsigned int,unsigned int>::iterator > ret;
  multimap<unsigned int, unsigned int>::iterator itv;
  unsigned int cur_state, next_state, sym, n_acc;
  unsigned int num_filled = 0, trans_tab_sz = 0;
  unsigned int i, j;

  ifstream nfa_file;
  nfa_file.open(f_nfa);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }   

  /* read num of states and num of accept ids (signature ids) */
  nfa_file >> num_states >> num_accept_ids;
  for (i = 0; i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<int, int>(tmp_st, tmp_acc_id));
  }
  
  /* read the accepting states */
  nfa_file >> num_accept_states;
  for (i = 0; i < num_accept_states; i++) {
    nfa_file >> tmps;
    //accepting_states.push_back(tmps);
  }

  /* read the start states (may be multiple for NFA) */
  nfa_file >> num_starts;
  if (num_starts == 0) {
    cerr << "Invalid number of starts" << endl;
    exit(1);
  }
  for (i = 0; i < num_starts; i++) {
    nfa_file >> tmps;
    starts.insert(tmps);
  }
   
  /* read transitions and put them in a lookup-table */
  nfa_file >> num_transitions;
  trans_tab = new set<unsigned int> *[num_states];

  for (i = 0; i < num_states; i++)
    trans_tab[i] = new set<unsigned int>[ALPHABET_SIZE];

  for (i = 0; i < num_transitions; i++) {
    /* read one transition */
    nfa_file >> cur_state >> sym >> next_state;
    trans_tab[cur_state][sym].insert(next_state);
  }
  nfa_file.close();

  /* measure the size of transition table */
  for (i = 0; i < num_states; i++) {
    for (j = 0; j < ALPHABET_SIZE; j++) {
      trans_tab_sz += sizeof(trans_tab[i][j]);
      if (!trans_tab[i][j].empty())
        trans_tab_sz += trans_tab[i][j].size() * sizeof(unsigned int);
    }
  }
  cout << "Memory usage of trans tab " << trans_tab_sz << " bytes" << endl;

  /* fill out the accepting id's table */
  acc_ids = new int *[num_states];
  for (i = 0; i < num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */	
      for (itv = ret.first; itv != ret.second; ++itv) {
	n_acc++;
      }
      acc_ids[i] = new int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        acc_ids[i][j++] = itv->second;
      }
      acc_ids[i][j] = -1;
    } else {
      acc_ids[i] = 0;
    }
  }

}

/*
 * Run the NFA over the len bytes of buf using trans_tab, starting from
 * the set of start states.
 * After each byte, one "Matched at offset <i>: <ids>" line is printed for
 * every reached state that has accept ids.
 */
void nfa_trans_set_t::simulate(const unsigned char *buf, unsigned int len) const {
  set<unsigned int> active = this->starts;
  set<unsigned int> reached;
  set<unsigned int>::iterator st;

  for (unsigned int pos = 0; pos < len; ++pos) {
    const unsigned char sym = buf[pos];

    /* this is the most time consuming part: union of all successors */
    for (st = active.begin(); st != active.end(); ++st) {
      const set<unsigned int>& targets = this->trans_tab[*st][sym];
      reached.insert(targets.begin(), targets.end());
    }

    /* check for acceptance */
    for (st = reached.begin(); st != reached.end(); ++st) {
      if (!this->acc_ids[*st])
        continue;
      printf("Matched at offset %u: ", pos);
      /* the id list of a state is terminated by -1 */
      for (unsigned int k = 0; this->acc_ids[*st][k] != -1; ++k)
        printf("%d ", this->acc_ids[*st][k]);
      printf("\n");
    }

    /* the reached states become the active ones for the next byte */
    active.swap(reached);
    reached.clear();
  }

}

void nfa_trans_set_t::fill_ptrans_tab(char *f_nfa) {

  unsigned int num_states, num_accept_ids, num_accept_states, num_starts, num_transitions;
  unsigned int tmp_st, tmp_acc_id, tmps;
  multimap<unsigned int, unsigned int> accept_ids;
  vector<unsigned int> accepting_states;
  pair< multimap<unsigned int, unsigned int>::iterator, multimap<unsigned int,unsigned int>::iterator > ret;
  multimap<unsigned int, unsigned int>::iterator itv;
  unsigned int cur_state, next_state, sym, n_acc;
  unsigned int num_filled = 0, trans_tab_sz = 0;
  unsigned int i, j;

  ifstream nfa_file;
  nfa_file.open(f_nfa);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }   

  /* read num of states and num of accept ids (signature ids) */
  nfa_file >> num_states >> num_accept_ids;
  for (i = 0; i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<int, int>(tmp_st, tmp_acc_id));
  }
  
  /* read the accepting states */
  nfa_file >> num_accept_states;
  for (i = 0; i < num_accept_states; i++) {
    nfa_file >> tmps;
    //accepting_states.push_back(tmps);
  }

  /* read the start states (may be multiple for NFA) */
  nfa_file >> num_starts;
  if (num_starts == 0) {
    cerr << "Invalid number of starts" << endl;
    exit(1);
  }
  for (i = 0; i < num_starts; i++) {
    nfa_file >> tmps;
    starts.insert(tmps);
  }
   
  /* read transitions and put them in a lookup-table */
  nfa_file >> num_transitions;
  ptrans_tab = new set<unsigned int> **[num_states];
   
  for (i = 0; i < num_states; i++) {
    ptrans_tab[i] = new set<unsigned int> *[ALPHABET_SIZE];
    for (j = 0; j < ALPHABET_SIZE; j++)
      ptrans_tab[i][j] = NULL;
  }

  for (i = 0; i < num_transitions; i++) {
    /* read one transition */
    nfa_file >> cur_state >> sym >> next_state;
    if (ptrans_tab[cur_state][sym] == NULL) 
      ptrans_tab[cur_state][sym] = new set<unsigned int>;
    
    ptrans_tab[cur_state][sym]->insert(next_state);
  }
  nfa_file.close();

  /* measure the size of transition table */
  for (i = 0; i < num_states; i++) {
    for (j = 0; j < ALPHABET_SIZE; j++) {
      if (ptrans_tab[i][j] != NULL) {
        trans_tab_sz += sizeof(set<unsigned int>) + ptrans_tab[i][j]->size() * sizeof(unsigned int);
        num_filled++;
      }
    }
  }

  cout << "Memory usage of trans tab " << trans_tab_sz + (num_states*ALPHABET_SIZE - num_filled) * sizeof(set<unsigned int> *) << " bytes" << endl;

  /* fill out the accepting id's table */
  acc_ids = new int *[num_states];
  for (i = 0; i < num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */	
      for (itv = ret.first; itv != ret.second; ++itv) {
	n_acc++;
      }
      acc_ids[i] = new int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        acc_ids[i][j++] = itv->second;
      }
      acc_ids[i][j] = -1;
    } else {
      acc_ids[i] = 0;
    }
  }

}

/* simulatep() uses much less memory than simulate() */
/*
 * Run the NFA over the len bytes of buf using the pointer table
 * ptrans_tab, starting from the set of start states. Pairs whose table
 * entry is NULL have no successor.
 * After each byte, one "Matched at offset <i>: <ids>" line is printed for
 * every reached state that has accept ids.
 */
void nfa_trans_set_t::simulatep(const unsigned char *buf, unsigned int len) const {
  set<unsigned int> active = this->starts;
  set<unsigned int> reached;
  set<unsigned int>::iterator st;

  for (unsigned int pos = 0; pos < len; ++pos) {
    const unsigned char sym = buf[pos];

    /* this is the most time consuming part: union of all successors */
    for (st = active.begin(); st != active.end(); ++st) {
      const set<unsigned int> *targets = this->ptrans_tab[*st][sym];
      if (targets == NULL)
        continue;
      reached.insert(targets->begin(), targets->end());
    }

    /* check for acceptance */
    for (st = reached.begin(); st != reached.end(); ++st) {
      if (!this->acc_ids[*st])
        continue;
      printf("Matched at offset %u: ", pos);
      /* the id list of a state is terminated by -1 */
      for (unsigned int k = 0; this->acc_ids[*st][k] != -1; ++k)
        printf("%d ", this->acc_ids[*st][k]);
      printf("\n");
    }

    /* exchange the two sets instead of copying one into the other */
    active.swap(reached);
    reached.clear();
  }
}



/* this is for multi-thread simulation */
/*
 * Simulation entry point for multi-thread use: runs the NFA over the
 * payload described by payload_and_starts (buffer, length and the set of
 * start states to use) with the pointer table ptrans_tab.
 * After each byte, one "Matched at offset <i>: <ids>" line is printed for
 * every reached state that has accept ids.
 * Always returns NULL; the function has no result to hand back.
 */
void *nfa_trans_set_t::mt_simulate(simul_thread_data *payload_and_starts) {
  set<unsigned int> cur_st, next_st;
  set<unsigned int>::iterator it;
  unsigned int i, j, payload_sz;
  unsigned char *buf;

  buf = payload_and_starts->payload;
  payload_sz = payload_and_starts->len;
  cur_st = *(payload_and_starts->starts);

  for (i = 0; i < payload_sz; i++) {
    /* this is the most time consuming part */
    for (it = cur_st.begin(); it != cur_st.end(); it++) {
      if (this->ptrans_tab[*it][buf[i]] != NULL) {
        next_st.insert(this->ptrans_tab[*it][buf[i]]->begin(), this->ptrans_tab[*it][buf[i]]->end());
      }
    }
    /* check for acceptance */
    for (it = next_st.begin(); it != next_st.end(); it++) {
      if (this->acc_ids[*it]) {
        j = 0;
        printf("Matched at offset %u: ", i);
        /* the id list of a state is terminated by -1 */
        while (this->acc_ids[*it][j] != -1) {
          printf("%d ", this->acc_ids[*it][j]);
          j++;
        }
        printf("\n");
      }
    }
    cur_st = next_st;
    next_st.clear();
  }

  /* flowing off the end of a non-void function is undefined behaviour */
  return NULL;
}


/*
 * Add to s_next every state reachable from state on symbol sym according
 * to trans_tab. s_next is not cleared first.
 */
void nfa_trans_mm_t::delta(unsigned int state, unsigned char sym, set<unsigned int>& s_next) {
  typedef multimap<unsigned char, unsigned int>::iterator edge_iter;

  const pair<edge_iter, edge_iter> span = trans_tab[state].equal_range(sym);
  edge_iter edge = span.first;
  while (edge != span.second) {
    s_next.insert(edge->second);
    ++edge;
  }
}

/*
 * Build the multimap-based NFA lookup table from a transition file.
 *
 * The file is read in this order: number of states, number of
 * (state, accept id) pairs and the pairs, number of accepting states and
 * the states (read and discarded), number of start states and the states,
 * number of transitions and the (state, symbol, next state) triples.
 *
 * trans_tab[state] receives one (symbol, next state) entry per transition;
 * acc_ids[state] receives an array of accept ids terminated by
 * (unsigned int)-1, or 0 when the state has none. An estimate of the table
 * size is printed on stdout.
 * The program exits with status 1 when the file cannot be opened, declares
 * no start state, is truncated or malformed, refers to a state outside the
 * table, or uses a symbol that does not fit in an unsigned char.
 */
void nfa_trans_mm_t::fill_trans_tab(char *f_nfa) {

  unsigned int num_states, num_accept_ids, num_accept_states, num_starts;
  long num_transitions;
  unsigned int tmp_st, tmp_acc_id, tmps;
  multimap<unsigned int, unsigned int> accept_ids;
  pair< multimap<unsigned int, unsigned int>::iterator, multimap<unsigned int,unsigned int>::iterator > ret;
  multimap<unsigned int, unsigned int>::iterator itv;
  unsigned int cur_state, next_state, sym, n_acc;
  unsigned int trans_tab_sz = 0;
  unsigned int i, j;

  ifstream nfa_file;
  nfa_file.open(f_nfa);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }

  /* read num of states and num of accept ids (signature ids) */
  nfa_file >> num_states >> num_accept_ids;
  if (!nfa_file) {
    cerr << "Malformed header in NFA file" << endl;
    exit(1);
  }
  for (i = 0; nfa_file && i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<unsigned int, unsigned int>(tmp_st, tmp_acc_id));
  }

  /* read the accepting states (only skipped over) */
  nfa_file >> num_accept_states;
#ifdef TRACE_SIMUL
  cout << "fill_trans_tab: number of acc states " << num_accept_states << endl;
#endif
  for (i = 0; nfa_file && i < num_accept_states; i++) {
    nfa_file >> tmps;
  }

  /* read the start states (may be multiple for NFA) */
  nfa_file >> num_starts;
#ifdef TRACE_SIMUL
  cout << "fill_trans_tab: number of starts " << num_starts << endl;
#endif
  if (!nfa_file || num_starts == 0) {
    cerr << "Invalid number of starts" << endl;
    exit(1);
  }
  for (i = 0; i < num_starts; i++) {
    /* start states index the table during simulation */
    if (!(nfa_file >> tmps) || tmps >= num_states) {
      cerr << "Invalid start state in NFA file" << endl;
      exit(1);
    }
    starts.insert(tmps);
  }

  /* read transition */
  nfa_file >> num_transitions;
  if (!nfa_file) {
    cerr << "Malformed header in NFA file" << endl;
    exit(1);
  }
#ifdef TRACE_SIMUL
  cout << "read num of starts i = " << i << endl;
  cout << "fill_trans_tab: number of trans " << num_transitions << endl;
#endif

  trans_tab = new multimap<unsigned char, unsigned int> [num_states];

  for (i = 0; i < num_transitions; i++) {
    /* read one transition */
    if (!(nfa_file >> cur_state >> sym >> next_state)) {
      cerr << "Malformed transition in NFA file" << endl;
      exit(1);
    }
    /* states index the table; the symbol is stored as an unsigned char
     * and a larger value would silently alias another symbol */
    if (cur_state >= num_states || next_state >= num_states || sym > 0xFFu) {
      cerr << "Transition out of range in NFA file" << endl;
      exit(1);
    }
    trans_tab[cur_state].insert(pair<unsigned char, unsigned int>((unsigned char)sym, next_state));
  }
  nfa_file.close();

  /* measure the size of transition table */
  for (i = 0; i < num_states; i++) {
    trans_tab_sz += sizeof(trans_tab[i]) + (sizeof(unsigned char) + sizeof(unsigned int)) * trans_tab[i].size();
  }

  cout << "Memory usage of trans tab " << trans_tab_sz << " bytes" << endl;

  /* fill out the accepting id's table */
  acc_ids = new unsigned int *[num_states];

  for (i = 0; i < num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */
      for (itv = ret.first; itv != ret.second; ++itv) {
        n_acc++;
      }
      acc_ids[i] = new unsigned int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        acc_ids[i][j++] = itv->second;
      }
      /* end-of-list marker; stored as (unsigned int)-1 */
      acc_ids[i][j] = -1;
    } else {
      acc_ids[i] = 0;
    }
  }
}

/*
 * Run the NFA over the len bytes of buf using the multimap table
 * trans_tab, starting from the set of start states.
 * After each byte, one "Matched at offset <i>: <ids>" line is printed for
 * every reached state that has accept ids.
 * With TRACE_SIMUL defined, the number of start states and the CPU time
 * spent on the payload are traced on stdout as well.
 */
void nfa_trans_mm_t::simulate(const unsigned char *buf, unsigned int len) {
  typedef multimap<unsigned char, unsigned int>::iterator edge_iter;
  set<unsigned int> active, reached;
  set<unsigned int>::iterator st;
  pair<edge_iter, edge_iter> span;
  unsigned int i, j;
#ifdef TRACE_SIMUL
  unsigned int t_start = 0, t_end = 0;
#endif

  active = starts;

#ifdef TRACE_SIMUL
  cout << "number of starts = " << starts.size() << endl;
  t_start = cputime();
#endif
  for (i = 0; i < len; i++) {
    /* gather the successors of every active state on the current byte */
    for (st = active.begin(); st != active.end(); ++st) {
      span = trans_tab[*st].equal_range(buf[i]);
      for (edge_iter edge = span.first; edge != span.second; ++edge)
        reached.insert(edge->second);
    }

    /* check for acceptance */
    for (st = reached.begin(); st != reached.end(); ++st) {
      if (!this->acc_ids[*st])
        continue;
      printf("Matched at offset %u: ", i);
      /* the id list of a state is terminated by -1 */
      for (j = 0; this->acc_ids[*st][j] != -1; j++)
        printf("%d ", this->acc_ids[*st][j]);
      printf("\n");
    }

    /* go to the next symbol: the reached states become the active ones */
    active.swap(reached);
    reached.clear();
  }
#ifdef TRACE_SIMUL
  t_end = cputime();
  cout << "i = " << i << endl;
  cout << "exec time for current payload: " << t_end - t_start << " ms" << endl;
#endif
}

/*
 * Build the map-based NFA lookup table from a transition file.
 *
 * The file is read in this order: number of states, number of
 * (state, accept id) pairs and the pairs, number of accepting states and
 * the states, number of start states and the states, number of
 * transitions and the (state, symbol, next state) triples. The counts and
 * the sets are stored in the corresponding members.
 *
 * trans_tab[state] maps a symbol to the set of next states; acc_ids[state]
 * receives an array of accept ids terminated by (unsigned int)-1, or 0
 * when the state has none. An estimate of the table size is printed on
 * stdout.
 * The program exits with status 1 when the file cannot be opened, declares
 * no start state, is truncated or malformed, refers to a state outside the
 * table, or uses a symbol that does not fit in an unsigned char.
 */
void nfa_map_t::fill_trans_tab(char *f_trans) {

  ifstream nfa_file;
  unsigned int num_accept_ids, num_accept_states, num_starts;
  unsigned int tmp_st, tmp_acc_id, tmps;
  unsigned int cur_state, next_state, sym, n_acc;
  pair< multimap<unsigned int, unsigned int>::iterator, multimap<unsigned int,unsigned int>::iterator > ret;
  multimap<unsigned int, unsigned int>::iterator itv;
  map<unsigned char, set<unsigned int> >::iterator itm;
  unsigned int i, j, trans_tab_sz = 0;

  nfa_file.open(f_trans);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }

  /* read num of states and num of accept ids (signature ids) */
  nfa_file >> num_states >> num_accept_ids;
  if (!nfa_file) {
    cerr << "Malformed header in NFA file" << endl;
    exit(1);
  }
  for (i = 0; nfa_file && i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<unsigned int, unsigned int>(tmp_st, tmp_acc_id));
  }

  /* read the accepting states */
  nfa_file >> num_accept_states;
  for (i = 0; nfa_file && i < num_accept_states; i++) {
    if (nfa_file >> tmps)
      accepting_states.insert(tmps);
  }

  /* read the start states (may be multiple for NFA) */
  nfa_file >> num_starts;
  if (!nfa_file || num_starts == 0) {
    cerr << "Invalid number of starts" << endl;
    exit(1);
  }
  for (i = 0; i < num_starts; i++) {
    /* start states index the table during simulation */
    if (!(nfa_file >> tmps) || tmps >= num_states) {
      cerr << "Invalid start state in NFA file" << endl;
      exit(1);
    }
    starts.insert(tmps);
  }

  /* read transitions and put them in a lookup-table */
  nfa_file >> num_transitions;
  if (!nfa_file) {
    cerr << "Malformed header in NFA file" << endl;
    exit(1);
  }

  trans_tab = new map<unsigned char, set<unsigned int> > [num_states];

  for (i = 0; i < num_transitions; i++) {
    /* read one transition */
    if (!(nfa_file >> cur_state >> sym >> next_state)) {
      cerr << "Malformed transition in NFA file" << endl;
      exit(1);
    }
    /* states index the table; the symbol is stored as an unsigned char
     * and a larger value would silently alias another symbol */
    if (cur_state >= num_states || next_state >= num_states || sym > 0xFFu) {
      cerr << "Transition out of range in NFA file" << endl;
      exit(1);
    }
    /* operator[] creates the empty target set on first use */
    trans_tab[cur_state][(unsigned char)sym].insert(next_state);
  }
  nfa_file.close();

  /* measure the size of transition table */
  for (i = 0; i < num_states; i++) {
    trans_tab_sz += sizeof(trans_tab[i]);
    for (itm = trans_tab[i].begin(); itm != trans_tab[i].end(); itm++) {
      trans_tab_sz += sizeof(unsigned char) + sizeof(set<unsigned int>) + sizeof(unsigned int) * itm->second.size();
    }
  }

  cout << "Memory usage of trans tab in nfa_map_t " << trans_tab_sz << " bytes" << endl;

  /* fill out the accepting id's table */
  acc_ids = new unsigned int *[num_states];

  for (i = 0; i < num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */
      for (itv = ret.first; itv != ret.second; ++itv) {
        n_acc++;
      }
      acc_ids[i] = new unsigned int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        acc_ids[i][j++] = itv->second;
      }
      /* end-of-list marker; stored as (unsigned int)-1 */
      acc_ids[i][j] = -1;
    } else {
      acc_ids[i] = 0;
    }
  }
}

/*
 * Run the NFA over the len bytes of buf using the map table trans_tab,
 * starting from the set of start states.
 * After each byte, one "Matched at offset <i>: <ids>" line is printed for
 * every reached state that has accept ids.
 */
void nfa_map_t::simulate(const unsigned char *buf, unsigned int len) {
  set<unsigned int> active = this->starts;
  set<unsigned int> reached;
  set<unsigned int>::iterator st;
  map<unsigned char, set<unsigned int> >::iterator edge;

  for (unsigned int pos = 0; pos < len; ++pos) {
    /* this is the most time consuming part: union of all successors */
    for (st = active.begin(); st != active.end(); ++st) {
      edge = this->trans_tab[*st].find(buf[pos]);
      if (edge == this->trans_tab[*st].end())
        continue;
      reached.insert(edge->second.begin(), edge->second.end());
    }

    /* check for acceptance */
    for (st = reached.begin(); st != reached.end(); ++st) {
      if (!this->acc_ids[*st])
        continue;
      printf("Matched at offset %u: ", pos);
      /* the id list of a state is terminated by -1 */
      for (unsigned int k = 0; this->acc_ids[*st][k] != -1; ++k)
        printf("%d ", this->acc_ids[*st][k]);
      printf("\n");
    }

    /* exchange the two sets instead of copying one into the other */
    active.swap(reached);
    reached.clear();
  }

}

/* Compute the byte -> class mapping class_c[] by successive refinement.
 *
 * All entries of class_c start at 0.  Every distinct target set that occurs
 * in trans_tab is added to target_sets (declared outside this function; it
 * is only added to here, never cleared).  Then, for every state s and every
 * collected target set T:
 *   - the bytes c whose transition from s leads exactly to T are "covered";
 *   - covered bytes still in class 0 are moved together into one new class;
 *   - every other class that contains a covered byte is split: its
 *     uncovered members are moved into a fresh class.
 *
 * Byte value 0 is never matched against a target set and therefore keeps
 * class 0.  A byte with no transition out of s is treated as not covered.
 */
void nfa_map_t::alphabet_reduction() {
  int max_class = 0;
  unsigned int s;
  unsigned int c;
  map<unsigned char, set<unsigned int> >::iterator itm;
  set< set<unsigned int> >::iterator it;
  int i, j;

  /* initialize the mapping */
  for (i = 0; i < ALPHABET_SIZE; i++)
    class_c[i] = 0;

  /* construct the target sets */
  for (j = 0; j < num_states; j++) {
    for (itm = trans_tab[j].begin(); itm != trans_tab[j].end(); itm++) {
      target_sets.insert(itm->second);
    }
  }

  for (s = 0; s < num_states; s++) {
    for (it = target_sets.begin(); it != target_sets.end(); it++) {
      bool char_covered[ALPHABET_SIZE] = {false};   /* byte goes from s to *it */
      bool class_covered[ALPHABET_SIZE] = {false};  /* class has a covered byte */
      unsigned char remap[ALPHABET_SIZE] = {0};     /* old class -> split-off class */
      int on_zero = 0;  /* class given to covered bytes that were in class 0 */

      for (c = 0; c < ALPHABET_SIZE; c++) {
        if ((unsigned char)c == 0)
          continue;

        /* find() yields end() when s has no transition on c; that
         * iterator must not be dereferenced */
        itm = trans_tab[s].find((unsigned char)c);
        if (itm == trans_tab[s].end() || !(itm->second == *it))
          continue;

        char_covered[c] = true;
        if (class_c[c] == 0) {
          if (on_zero == 0) {
            on_zero = ++max_class;
          }
          class_c[c] = on_zero;
        } else {
          class_covered[class_c[c]] = true;
        }
      }

      /* split every touched class: uncovered members move to a new class */
      for (c = 0; c < ALPHABET_SIZE; c++) {
        if (!char_covered[c] && class_covered[class_c[c]]) {
          if (remap[class_c[c]] == 0) {
            remap[class_c[c]] = ++max_class;
          }
          class_c[c] = remap[class_c[c]];
        }
      }
    }
  }
}

/* An improved version of alphabet reduction */
void nfa_map_t::alphabet_reduction_im() {
  int max_class = 0;
  unsigned int s;
  unsigned int c;
  map<unsigned char, set<unsigned int> >::iterator itm;
  set< set<unsigned int> > *targets;
  set< set<unsigned int> >::iterator it;
  int i, j;
  
  /* initialize the mapping */
  for (i = 0; i < ALPHABET_SIZE; i++) 
    class_c[i] = 0;

  targets = new set< set<unsigned int> > [num_states];

  /* construct the target sets */
  cout << "num_states " << num_states << endl;
  for (j = 0; j < num_states; j++) {
#ifdef TRACE_ALPHA_REDUCTION
  cout << "j = " << j << endl;
#endif  
    for (itm = trans_tab[j].begin(); itm != trans_tab[j].end(); itm++) {
      targets[j].insert(itm->second);
    }
  }  

  for (s = 0; s < num_states; s++) {
#ifdef TRACE_ALPHA_REDUCTION
  cout << "processing state " << s << " ..." << endl;
#endif  
    for (it = targets[s].begin(); it != targets[s].end(); it++) {
      bool char_covered[ALPHABET_SIZE] = {false};
      bool class_covered[ALPHABET_SIZE] = {false};
      unsigned char remap[ALPHABET_SIZE] = {0};
      int on_zero = 0;
      for (c = 0; c < ALPHABET_SIZE; c++) {
        if ((unsigned char)c != 0) {
	  if (trans_tab[s].find((unsigned char)c)->second == *it) {
	    char_covered[c] = true;
            if (class_c[c] == 0) {
	      if (on_zero == 0) {
	        on_zero = ++max_class;
	      }
	      class_c[c] = on_zero;
	    } else
	      class_covered[class_c[c]] = true;
	  } 
	}
	
      }
      for (c = 0; c < ALPHABET_SIZE; c++) {
	if (!char_covered[c] && class_covered[class_c[c]]) {
	  if (remap[class_c[c]] == 0) {
	    remap[class_c[c]] = ++max_class;
	  }
	  class_c[c] = remap[class_c[c]];
	}
      }
    }
  } 
  cout << " max_class = " << max_class << endl;
}


void nfa_map_t::rdt_trans_tab_gen() {
  map<unsigned char, set<unsigned int> >::iterator itm;
  unsigned int total_bytes = 0;	/* measure the memory cost */
  int i;

  reduced_trans_tab = new map<unsigned char, set<unsigned int> > [num_states];
  total_bytes += num_states * sizeof(map<unsigned char, set<unsigned int> >);

  for (i = 0; i < num_states; i++) {
    for (itm = trans_tab[i].begin(); itm != trans_tab[i].end(); itm++) {
      if (reduced_trans_tab[i].find(class_c[itm->first]) == reduced_trans_tab[i].end()) {
	reduced_trans_tab[i].insert(pair<unsigned char, set<unsigned int> >(class_c[itm->first], itm->second));
	total_bytes += sizeof(unsigned char);
	total_bytes += sizeof(set<unsigned int>);
	total_bytes += itm->second.size() * sizeof(unsigned int);
      }
    }
  }
  cout << "memory cost of reduced transition table = " << total_bytes << " bytes" << endl;
}

/* Write the alphabet-reduced NFA to the text file named by f.
 *
 * Layout of the output, in order:
 *   1. num_states
 *   2. ALPHABET_SIZE, then one "byte class" line per byte value
 *   3. number of (acc_state_no, accept_id) pairs, then one pair per line
 *   4. number of accepting states followed by the accepting states
 *   5. number of start states followed by the start states
 *   6. number of reduced transitions (also stored in num_rdt_trans)
 *   7. one "state class target" line per reduced transition
 *
 * If reduced_trans_tab has not been built, an error is printed and the
 * process exits with status 1 before the file is touched.  If the file
 * cannot be opened, an error is printed and the function returns.
 */
void nfa_map_t::dump_rdt_transitions(char *f) {
  ofstream out_file;
  unsigned int i;
  multimap<unsigned int, unsigned int>::iterator it;
  map<unsigned char, set<unsigned int> >::iterator itm;
  set<unsigned int>::iterator its;

  /* check the precondition first so that a failure leaves no partial file */
  if (reduced_trans_tab == NULL) {
    cerr << "transition table not reduced!" << endl;
    exit(1);
  }

  /* count the number of transitions */
  num_rdt_trans = 0;
  for (i = 0; i < num_states; i++) {
    for (itm = reduced_trans_tab[i].begin(); itm != reduced_trans_tab[i].end(); itm++)
      num_rdt_trans += itm->second.size();
  }

  out_file.open(f);
  if (!out_file.is_open()) {
    cerr << "cannot open " << f << " for writing" << endl;
    return;
  }

  out_file << num_states << '\n';

  /* output the alphabet mapping */
  out_file << ALPHABET_SIZE << '\n';
  for (i = 0; i < ALPHABET_SIZE; i++)
    out_file << i << " " << (unsigned int) class_c[i] << '\n';

  /* output the (acc_state_no, accept_id) pairs */
  out_file << accept_ids.size() << '\n';
  for (it = accept_ids.begin(); it != accept_ids.end(); ++it)
    out_file << it->first << " " << it->second << '\n';

  /* output the accepting states */
  out_file << accepting_states.size();
  for (its = accepting_states.begin(); its != accepting_states.end(); ++its)
    out_file << " " << *its;
  out_file << '\n';

  /* output start states and num of transitions */
  out_file << starts.size() << " ";
  for (its = starts.begin(); its != starts.end(); ++its)
    out_file << *its << " ";
  out_file << '\n';
  out_file << num_rdt_trans << '\n';

  /* output transitions; every state is written so that the number of
   * lines agrees with the num_rdt_trans value emitted above */
  for (i = 0; i < num_states; i++) {
    for (itm = reduced_trans_tab[i].begin(); itm != reduced_trans_tab[i].end(); itm++) {
      for (its = itm->second.begin(); its != itm->second.end(); ++its) {
        out_file << i << " " << (unsigned int) itm->first << " " << *its << '\n';
      }
    }
  }

  out_file.close();
}

/* Run the NFA over buf[0 .. len) using the alphabet-reduced table.
 *
 * buf : input bytes to scan
 * len : number of bytes in buf
 *
 * Each input byte is translated through class_c[] and the resulting class
 * is looked up in reduced_trans_tab.  The active set is seeded from
 * `starts`.  After every consumed byte, each newly reached state whose
 * acc_ids entry is non-null has its accept ids printed to stdout, one
 * "Matched at offset ..." line per state.  The ids are read up to the -1
 * terminator stored at the end of each acc_ids array.  Nothing is returned.
 */
void nfa_map_t::rdt_simulate(const unsigned char *buf, unsigned int len) {
  set<unsigned int> cur_st, next_st;
  set<unsigned int>::iterator it;
  unsigned int i, j;
  map<unsigned char, set<unsigned int> >::iterator itm;

  cur_st = this->starts;

  for (i = 0; i < len; i++) {
    /* this is the most time consuming part */
    for (it = cur_st.begin(); it != cur_st.end(); it++) {
      itm = this->reduced_trans_tab[*it].find(class_c[buf[i]]);
      if (itm != this->reduced_trans_tab[*it].end()) {
        next_st.insert(itm->second.begin(), itm->second.end());
      }
    }
    /* check for acceptance */
    for (it = next_st.begin(); it != next_st.end(); it++) {
      if (this->acc_ids[*it]) {
        j = 0;
        printf("Matched at offset %u: ", i);
        while (this->acc_ids[*it][j] != -1) {
          printf("%d ", this->acc_ids[*it][j]);
          j++;
        }
        printf("\n"); 
      }
    }
    /* exchange the two sets instead of copying next_st element by element
     * on every input byte, then empty the one that will be refilled */
    cur_st.swap(next_st);
    next_st.clear();
  }
}


