/*-----------------------------------------------------------------------------
 * File:    nfa.cc
 *
 *
 * Author:  Randy Smith
 * Date:    18 May 2007
 *
 *
 *    Copyright 2006,2007 Randy Smith, smithr@cs.wisc.edu
 *
 *    This file contains unpublished confidential proprietary
 *    work of Randy Smith, Department of Computer Sciences,
 *    University of Wisconsin--Madison.  No use of any sort, including
 *    execution, modification, copying, storage, distribution, or reverse
 *    engineering is permitted without the express written consent of
 *    Randy Smith.
 *
 *-----------------------------------------------------------------------------
 * History
 * $Log: nfa.cc,v $
 * Revision 1.6  2010/09/27 14:48:31  lyangru
 * final synchronization
 *
 * Revision 1.1  2009/06/09 18:51:22  vinodg
 * *** empty log message ***
 *
 * Revision 1.6  2008/09/12 17:47:23  smithr
 * Minor changes to maintain compatibility with department OS upgrade
 *
 * Revision 1.5  2008/04/17 23:08:34  smithr
 * Added code to convert from dfa_tab_t to nfa_t
 *
 * Revision 1.4  2008/04/17 21:12:26  smithr
 * Added routines to construct an efficient table representing/matching
 * DFAs.
 *
 * Revision 1.3  2008/02/13 20:33:10  smithr
 * *** empty log message ***
 *
 * Revision 1.2  2007/11/05 17:53:44  smithr
 * Added changes to bring the parser up to full compatibility with
 * the re2xfa parser.  Specifically, [-...] is now handled,
 * counting ranges are better handled, empty alternatives are handled,
 * and hex escapes are limited to two characters.
 *
 * Revision 1.1  2007/08/07 18:00:51  smithr
 * initial check in to CVS
 *
 *
 *---------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <map>
#include <list>
#include <bitset>
#include <set>
#include <stack>
#include <ext/hash_map>
#include <assert.h>
#include "nfa.h"
#include "nodes.h"
#include "globals.h"

/* NOTE(review): <errno.h> is already included above and normally declares
 * errno itself; confirm whether this explicit declaration is still needed. */
extern int errno;

/* Forward declarations of the routines that build an nfa_t from the parsed
 * regular-expression node types (disjunction, alternative, term, atom).
 * Their definitions are not in the visible part of this file. */
nfa_t *disjunct_to_nfa(disjunction *re);
nfa_t *alternative_to_nfa(alternative *alt);
nfa_t *term_to_nfa(term *trm);
nfa_t *atom_to_nfa(atom *atm);


/*#############################################################################
 * nfa_state_t implementation
 *###########################################################################*/

/* Default construction and destruction: both bodies are intentionally
 * empty; the members take care of themselves. */
nfa_state_t::nfa_state_t()
{
}

nfa_state_t::~nfa_state_t()
{
}


/* Copy constructor: duplicates the accept ids, the epsilon transitions and
 * every per-symbol transition list of the source state. */
nfa_state_t::nfa_state_t(const nfa_state_t& copy)
{
   accept_id = copy.accept_id;
   eps_trans = copy.eps_trans;

   unsigned int sym = 0;
   while (sym < MAX_SYMS)
   {
      trans[sym] = copy.trans[sym];
      ++sym;
   }
}



/* Copy assignment: replaces this state's transitions and accept ids with
 * those of rhs.  Assigning a state to itself is a no-op. */
nfa_state_t& nfa_state_t::operator=(const nfa_state_t& rhs)
{
   if (this != &rhs)
   {
      unsigned int sym = 0;
      while (sym < MAX_SYMS)
      {
         trans[sym] = rhs.trans[sym];
         ++sym;
      }

      eps_trans = rhs.eps_trans;
      accept_id = rhs.accept_id;
   }

   return *this;
}


void nfa_state_t::print_state(FILE *out, unsigned int indent)
{
   char wspace[indent+1];
   unsigned int i;
   std::list<state_id_t>::iterator li;
   
   memset(wspace, ' ', indent);
   wspace[indent] = '\0';


   for (i=0; i < MAX_SYMS; i++)
   {
      if (!trans[i].empty())
      {
	 fprintf(out, "%s%2x(%c): ", wspace, (unsigned char)i,
		 isalpha((unsigned char)i) ? (char)i : '_');

	 for (li = trans[i].begin(); li != trans[i].end(); li++)
	    fprintf(out, "%3u ", *li);
	 fprintf(out, "\n");
      }
   }
   if (!eps_trans.empty())
   {
      fprintf(out, "%s\\eps: ", wspace);
      for (li = eps_trans.begin(); li != eps_trans.end(); li++)
	 fprintf(out, "%3u ", *li);
      fprintf(out, "\n");
   }
   
}




/*#############################################################################
 * nfa_t implementation
 *###########################################################################*/


/*-----------------------------------------------------------------------------
 * nfa_t
 *---------------------------------------------------------------------------*/
/* Constructs an empty machine with no states.
 *
 * start and machine_id are given defined values here as well: other
 * routines in this file (print_machine, make_dfa, the output routines) read
 * them, and they were previously left uninitialized until a caller happened
 * to assign them. */
nfa_t::nfa_t(void) 
{
   next_id    = 0;
   start      = 0;
   machine_id = 0;
}

nfa_t::~nfa_t(void) {}


/*-----------------------------------------------------------------------------
 * nfa_t::clone
 *---------------------------------------------------------------------------*/
/* Returns a newly allocated copy of this machine; the caller owns the
 * returned object.
 *
 * The machine id is copied along with the states, start state, accepting
 * list and id counter.  It was previously omitted, which left the clone's
 * machine_id unset. */
nfa_t *nfa_t::clone(void)
{
   nfa_t *n = new nfa_t();

   n->states     = this->states;
   n->start      = this->start;
   n->accepting  = this->accepting;
   n->next_id    = this->next_id;
   n->machine_id = this->machine_id;

   return n;
}


/*-----------------------------------------------------------------------------
 * nfa_t::add_state
 *   makes room for a new state.
 *---------------------------------------------------------------------------*/
state_id_t nfa_t::add_state(void)
{
   /* the id handed out is the current counter value; the counter then
    * moves on to the next free id */
   const state_id_t fresh_id = next_id;
   next_id++;

   states.push_back(nfa_state_t());

   /* ids and container positions must stay in lock step */
   assert(states.size() == next_id);

   return fresh_id;
}


/*-----------------------------------------------------------------------------
 * nfa_t::add_trans
 *   adds a transition.
 *---------------------------------------------------------------------------*/
int nfa_t::add_trans(state_id_t from, state_id_t to, unsigned char sym)
{
   /* both endpoints must already exist */
   assert(from < states.size());
   assert(to < states.size());

   nfa_state_t& source = states[from];
   source.trans[sym].push_back(to);

   return 1;   /* always reports success */
}


/*-----------------------------------------------------------------------------
 * nfa_t::add_eps_trans
 *   adds an epsilon transition.
 *---------------------------------------------------------------------------*/
int nfa_t::add_eps_trans(state_id_t from, state_id_t to)
{
   /* both endpoints must already exist */
   assert(from < states.size());
   assert(to < states.size());

   nfa_state_t& source = states[from];
   source.eps_trans.push_back(to);

   return 1;   /* always reports success */
}


/*-----------------------------------------------------------------------------
 * nfa_t::add_eps_trans
 *   adds epsilon transitions from a set of states to a single state
 *---------------------------------------------------------------------------*/
int nfa_t::add_eps_trans(const std::list<state_id_t>& from, state_id_t to)
{
   assert(to < states.size());
   list<state_id_t>::const_iterator li;

   for (li = from.begin(); li != from.end(); li++)
   {
      states[*li].eps_trans.push_back(to);
   }

   return 1;
}


/*-----------------------------------------------------------------------------
 * nfa_t::add_eps_trans
 *   adds epsilon transitions from a single states to a set of states
 *---------------------------------------------------------------------------*/
int nfa_t::add_eps_trans(state_id_t from, const std::list<state_id_t>& to)
{
   assert(from < states.size());
   std::list<state_id_t>::const_iterator li;

   nfa_state_t& source = states[from];
   for (li = to.begin(); li != to.end(); ++li)
   {
      /* destinations must name existing states, as the single-state
       * overload requires of its 'to' argument */
      assert(*li < states.size());
      source.eps_trans.push_back(*li);
   }

   return 1;   /* always reports success */
}

/*-----------------------------------------------------------------------------
 * nfa_t::set_id
 *   Sets the identity (machine_id) of the nfa.
 *   Note that this routine only records the id; it does not touch the
 *   accept ids stored on the accepting states.  Use
 *   set_accepting_states() to propagate an id to the accepting states.
 *---------------------------------------------------------------------------*/
void nfa_t::set_id(unsigned int id)
{
   this->machine_id = id;
}

/*-----------------------------------------------------------------------------
 * nfa_t::fold_in
 *   This routine "absorbs" a supplied nfa into the current object by
 *   adding all the nfas states to the current object and relabeling the
 *   states and the transitions.  It is a precursor to many of the
 *   "Thompson's Construction" techniques used for producing NFAs from
 *   regular expressions.  Note that this routine does NOT reassign start
 *   states, accepting states, etc.  It simply just adds the states and
 *   makes them "semantically" correct.
 *
 *   [in]  nfa - the nfa_t to be folded in to the current object
 *   [out] ns  - the relabeled starting state
 *   [out] na  - the relabeled accepting states
 *
 *   Return value:  Number of added states.
 *---------------------------------------------------------------------------*/
unsigned int nfa_t::fold_in(nfa_t *nfa, state_id_t&ns, 
			    std::list<state_id_t>& na)
{
   /* maps a state id in 'nfa' to the id of its copy in this object */
   std::map<state_id_t, state_id_t> remap;
   std::list<state_id_t>::iterator it;

   /* pass 1: create one fresh state here for every state of 'nfa' */
   for (unsigned int old_id = 0; old_id < nfa->states.size(); ++old_id)
   {
      const state_id_t created = this->add_state();
      remap[old_id] = created;
   }

   /* pass 2: re-create every transition between the relabeled states */
   for (unsigned int old_id = 0; old_id < nfa->states.size(); ++old_id)
   {
      nfa_state_t& old_state = nfa->states[old_id];
      const state_id_t mapped_src = remap[old_id];

      /* symbol-labeled transitions */
      for (unsigned int sym = 0; sym < MAX_SYMS; ++sym)
      {
         std::list<state_id_t>& targets = old_state.trans[sym];
         for (it = targets.begin(); it != targets.end(); ++it)
         {
            const state_id_t mapped_dst = remap[*it];
            this->add_trans(mapped_src, mapped_dst, sym);
         }
      }

      /* epsilon transitions */
      for (it = old_state.eps_trans.begin();
           it != old_state.eps_trans.end(); ++it)
      {
         const state_id_t mapped_dst = remap[*it];
         this->add_eps_trans(mapped_src, mapped_dst);
      }
   }

   /* report the relabeled start state and accepting states to the caller;
    * whatever 'na' held before is discarded */
   ns = remap[nfa->start];
   na.clear();
   for (it = nfa->accepting.begin(); it != nfa->accepting.end(); ++it)
   {
      na.push_back(remap[*it]);
   }

   return remap.size();
}

/* same as the fold_in() function, but keeping the accept_id's
 * for the purpose of signature matching */
unsigned int nfa_t::fold_in_acc(nfa_t *nfa, state_id_t&ns, 
			    std::list<state_id_t>& na)
{
   /* The state and transition copying is exactly what fold_in() does, so
    * delegate to it instead of repeating the code here. */
   const unsigned int added = this->fold_in(nfa, ns, na);

   /* fold_in() fills 'na' with one relabeled id per entry of
    * nfa->accepting, in the same order.  Walk both lists in step and copy
    * each accepting state's accept ids onto its relabeled copy. */
   std::list<state_id_t>::iterator old_it = nfa->accepting.begin();
   std::list<state_id_t>::iterator new_it = na.begin();
   while (old_it != nfa->accepting.end() && new_it != na.end())
   {
      states[*new_it].accept_id = nfa->states[*old_it].accept_id;
      ++old_it;
      ++new_it;
   }

   return added;
}


/*-----------------------------------------------------------------------------
 * nfa_t::print_machine
 *---------------------------------------------------------------------------*/
void nfa_t::print_machine(FILE *out)
{
   unsigned int i;
   std::list<state_id_t>::iterator li;

   for (i=0; i < states.size(); i++)
   {
      fprintf(out, "State: %4u\n", i);
      states[i].print_state(out, 4);
   }

   fprintf(out, "Start: %u\n", start);
   fprintf(out, "Accepting: ");
   for (li = accepting.begin(); li != accepting.end(); li++)
   {
      fprintf(out, "%3u ", *li);
   }
   fprintf(out, "\n");
}



/*-----------------------------------------------------------------------------
 * compute_eps_closure
 *  Function computes the epsilon closure (e-closure) of the set of states T,
 *  using Algorithm 3.26 on page 119 of the Dragon Book.
 *
 *  T - set of states for which to compute the e-closure
 *  out - the resulting e-closure
 *
 *  The function returns the number of states in the e-closure.
 *---------------------------------------------------------------------------*/
int nfa_t::compute_eps_closure(const std::set<state_id_t>& T, 
			       std::set<state_id_t>& out)
{
   std::stack<state_id_t> s;
   std::list<state_id_t>::iterator li;
   state_id_t state;

   /* push all states in T onto stack */
   std::set<state_id_t>::const_iterator si;
   for (si = T.begin(); si != T.end(); si++)
   {
      s.push(*si);
   }
   
   /* initialize eps-closure(T) to T */
   out = T;

   while (!s.empty())
   {
      state = s.top(); s.pop();

      /* for each state u with an edge from to to u labeled e do */
      nfa_state_t& st = states[state];
      for(li = st.eps_trans.begin(); li != st.eps_trans.end(); li++)
      {
	 /* if u is not in eps-closure(T) do */
	 if (out.find(*li) == out.end())
	 {
	    out.insert(*li);
	    s.push(*li);
	 }
      }
   }

   return out.size();
}


/*-----------------------------------------------------------------------------
 * compute_eps_closure
 *  Wrapper to the above compute_eps_closure that accepts a single state
 *  as input.
 *---------------------------------------------------------------------------*/
int nfa_t::compute_eps_closure(state_id_t s, std::set<state_id_t>& out)
{
   /* wrap the single state in a one-element set and defer to the
    * set-based overload */
   std::set<state_id_t> singleton;
   singleton.insert(s);

   const int closure_size = compute_eps_closure(singleton, out);
   return closure_size;
}


/*-----------------------------------------------------------------------------
 * compute_move
 *  Function that computes the move operation (page 118 of Dragon book) for
 *  use in determinization.
 *
 *  T - set of states from which to take transitions
 *  a - the input symbol (must be less than MAX_SYMS)
 *  out - the resulting set of states reachable from T on symbol a
 *
 *  The function returns the number of states in out.
 *---------------------------------------------------------------------------*/
int nfa_t::compute_move(const std::set<state_id_t>& T, unsigned int a,
			std::set<state_id_t>& out)
{
   assert(a < MAX_SYMS);
   
   std::set<state_id_t>::const_iterator si;
   std::list<state_id_t>::const_iterator li;

   out.erase(out.begin(), out.end());

   for (si = T.begin(); si != T.end(); si++)
   {
      nfa_state_t& st = states[*si];

      for (li = st.trans[a].begin(); li != st.trans[a].end(); li++)
      {
	 out.insert(*li);
      }
   }      

   return out.size();
}


/* We use a hash map to map from a set of state_id_t's to a single
 * state_id_t.  Need 4 things for this:
 * 1.  a key (state_set_key_t) 
 * 2.  the data (state_id_t) 
 * 3.  an equality operator (state_set_key_eq_t) 
 * 4.  a hash function (state_set_key_hash_t 
 *
 * These data types are at present unneeded outside the use of make_dfa,
 * so they are included here rather than in a header file.
 *
*/

/* Hash-map key: a set of state ids. */
struct state_set_key_t
{
      state_set_key_t(void) : states() {}
      state_set_key_t(const state_set_key_t& s) : states(s.states) {}
      /* deliberately not explicit, so a std::set converts to a key */
      state_set_key_t(const std::set<state_id_t>& s) : states(s) {}

      std::set<state_id_t> states;
};

/* unnecessary, and unused */
typedef struct state_set_data_t 
{
      state_set_data_t(void) { s = 0; }
      state_set_data_t(const state_set_data_t& c) { s = c.s; }
      state_set_data_t(state_id_t c) { s = c; }
      state_id_t s;

} state_set_data_t;


/* Equality functor for the hash map: two keys are equal when their state
 * sets are equal. */
struct state_set_key_eq_t
{
      bool operator()(const state_set_key_t& k1, 
		      const state_set_key_t& k2) const 
      {
	 const bool same = (k1.states == k2.states);
	 return same;
      }
};


/* operator for hash_map hashing.  This is probably overkill */
typedef struct state_set_key_hash_t
{
      size_t operator()(const state_set_key_t& k1) const 
      {
	 std::set<state_id_t>::const_iterator si;
	 unsigned int primes[] = { 2,  3,  5,  7, 11, 13, 17, 19, 23, 29,
				  31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 
				  73, 79, 83, 89, 97,101,103,107,109,113};
	 unsigned int prsize = sizeof(primes)/sizeof(unsigned int);
	 unsigned int idx =0;
	 unsigned hval = 0;

	 for (si = k1.states.begin(); si != k1.states.end(); si++)
	 {
	    hval = (hval + (*si) * primes[idx++]) % 4297;
	    idx %= prsize;

	    assert(prsize == 30);
	 }

	 return hval;
      }
} state_set_key_hash_t;


/*-----------------------------------------------------------------------------
 * nfa_t::make_dfa
 *---------------------------------------------------------------------------*/
nfa_t *nfa_t::make_dfa(void)
{
   /* Dstate_map maps a set<state_id_t> to a single state_id_t. */
   __gnu_cxx::hash_map<state_set_key_t, 
                       state_id_t,
                       state_set_key_hash_t,
                       state_set_key_eq_t> Dstate_map;

   __gnu_cxx::hash_map<state_set_key_t, 
                       state_id_t,
                       state_set_key_hash_t,
                       state_set_key_eq_t>::iterator  Dstate_map_iter;

   std::map<state_id_t, std::set<state_id_t> > Dstate_inv_map;
   std::list<state_id_t> unmarked;
   std::set<state_id_t> move;
   std::set<state_id_t> U;
   std::set<state_id_t> T;

   unsigned int state, Tstate, Ustate;

   nfa_t *dfa = new nfa_t();
   
   /* initially, e-closure is the only state in Dstates and it is unmarked */
   state = dfa->add_state();
   dfa->start = state;
   compute_eps_closure(this->start, T);
   Dstate_map[state_set_key_t(T)] = state;
   Dstate_inv_map[state] = T;
   unmarked.push_back(state);

   /* while there is an unmarked state T in Dstates do begin */
   while (!unmarked.empty())
   {
      Tstate = unmarked.front(); unmarked.pop_front();
      T = Dstate_inv_map[Tstate];

      /* for each input symbol a do begin */
      for (unsigned int i=0; i < MAX_SYMS; i++)
      {
	 /* U := e-closure(move(T,a)) */
	 compute_move(T, i, move);
	 compute_eps_closure(move, U);

	 /* if U is not in Dstates then */
	 Dstate_map_iter = Dstate_map.find(state_set_key_t(U));
	 if (Dstate_map_iter == Dstate_map.end())
	 {
	    Ustate = dfa->add_state();
	    Dstate_map[state_set_key_t(U)] = Ustate;
	    Dstate_inv_map[Ustate] = U;

	    unmarked.push_back(Ustate);

	    /* if any state in U is accepting, then need to mark
	       Ustate as accepting. */
	    std::list<state_id_t> out;
	    set_intersection(U.begin(), U.end(),
			     accepting.begin(), accepting.end(),
			     back_inserter(out));
	    if (!out.empty())
	    {
	       dfa->accepting.push_back(Ustate);
	    }

	 }
	 else
	 {
	    Ustate = (*Dstate_map_iter).second;
	 }
	 
	 dfa->add_trans(Tstate, Ustate, i);
      }
   }

   /* this sets the id and propagates the accepting states */
   dfa->set_id(this->machine_id);
   dfa->set_accepting_states(dfa->machine_id);
   return dfa;
}

/*-----------------------------------------------------------------------------
 * nfa_t::is_deterministic
 *---------------------------------------------------------------------------*/
bool nfa_t::is_deterministic(void) const
{
   for (unsigned int j=0; j < states.size(); j++)
   {
      const nfa_state_t& st = states[j];

      if (!st.eps_trans.empty())
	 return false;

      for (unsigned int i=0; i < MAX_SYMS; i++)
      {
	 if (st.trans[i].size() > 1)
	 {
	    return false;
	 }
      }
   }

   return true;
}


/*-----------------------------------------------------------------------------
 * nfa_t::set_accepting_states
 *---------------------------------------------------------------------------*/
void nfa_t::set_accepting_states(unsigned int id)
{
   for (unsigned int j=0; j < states.size(); j++)
   {
      states[j].accept_id.clear();
   }

   list<state_id_t>::iterator li;
   for (li = accepting.begin(); li != accepting.end(); li++)
   {
      states[*li].accept_id.push_back((int)id);
   }
}


/*-----------------------------------------------------------------------------
 * nfa_t::xfa_output()
 *   This routine outputs an nfa/dfa in XFA format.  This specific function
 *   just wraps xfa_output(FILE *) and provides an alternate interface.
 *---------------------------------------------------------------------------*/
void nfa_t::xfa_output(const char *filename, unsigned int sig_id)
{
   /* the file is created or truncated */
   FILE *fp = fopen(filename, "w");

   if (fp != NULL)
   {
      xfa_output(fp, sig_id);
      fclose(fp);
      return;
   }

   /* could not open the file: report the reason and terminate */
   fprintf(stdout, "ERROR:  could not open file %s for writing.\n"
           "Reason: %s\n", filename, strerror(errno));
   exit(-1);
}

/* output an nfa/dfa to be used by BDD project */
void nfa_t::bdd_output(const char *filename, unsigned int sig_id)
{
   /* the file is opened in append mode, so earlier contents are kept */
   FILE *fp = fopen(filename, "a");

   if (fp != NULL)
   {
      bdd_output(fp, sig_id);
      fclose(fp);
      return;
   }

   /* could not open the file: report the reason and terminate */
   fprintf(stdout, "ERROR:  could not open file %s for writing.\n"
           "Reason: %s\n", filename, strerror(errno));
   exit(-1);
}


/*-----------------------------------------------------------------------------
 * nfa_t::xfa_output()
 *   This routine outputs an nfa/dfa in XFA format.
 *   Most of this code is copied from rca_ascii_out in types.cc
 *---------------------------------------------------------------------------*/
/* Writes this machine to 'out' in XFA text format.
 *
 *   [in] out    - stream to write to
 *   [in] sig_id - id printed in the header line; 0 means "use machine_id"
 *
 * The output is a header line "sid<ID>->S<start> {", one declaration line
 * per state (with a "[id,fire]" entry for each accept id of that state),
 * then for every state one line per distinct destination listing the
 * symbols that lead there, then that state's epsilon transitions, and
 * finally a closing "}".
 */
void nfa_t::xfa_output(FILE *out, unsigned int sig_id)
{
   list<state_id_t>::iterator li;

   /* little hack to deal with my indecision about the right way to
    * associate IDs with signatures. TODO fix this.*/
   if (sig_id == 0)
      sig_id = this->machine_id;

   fprintf(out, "sid%u->S%u {\n", sig_id, this->start);

   /* first output the states themselves */
   for (unsigned int i = 0; i < states.size(); i++)
   {
      /* Note:  States that don't have annotations do not need to be
	 explicitly printed, but we go ahead and do it just in case */
      nfa_state_t& t = states[i];

      fprintf(out, "   S%u(", i);
      if (!t.accept_id.empty())
      {
	 /* 'first' suppresses the comma in front of the first entry */
	 bool first = true;

	 list<int>::iterator it;
	 for (it = t.accept_id.begin(); it != t.accept_id.end(); it++)
	 {
	    if (!first)
	       fprintf(out, ",");

	    fprintf(out, "[%u,fire]", (unsigned int)(*it));
	    first = false;
	 }
      }
      fprintf(out, ");\n");
   }
   
   /* second, output the transitions */
   for (unsigned int i=0; i < states.size(); i++)
   {
      state_id_t src = i;
      state_id_t dest;
      
      set<state_id_t> dests;
      set<state_id_t>::iterator di;

      /* build a list of destinations */
      /* (this inner 'i' shadows the state index; 'src' holds the state) */
      for (unsigned int i=0; i < MAX_SYMS; i++)
      {
	 dests.insert(states[src].trans[i].begin(), 
		      states[src].trans[i].end());
      }
      
      /* look at each reachable destination and collect the transition
       * symbols to that destination */
      fprintf(out, "\n");
      for (di = dests.begin(); di != dests.end(); di++)
      {
	 /* 'start' here is a local marking the first symbol of the current
	  * run; it shadows the member of the same name */
	 unsigned int j, start;
	 bool first = true;
	 
	 dest = (*di);
	 fprintf(out, "   S%u -> S%u (", src, dest);
	 
	 /* scan symbols 0..255, printing each maximal run of consecutive
	  * symbols that lead to 'dest' */
	 j=0;
	 do {
	    start = j;
	    
	    /* spin until we find something.  Since dest is a valid
	     * destination computed above, we are guaranteed to find 
	     * something */
	    li = find(states[src].trans[j].begin(), 
		      states[src].trans[j].end(), dest);
	    if (li == states[src].trans[j].end())
	    {
	       /* symbol j does not lead to dest; 'continue' jumps to the
		* loop condition at the bottom */
	       j++;
	       continue;
	    }
	    
	    /* Found it.  spin until we don't find something */
	    
	    while ((j <= 255) &&
		   (find(states[src].trans[j].begin(),
			 states[src].trans[j].end(), dest) != 
		                                  states[src].trans[j].end()))
	    {
	       j++;
	    }
	    /* step back to the last symbol that did lead to dest */
	    j--;

	    if (!first)
	       fprintf(out, ", ");
	 
	    if (j - start > 1) /* a group of characters */
	    {
	       output_char_hack(out, start);
	       fprintf(out, "..");
	       output_char_hack(out, j);
	       //output_range(out, start, j);
	    }
	    else if (j - start == 1)
	    {
	       /* a run of exactly two symbols is printed as two separate
		* entries rather than as a range */
	       output_char_hack(out, start);
	       fprintf(out, ", ");
	       output_char_hack(out, j);
	       //output_char(out, start);
	       //fprintf(out, ", ");
	       //output_char(out, j);
	    }
	    else /* just a single character */
	    {
	       output_char_hack(out, start);
	       //output_char(out, start);
	       
	    }

	    /* resume scanning after the run just printed */
	    j++;
	    first = false;

	 } while (j <= 255);
	 fprintf(out, " );\n");

      }

      /* Now, add epsilon transitions */
      for (li = states[src].eps_trans.begin(); 
	   li != states[src].eps_trans.end(); li++)
      {
	 fprintf(out, "   S%u -> S%u ( '\\eps' );\n", src, *li);
      }
   }

   fprintf(out, "}\n");
}

/* slightly adopted from Randy's code by Liu Yang */
void nfa_t::bdd_output(FILE *out, unsigned int sig_id)
{
   list<state_id_t>::iterator li;
   unsigned int num_accept_states = 0, num_trans = 0;
   //vector<state_id_t> accept_ids;
   multimap<int, int> accept_ids;
   multimap<int, int>::iterator mit;
  

   if (sig_id == 0)
      sig_id = this->machine_id;

   /* output the number of states */
   fprintf(out, "%u\n", states.size());

   /* collect the accept_id, i.e., signature id */
   for (unsigned int i = 0; i < states.size(); i++)
   {
      nfa_state_t& t = states[i];

      if (!t.accept_id.empty())
      {
	 list<int>::iterator it;
	 for (it = t.accept_id.begin(); it != t.accept_id.end(); it++)
	    accept_ids.insert(pair<int, int>(i, *it));
      }
   }

   /* output (acc_state_no accept_id) pairs to indicate sig id for matching */
   fprintf(out, "%d\n", accept_ids.size());
   mit = accept_ids.begin();
   while (mit != accept_ids.end()) {
     fprintf(out, "%d %d\n", mit->first, mit->second);
     mit++;
   }

   /* output number of accepting states and the accepting states */
   fprintf(out, "%u ", accepting.size());
   std::list<state_id_t>::iterator it = accepting.begin();
   while (it != accepting.end()) {
     fprintf(out, "%u ", *it);
     it++;
   }
   
   fprintf(out, "\n");
   
   /* output the start state */
   fprintf(out, "%u\n", this->start);

   /* count the number of transitions before output the transitions */
   for (unsigned int i=0; i < states.size(); i++)
   {
      state_id_t src = i;
      state_id_t dest;
      
      set<state_id_t> dests;
      set<state_id_t>::iterator di;

      /* build a list of destinations */
      for (unsigned int i=0; i < MAX_SYMS; i++)
      {
	 dests.insert(states[src].trans[i].begin(), 
		      states[src].trans[i].end());
      }
      
      /* look at each reachable destination and collect the transition
       * symbols to that destination */
      for (di = dests.begin(); di != dests.end(); di++)
      {
	 unsigned int j, start;
	 bool first = true;
	 
	 dest = (*di);
	 
	 j=0;
	 do {
	    start = j;
	    
	    /* spin until we find something.  Since dest is a valid
	     * destination computed above, we are guaranteed to find 
	     * something */
	    li = find(states[src].trans[j].begin(), 
		      states[src].trans[j].end(), dest);
	    if (li == states[src].trans[j].end())
	    {
	       j++;
	       continue;
	    }
	    
	    /* Found it.  spin until we don't find something */
	    
	    while ((j <= 255) &&
		   (find(states[src].trans[j].begin(),
			 states[src].trans[j].end(), dest) != 
		                                  states[src].trans[j].end()))
	    {
	       j++;
	    }
	    j--;

	 
	    if (j - start > 1) /* a group of characters */
	      num_trans += j - start + 1;
	    else if (j - start == 1)
	      num_trans += 2;
	    else /* just a single character */
	      num_trans += 1;

	    j++;

	 } while (j <= 255);

      }

      /* Now, add epsilon transitions */
      for (li = states[src].eps_trans.begin(); 
	   li != states[src].eps_trans.end(); li++)
      {
	 num_trans += 1;
      }
   }

   /* output number of transitions */   
   fprintf(out, "%u\n", num_trans);

   /* output the transitions */
   for (unsigned int i=0; i < states.size(); i++)
   {
      state_id_t src = i;
      state_id_t dest;
      
      set<state_id_t> dests;
      set<state_id_t>::iterator di;

      /* build a list of destinations */
      for (unsigned int i=0; i < MAX_SYMS; i++)
      {
	 dests.insert(states[src].trans[i].begin(), 
		      states[src].trans[i].end());
      }
      
      /* look at each reachable destination and collect the transition
       * symbols to that destination */
      for (di = dests.begin(); di != dests.end(); di++)
      {
	 unsigned int j, start;
	 bool first = true;
	 
	 dest = (*di);
	 
	 j=0;
	 do {
	    start = j;
	    
	    /* spin until we find something.  Since dest is a valid
	     * destination computed above, we are guaranteed to find 
	     * something */
	    li = find(states[src].trans[j].begin(), 
		      states[src].trans[j].end(), dest);
	    if (li == states[src].trans[j].end())
	    {
	       j++;
	       continue;
	    }
	    
	    /* Found it.  spin until we don't find something */
	    
	    while ((j <= 255) &&
		   (find(states[src].trans[j].begin(),
			 states[src].trans[j].end(), dest) != 
		                                  states[src].trans[j].end()))
	    {
	       j++;
	    }
	    j--;

	    if (!first)
	      ;  	
	       //fprintf(out, "");
	 
	    if (j - start > 1) /* a group of characters */
	    {
	      for (unsigned k = start; k <= j; k++) {
		fprintf(out, "%u %u %u\n", src, k, dest);
	      }	
	    }
	    else if (j - start == 1)
	    {
	      fprintf(out, "%u %u %u\n", src, start, dest);
	      fprintf(out, "%u %u %u\n", src, j, dest);
	    }
	    else /* just a single character */
	    {
	      fprintf(out, "%u %u %u\n", src, start, dest);
	    }

	    j++;
	    first = false;

	 } while (j <= 255);

      }

      /* Now, add epsilon transitions */
      for (li = states[src].eps_trans.begin(); 
	   li != states[src].eps_trans.end(); li++)
      {
	 fprintf(out, "%u -1 %u\n", src, *li);
      }
   }

}


/*-----------------------------------------------------------------------------
 *nfa_t::output_char_hack
 *  does the actual output of a character.  This is lifted from
 *  types.cc
 *---------------------------------------------------------------------------*/
/* Prints the symbol 'c' (0..255) to 'out'.
 *
 * Characters with a C escape sequence are printed as that escape in single
 * quotes, other printable ASCII characters (0x20..0x7e) are printed in
 * single quotes, and everything else is printed as a decimal number.
 *
 * The escape text is held in a pointer to const char: a string literal
 * cannot be assigned to a plain 'char *' in C++11 and later. */
void nfa_t::output_char_hack(FILE *out, unsigned int c)
{
   assert (c <= 255);

   const char *s = NULL;
   switch (c)
   {
      case '\a': s = "'\\a'"; break;
      case '\b': s = "'\\b'"; break;
      case '\f': s = "'\\f'"; break;
      case '\n': s = "'\\n'"; break;
      case '\r': s = "'\\r'"; break;
      case '\t': s = "'\\t'"; break;
      case '\v': s = "'\\v'"; break;
      case '\\': s = "'\\\\'"; break;
      case '\?': s = "'\\?'"; break;
      case '\'': s = "'\\''"; break;
      case '\"': s = "'\\\"'"; break;
      default:
	 break;
   }

   if (s != NULL)
   {
      fprintf(out, "%s", s);
   }
   else
   {
      if (c >= 0x20 && c <= 0x7e)
	 fprintf(out, "'%c'", (unsigned char)c);
      else
	 fprintf(out, "%u", (unsigned char)c);
   }
}


/*-----------------------------------------------------------------------------
 *dfa_tab_t::populate
 *  converts a deterministic NFA to a compact table
 *---------------------------------------------------------------------------*/
/* Fills this table from a deterministic machine.
 *
 *   [in] nfa - the machine to convert
 *
 *   Return value: true if the table was built.  false, with nothing
 *   allocated or modified, if 'nfa' is not deterministic or if some state
 *   has no transition for some symbol.
 *
 * The second condition is checked because the table stores exactly one
 * next state per (state, symbol) cell and is_deterministic() also accepts
 * cells with no destination; reading front() of an empty list is
 * undefined behaviour.
 *
 * On success tab[s][c] holds the next state, acc[s] is either 0 (no accept
 * ids) or an array of accept ids terminated by -1, and num_states, start
 * and machine_id are copied from 'nfa'.
 */
bool dfa_tab_t::populate(const nfa_t& nfa)
{
   
   if (!nfa.is_deterministic())
   {
      return false;
   }

   const unsigned int n_states = nfa.states.size();

   /* every cell needs a destination; verify that before allocating */
   for (unsigned int i=0; i < n_states; i++)
   {
      for (unsigned int j=0; j < MAX_SYMS; j++)
      {
	 if (nfa.states[i].trans[j].empty())
	 {
	    return false;
	 }
      }
   }

   /* create the 2d array.  just use new/delete for now to be consistent */
   this->tab = new unsigned int *[n_states];

   for (unsigned int i=0; i < n_states; i++)
   {
      this->tab[i] = new unsigned int[MAX_SYMS];
   }

   /* populate the table */
   for (unsigned int i=0; i < n_states; i++)
   {
      for (unsigned int j=0; j < MAX_SYMS; j++)
      {
	 this->tab[i][j] = nfa.states[i].trans[j].front();
      }
   }

   /* now, construct the accepting state id list */

   /* create the array of accept id lists */
   this->acc = new int *[n_states];

   for (unsigned int i=0; i < n_states; i++)
   {
      unsigned int sz = nfa.states[i].accept_id.size();
      unsigned int j=0;

      if (sz > 0)
      {
	 /* one extra slot for the -1 terminator */
	 this->acc[i] = new int[sz+1];
	 
	 std::list<int>::const_iterator ai;
	 for (ai = nfa.states[i].accept_id.begin();
	      ai != nfa.states[i].accept_id.end();
	      ++ai)
	 {
	    this->acc[i][j++] = *ai;
	 }
	 this->acc[i][j] = -1;
      }
      else
      {
	 this->acc[i] = 0;
      }
   }

   /* set the start state */
   this->num_states = n_states;
   this->start = nfa.start;
   this->machine_id = nfa.machine_id;

   return true;
}   

/*-----------------------------------------------------------------------------
 * dfa_tab_free_partial
 *   Releases a table/accept-list pair that was allocated by
 *   populate_dfa_from_file but never handed over to a dfa_tab_t.
 *   Every row pointer must be either null or a live new[] allocation.
 *---------------------------------------------------------------------------*/
static void dfa_tab_free_partial(unsigned int **tab, int **acc,
                                 unsigned long num_states)
{
   for (unsigned long i = 0; i < num_states; i++)
   {
      delete[] tab[i];
      delete[] acc[i];   /* delete[] on a null pointer is a no-op */
   }
   delete[] tab;
   delete[] acc;
}


/*-----------------------------------------------------------------------------
 * dfa_tab_parse_body
 *   Reads everything that follows the state count in a DFA file:
 *     <accept-count>  then that many "<state> <accept_id>" pairs,
 *     <start-state>,
 *     <transition-count> then that many "<state> <symbol> <next>" triples.
 *   [in]  f          - open file, positioned just after the state count
 *   [in]  num_states - number of rows in tab and acc
 *   [out] tab        - pre-zeroed num_states x MAX_SYMS table to fill
 *   [out] acc        - num_states null pointers; rows are allocated here
 *   [out] start      - receives the start state
 *   Returns false as soon as a value is missing or an index read from the
 *   file falls outside the table.  Rows already stored in acc are left for
 *   the caller to release.
 *---------------------------------------------------------------------------*/
static bool dfa_tab_parse_body(FILE *f, unsigned long num_states,
                               unsigned int **tab, int **acc,
                               unsigned long *start)
{
   unsigned long num_accept_ids = 0, num_trans = 0;
   unsigned long cur_state = 0, next_state = 0;
   unsigned int input_symbol = 0, accept_id = 0;

   /* the pairs for one state need not be contiguous in the file, so
      collect them per state first; the map cleans itself up on every
      return path */
   std::map<unsigned long, std::list<int> > accept_lists;

   if (fscanf(f, "%lu", &num_accept_ids) != 1)
      return false;
#ifdef DEBUG
   printf("no of accpet_id is %lu \n", num_accept_ids);
#endif

   for (unsigned long i = 0; i < num_accept_ids; i++)
   {
      if (fscanf(f, "%lu %u", &cur_state, &accept_id) != 2 ||
          cur_state >= num_states)
      {
         return false;
      }
      accept_lists[cur_state].push_back(accept_id);
   }

   /* turn each collected list into a -1 terminated array; states that
      never appeared keep the null pointer the caller put there */
   std::map<unsigned long, std::list<int> >::const_iterator mi;
   for (mi = accept_lists.begin(); mi != accept_lists.end(); ++mi)
   {
      const std::list<int>& ids = mi->second;
      int *row = new int[ids.size() + 1];
      unsigned long k = 0;

      std::list<int>::const_iterator ai;
      for (ai = ids.begin(); ai != ids.end(); ++ai)
      {
         row[k++] = *ai;
      }
      row[k] = -1;
      acc[mi->first] = row;
   }

   /* start state */
   if (fscanf(f, "%lu", start) != 1)
      return false;
#ifdef DEBUG
   printf("start state is %lu\n", *start);
#endif
   /* a file describing zero states is still accepted as before; only a
      non-empty table can have an out-of-range start */
   if (num_states > 0 && *start >= num_states)
      return false;

   /* transitions */
   if (fscanf(f, "%lu", &num_trans) != 1)
      return false;
#ifdef DEBUG
   printf("number of transition is %lu\n", num_trans);
#endif

   assert(num_trans==(num_states*MAX_SYMS));

   for (unsigned long i = 0; i < num_trans; i++)
   {
      if (fscanf(f, "%lu %u %lu", &cur_state, &input_symbol, &next_state) != 3)
         return false;

      /* all three values index (or are later used to index) the table,
         so none of them may come from the file unchecked */
      if (cur_state >= num_states ||
          input_symbol >= MAX_SYMS ||
          next_state >= num_states)
      {
         return false;
      }
      tab[cur_state][input_symbol] = static_cast<unsigned int>(next_state);
   }

   return true;
}


/*-----------------------------------------------------------------------------
 *dfa_tab_t::populate_dfa_from_file
 *  Loads a DFA from a text file of whitespace-separated decimal numbers:
 *  state count, accept-id count and pairs, start state, transition count
 *  and triples (see dfa_tab_parse_body for the exact order).
 *  [in] dfa_fileName - path of the file to read
 *  [in] dfa_count    - value stored as this table's machine_id
 *  Returns true on success.  Returns false, without modifying this object,
 *  when the file is malformed or refers to states/symbols outside the
 *  table.  Terminates the process with a non-zero status when the file
 *  cannot be opened.
 *---------------------------------------------------------------------------*/
bool dfa_tab_t::populate_dfa_from_file(char * dfa_fileName, unsigned int dfa_count)
{
   unsigned long dfa_num_states = 0;
   unsigned long dfa_start = 0;

#ifdef DEBUG
   printf("dfa_count=%u file %s \n",dfa_count,dfa_fileName);
#endif

   FILE *f = fopen(dfa_fileName, "r");
   if (f == NULL)
   {
      /* this is a failure, so do not report success to the shell */
      fprintf(stderr, "could not open file %s\n", dfa_fileName);
      exit(-1);
   }

   /* table entries are unsigned int, so the state count has to fit in one */
   if (fscanf(f, "%lu", &dfa_num_states) != 1 ||
       static_cast<unsigned long>(static_cast<unsigned int>(dfa_num_states))
          != dfa_num_states)
   {
      fprintf(stderr,
              "Error in %s (%s:%d): bad state count in DFA file %s\n",
              __func__, __FILE__, __LINE__, dfa_fileName);
      fclose(f);
      return false;
   }
#ifdef DEBUG
   printf("no of state is %lu\n", dfa_num_states);
#endif

   /* build into locals and commit only once the whole file has parsed.
      just use new/delete for now to be consistent */
   unsigned int **new_tab = new unsigned int *[dfa_num_states];
   int **new_acc = new int *[dfa_num_states];

   for (unsigned long i = 0; i < dfa_num_states; i++)
   {
      new_tab[i] = new unsigned int[MAX_SYMS]();   /* zero-filled row */
      new_acc[i] = 0;
   }

   const bool ok = dfa_tab_parse_body(f, dfa_num_states,
                                      new_tab, new_acc, &dfa_start);
   fclose(f);

   if (!ok)
   {
      fprintf(stderr,
              "Error in %s (%s:%d): malformed DFA file %s\n",
              __func__, __FILE__, __LINE__, dfa_fileName);
      dfa_tab_free_partial(new_tab, new_acc, dfa_num_states);
      return false;
   }

   this->tab = new_tab;
   this->acc = new_acc;
   this->num_states = dfa_num_states;
   this->start = dfa_start;
   this->machine_id = dfa_count;

   return true;
}


/*-----------------------------------------------------------------------------
 *dfa_tab_t::to_nfa_t
 *  Reverse direction of "populate": rebuilds an nfa_t from this table.
 *  Returns 0 when the table holds no states, otherwise a newly allocated
 *  machine.  Calls exit(-1) if the rebuilt machine fails the determinism
 *  or state-count sanity checks.
 *---------------------------------------------------------------------------*/
nfa_t* dfa_tab_t::to_nfa_t(void)
{
   if (this->num_states == 0)
      return 0;

   nfa_t *machine = new nfa_t();

   /* reserve room up front, then create the states; ids are expected to
      come back in table order */
   machine->states.reserve(this->num_states);
   for (unsigned int s = 0; s < this->num_states; ++s)
   {
      state_id_t made = machine->add_state();
      assert (made == (state_id_t)s);
   }

   /* copy every (state, symbol) -> next entry */
   for (unsigned int s = 0; s < this->num_states; ++s)
   {
      for (unsigned int sym = 0; sym < MAX_SYMS; ++sym)
         machine->add_trans(s, this->tab[s][sym], sym);
   }

   /* a non-null accept array marks an accepting state; walk it up to the
      -1 terminator */
   for (unsigned int s = 0; s < this->num_states; ++s)
   {
      const int *ids = this->acc[s];
      if (!ids)
         continue;

      for (; *ids != -1; ++ids)
         machine->states[s].accept_id.push_back(*ids);

      machine->accepting.push_back(s);
   }

   machine->start = this->start;
   machine->machine_id = this->machine_id;

   /* sanity checks on the result */
   if (!machine->is_deterministic())
   {
      fprintf(stderr, 
              "Error in %s (%s:%d): constructed DFA is not deterministic!\n",
              __func__, __FILE__, __LINE__);
      exit(-1);
   }

   if (machine->next_id != machine->states.size())
   {
      fprintf(stderr, 
              "Error in %s (%s:%d): mismatch in state count!\n",
              __func__, __FILE__, __LINE__);
      exit(-1);
   }

   return machine;
}


/*-----------------------------------------------------------------------------
 *dfa_tab_t::cleanup
 *  Releases the transition table and the accept id arrays, then resets
 *  tab, acc, num_states and start to zero.
 *---------------------------------------------------------------------------*/
void dfa_tab_t::cleanup(void)
{
   for (unsigned int s = 0; s < this->num_states; ++s)
   {
      delete[] this->tab[s];
      /* states without accept ids hold a null pointer here; delete[] on a
         null pointer does nothing */
      delete[] this->acc[s];
   }

   delete[] this->tab;
   delete[] this->acc;

   this->tab = 0;
   this->acc = 0;
   this->num_states = 0;
   this->start = 0;
}


/*-----------------------------------------------------------------------------
 *dfa_tab_t::simulate
 *  Runs the table over an input buffer and reports matches on stdout.
 *  [in] buf - input tape
 *  [in] len - number of bytes of buf to consume
 *  [in] intermed - true: print the accept ids each time an accepting
 *                  state is entered;
 *                  false: print only the verdict for the state reached
 *                  after the last byte.
 *  Does nothing when the table has no states.
 *---------------------------------------------------------------------------*/
void dfa_tab_t::simulate(const unsigned char *buf, unsigned int len, 
			 bool intermed=true) const
{
   if (this->num_states == 0)
      return;

   unsigned int state = this->start;

   for (unsigned int pos = 0; pos < len; ++pos)
   {
      state = this->tab[state][buf[pos]];

      if (!intermed || !this->acc[state])
         continue;

      fprintf(stdout, "Matched, at offset %u: ", pos);
      for (const int *id = this->acc[state]; *id != -1; ++id)
         fprintf(stdout, "%d ", *id);
      fprintf(stdout, "\n");
   }

   /* intermediate mode has already reported everything it is going to */
   if (intermed)
      return;

   if (this->acc[state])
   {
      fprintf(stdout, "Matched, at end: ");
      for (const int *id = this->acc[state]; *id != -1; ++id)
         fprintf(stdout, "%d ", *id);
      fprintf(stdout, "\n");
   }
   else
   {
      fprintf(stdout, "no match.\n");
   }
}


/*#############################################################################
 *###########################################################################*/



/*-----------------------------------------------------------------------------
 * do_kleene_closure
 *   Returns a new machine for (input)*.  The input is cloned, not
 *   modified; the caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t* do_kleene_closure(nfa_t *input)
{
   nfa_t *result = input->clone();

   /* two fresh states wrap the cloned machine */
   state_id_t entry = result->add_state();
   state_id_t leave = result->add_state();
   state_id_t inner_start = result->start;

   /* entry may run the body or skip it; each old accepting state may loop
      back for another pass or leave */
   result->add_eps_trans(entry, inner_start);
   result->add_eps_trans(entry, leave);
   result->add_eps_trans(result->accepting, inner_start);
   result->add_eps_trans(result->accepting, leave);

   /* the clone carried the old accepting states along; replace them with
      the single new one */
   result->start = entry;
   result->accepting.clear();
   result->accepting.push_back(leave);

   return result;
}


/*-----------------------------------------------------------------------------
 * do_concat
 *   Returns a new machine for (left)(right).  Both operands are folded
 *   into a fresh nfa_t; the caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *do_concat(nfa_t *left, nfa_t *right)
{
   nfa_t *joined = new nfa_t();

   state_id_t first_start;
   std::list<state_id_t> first_accept;
   joined->fold_in(left, first_start, first_accept);

   state_id_t second_start;
   std::list<state_id_t> second_accept;
   joined->fold_in(right, second_start, second_accept);

   /* enter through the left operand, accept where the right one does */
   joined->start = first_start;
   joined->accepting = second_accept;

   /* glue: every accepting state of the left operand continues into the
      start of the right operand */
   joined->add_eps_trans(first_accept, second_start);

   return joined;
}


/*-----------------------------------------------------------------------------
 * do_alternation -- the OR (|) operator
 *   Returns a new machine for (left)|(right) with one new start state and
 *   one new accepting state.  The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *do_alternation(nfa_t *left, nfa_t *right)
{
   nfa_t *either = new nfa_t();

   state_id_t lhs_start, rhs_start;
   std::list<state_id_t> lhs_accept, rhs_accept;
   either->fold_in(left, lhs_start, lhs_accept);
   either->fold_in(right, rhs_start, rhs_accept);

   state_id_t fork = either->add_state();
   state_id_t join = either->add_state();

   either->start = fork;
   either->accepting.push_back(join);

   /* fork fans out to both operands, both operands funnel into join */
   either->add_eps_trans(either->start, lhs_start);
   either->add_eps_trans(either->start, rhs_start);
   either->add_eps_trans(lhs_accept, join);
   either->add_eps_trans(rhs_accept, join);

   return either;
}

/*-----------------------------------------------------------------------------
 * do_alternation_acc -- the OR (|) operator, keeping source accept states
 *   Same wiring as do_alternation, except that the operands are folded in
 *   with fold_in_acc and their accepting states are added to the result's
 *   accepting list after the new accepting state.
 *---------------------------------------------------------------------------*/
nfa_t *do_alternation_acc(nfa_t *left, nfa_t *right)
{
   nfa_t *either = new nfa_t();

   state_id_t lhs_start, rhs_start;
   std::list<state_id_t> lhs_accept, rhs_accept;
   either->fold_in_acc(left, lhs_start, lhs_accept);
   either->fold_in_acc(right, rhs_start, rhs_accept);

   state_id_t fork = either->add_state();
   state_id_t join = either->add_state();

   either->start = fork;

   /* accepting list: new state first, then left's, then right's */
   either->accepting.push_back(join);
   either->accepting.insert(either->accepting.end(),
                            lhs_accept.begin(), lhs_accept.end());
   either->accepting.insert(either->accepting.end(),
                            rhs_accept.begin(), rhs_accept.end());

   /* epsilon wiring */
   either->add_eps_trans(either->start, lhs_start);
   either->add_eps_trans(either->start, rhs_start);
   either->add_eps_trans(lhs_accept, join);
   either->add_eps_trans(rhs_accept, join);

   return either;
}


/*-----------------------------------------------------------------------------
 * do_positive_closure
 *   Returns a new machine for (input)+, built as input followed by
 *   (input)*.  The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *do_positive_closure(nfa_t *input)
{
   nfa_t *star = do_kleene_closure(input);
   nfa_t *plus = do_concat(input, star);

   /* the closure was only an intermediate */
   delete star;

   return plus;
}


/*-----------------------------------------------------------------------------
 * do_repetition_count_single
 *  Handles nfa{m}: m copies of the input concatenated together.
 *  A count of zero yields the epsilon machine.  The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *do_repetition_count_single(nfa_t *input, unsigned int count)
{
   if (count == 0)
      return make_nfa_eps();

   /* start from one copy and append the remaining count-1 */
   nfa_t *chain = input->clone();
   for (unsigned int remaining = count - 1; remaining > 0; --remaining)
   {
      nfa_t *copy = input->clone();
      nfa_t *longer = do_concat(chain, copy);

      delete chain;
      delete copy;
      chain = longer;
   }

   return chain;
}


/*-----------------------------------------------------------------------------
 * do_repetition_count_range
 *   Handles nfa{lo,hi} as the alternation of nfa{lo}, nfa{lo+1}, ...,
 *   nfa{hi}.  Requires lo <= hi (asserted).  The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *do_repetition_count_range(nfa_t *in, unsigned int lo, unsigned int hi)
{
   assert(lo <= hi);

   nfa_t *combined = do_repetition_count_single(in, lo);

   for (unsigned int reps = lo+1; reps <= hi; reps++)
   {
      nfa_t *exact = do_repetition_count_single(in, reps);
      nfa_t *widened = do_alternation(combined, exact);

      delete combined;
      delete exact;
      combined = widened;
   }

   return combined;
}


/*-----------------------------------------------------------------------------
 * do_repetition_count_unrestricted_max
 *   Handles nfa{lo,}: exactly lo copies followed by the Kleene closure.
 *   The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *do_repetition_count_unrestricted_max(nfa_t *in, unsigned int lo)
{
   nfa_t *required = do_repetition_count_single(in,lo);
   nfa_t *optional_tail = do_kleene_closure(in);

   nfa_t *result = do_concat(required, optional_tail);

   /* both pieces were intermediates */
   delete required;
   delete optional_tail;

   return result;
}
   

/*-----------------------------------------------------------------------------
 * do_repetition_count_unrestricted_min
 *   Handles nfa{,max}, i.e. between zero and max repetitions.
 *   We just do a little trick here - min is 0, so we build the range
 *   1..max and add an epsilon transition from start to accepting states.
 *   [in] in  - machine to repeat (not modified)
 *   [in] max - largest number of repetitions allowed
 *   Returns a newly allocated machine owned by the caller.
 *---------------------------------------------------------------------------*/
nfa_t *do_repetition_count_unrestricted_min(nfa_t *in, unsigned int max)
{
   /* With max == 0 only the empty string matches.  The range 1..0 below
      would violate do_repetition_count_range's lo <= hi precondition, so
      hand back the epsilon machine directly. */
   if (max == 0)
   {
      return make_nfa_eps();
   }

   nfa_t *final = do_repetition_count_range(in, 1, max);

   /* the zero-repetition case: skip straight from start to accept */
   final->add_eps_trans(final->start, final->accepting);

   return final;
}


/*-----------------------------------------------------------------------------
 * do_at_most_once
 *   The question mark operator (?).  The input is cloned and wrapped in a
 *   new start and a new accepting state, with an epsilon transition that
 *   bypasses the cloned body.  The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *do_at_most_once(nfa_t *in)
{
   nfa_t *wrapped = in->clone();

   state_id_t entry = wrapped->add_state();
   state_id_t leave = wrapped->add_state();

   /* entry -> body, body -> leave, and the bypass entry -> leave */
   wrapped->add_eps_trans(entry, wrapped->start);
   wrapped->add_eps_trans(wrapped->accepting, leave);
   wrapped->add_eps_trans(entry, leave);

   /* the new states take over as start and sole accepting state */
   wrapped->start = entry;
   wrapped->accepting.clear();
   wrapped->accepting.push_back(leave);

   return wrapped;
}


/*-----------------------------------------------------------------------------
 * make_nfa_char
 *   Basis case: a two-state machine with one transition from start to
 *   accept for every symbol set in chars.  When g_flags.case_insensitive
 *   is set, an alphabetic symbol contributes transitions on both its
 *   upper and lower case forms.
 *---------------------------------------------------------------------------*/
nfa_t *make_nfa_char(const std::bitset<MAX_SYMS>& chars)
{
   nfa_t *machine = new nfa_t();

   state_id_t from = machine->add_state();
   state_id_t to   = machine->add_state();

   machine->start = from;
   machine->accepting.push_back(to);

   for (unsigned int sym = 0; sym < MAX_SYMS; ++sym)
   {
      if (!chars.test(sym))
         continue;

      const bool fold_case = g_flags.case_insensitive && isalpha(sym);
      if (!fold_case)
      {
         machine->add_trans(from, to, sym);
         continue;
      }

      machine->add_trans(from, to, toupper(sym));
      machine->add_trans(from, to, tolower(sym));
   }

   return machine;
}


/*-----------------------------------------------------------------------------
 * make_nfa_eps
 *   Basis case: two states joined by a single epsilon transition, so the
 *   machine accepts without consuming input.
 *---------------------------------------------------------------------------*/
nfa_t *make_nfa_eps(void)
{
   nfa_t *machine = new nfa_t();

   state_id_t from = machine->add_state();
   state_id_t to   = machine->add_state();

   machine->start = from;
   machine->accepting.push_back(to);
   machine->add_eps_trans(from, to);

   return machine;
}


/*-----------------------------------------------------------------------------
 * make_nfa_empty
 *   Basis case: a machine that accepts nothing.  It has a start state and
 *   an accepting state but deliberately no transition between them.
 *---------------------------------------------------------------------------*/
nfa_t *make_nfa_empty(void)
{
   nfa_t *machine = new nfa_t();

   state_id_t from = machine->add_state();
   state_id_t to   = machine->add_state();

   machine->start = from;
   machine->accepting.push_back(to);

   /* no add_eps_trans(from, to) here: the accepting state must stay
      unreachable */
   return machine;
}


/*-----------------------------------------------------------------------------
 * make_nfa_caret
 *   Builds the machine for the beginning-of-line (^) assertion.
 *   Four states: the start can accept immediately, or enter a scanning
 *   state that loops on every symbol; a '\n' moves the scanner to a
 *   state that can either accept or resume scanning.
 *---------------------------------------------------------------------------*/
nfa_t *make_nfa_caret(void)
{
   nfa_t *machine = new nfa_t();

   state_id_t entry    = machine->add_state();
   state_id_t scanning = machine->add_state();
   state_id_t after_nl = machine->add_state();
   state_id_t done     = machine->add_state();

   /* the scanner swallows any symbol */
   for (unsigned int sym = 0; sym < MAX_SYMS; ++sym)
   {
      machine->add_trans(scanning, scanning, sym);
   }

   /* a newline is the only way out of the scanner */
   machine->add_trans(scanning, after_nl, '\n');

   machine->add_eps_trans(entry, scanning);
   machine->add_eps_trans(entry, done);
   machine->add_eps_trans(after_nl, scanning);
   machine->add_eps_trans(after_nl, done);

   machine->start = entry;
   machine->accepting.push_back(done);

   return machine;
}




/*#############################################################################
 * code for walking the parse tree to produce an NFA w/ eps trans
 *###########################################################################*/


/*-----------------------------------------------------------------------------
 * disjunct_to_nfa
 *   Converts a disjunction into one machine by OR-ing the machines of its
 *   alternatives from left to right.  The disjunction must contain at
 *   least one alternative (asserted).  The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *disjunct_to_nfa(disjunction *re)
{
   list<alternative *>::iterator it = re->alternatives.begin();
   assert(it != re->alternatives.end());

   nfa_t *combined = alternative_to_nfa(*it);

   for (it++; it != re->alternatives.end(); it++)
   {
      nfa_t *branch = alternative_to_nfa(*it);
      nfa_t *merged = do_alternation(combined, branch);

      /* do_alternation produced a fresh machine; drop both inputs */
      delete branch;
      delete combined;
      combined = merged;
   }

   return combined;
}

/*-----------------------------------------------------------------------------
 * alternative_to_nfa
 *   Converts an alternative into one machine by concatenating the
 *   machines of its terms from left to right.  The caller owns the result.
 *---------------------------------------------------------------------------*/
nfa_t *alternative_to_nfa(alternative *alt)
{
   /* No terms at all: either the whole RE is empty, or this is an empty
      branch as in '(a|b|)c'.  The empty branch has to match the empty
      string, which is why this is make_nfa_eps() and not
      make_nfa_empty(). */
   if (alt->terms.empty())
      return make_nfa_eps();

   list<term *>::iterator it = alt->terms.begin();
   nfa_t *sequence = term_to_nfa(*it);

   for (it++; it != alt->terms.end(); it++)
   {
      nfa_t *piece = term_to_nfa(*it);
      nfa_t *extended = do_concat(sequence, piece);

      /* do_concat produced a fresh machine; drop both inputs */
      delete piece;
      delete sequence;
      sequence = extended;
   }

   return sequence;
}


/*-----------------------------------------------------------------------------
 * term_to_nfa
 *   Converts one term of an alternative into a machine.
 *   - An assertion term becomes the caret machine (^ with the multi-line
 *     flag set) or the epsilon machine (every other case, with a warning
 *     on stdout).
 *   - Any other term is its atom's machine, with the quantifier applied
 *     when one is present.
 *   The caller owns the returned machine.
 *---------------------------------------------------------------------------*/
nfa_t *term_to_nfa(term *trm)
{
   nfa_t *atom_nfa;
   nfa_t *final=0;
   assertion* asrt;

   if ((asrt = dynamic_cast<assertion*>(trm)))
   {
      if (asrt->at == assertion::CIRCUM)
      {
         if (g_flags.multi_line) /* we've got an assert and multiline allowed*/
         {
            final = make_nfa_caret();
         }
         else
         {
            fprintf(stdout, 
                    "WARNING: ^ assert present, but multi-line flag not set. "
                    "Ignoring.\n");

            final = make_nfa_eps();
         }
      }
      else if (asrt->at == assertion::DOLLAR)
      {
         /* newline added so the next line of output is not glued onto
            this warning */
         fprintf(stdout, 
                 "WARNING: ignoring occurrence of $ assert.\n");
         final = make_nfa_eps();
      }
      else
      {
         /* Any other assertion kind used to fall through with final == 0,
            handing callers a null machine to dereference.  Treat it like
            the other unsupported assertion: warn and match the empty
            string. */
         fprintf(stdout,
                 "WARNING: ignoring occurrence of unrecognized assert.\n");
         final = make_nfa_eps();
      }

      return final;
   }


   atom_nfa = atom_to_nfa(trm->a_);

   /* now, check to see if we have a quantifier, and process
      accordingly */
   if (trm->quant_)
   {
      switch(trm->quant_->quant_type_)
      {
         case quantifier::STAR:
            final = do_kleene_closure(atom_nfa); 
            break;
         case quantifier::PLUS:
            final = do_positive_closure(atom_nfa); 
            break;
         case quantifier::QM:
            final = do_at_most_once(atom_nfa); 
            break;
         case quantifier::RANGE:
         {
            /* a bound of -1 means that side of the range was left open */
            int lo = trm->quant_->low_;
            int hi = trm->quant_->high_;

            if (lo == hi)
               final = do_repetition_count_single(atom_nfa, lo);
            else if (lo == -1)
               final = do_repetition_count_unrestricted_min(atom_nfa, hi);
            else if (hi == -1)
               final = do_repetition_count_unrestricted_max(atom_nfa, lo);
            else
               final = do_repetition_count_range(atom_nfa, lo, hi);
         }
         break;
         default:
            assert(0);
      }

      /* every quantifier helper builds a fresh machine, so the bare atom
         machine is no longer needed */
      delete atom_nfa;
      return final;
   }
   else
      return atom_nfa;
}


/*-----------------------------------------------------------------------------
 * atom_to_nfa
 *   Converts an atom into a machine.
 *   - CHAR_CLASS: a single-symbol machine over the class's character set,
 *     complemented first when the class is negated.
 *   - DISJUNCT: the machine of the nested disjunction.
 *   Any other atom type trips an assert; with asserts compiled out the
 *   function returns a null pointer.  The caller owns the returned machine.
 *---------------------------------------------------------------------------*/
nfa_t *atom_to_nfa(atom *atm)
{
   /* initialized so that an unrecognized atom type cannot return an
      indeterminate pointer when asserts are disabled */
   nfa_t *final = 0;

   switch (atm->atom_type_)
   {
      case atom::CHAR_CLASS:
      {
         char_class *cc = dynamic_cast<char_class*>(atm->obj);
         assert(cc != NULL);

         /* work on a copy so negation does not alter the parse tree */
         bitset<MAX_SYMS> chars = cc->chars_;
         if (cc->negated_)
            chars.flip();
         final = make_nfa_char(chars);
         break;
      }
      case atom::DISJUNCT:
      {
         disjunction *dis = dynamic_cast<disjunction*>(atm->obj);
         assert(dis != NULL);

         final = disjunct_to_nfa(dis);
         break;
      }
      default:
         assert(0);
   }

   return final;
}





