/*-----------------------------------------------------------------------------
 * minimize.cc
 *   Implements the Hopcroft-Ullman O(n^2) algorithm as given in
 *   _Introduction_to_Automata_Theory,_Languages,_and_Computation_
 *
 * Author:  Randy Smith
 * Date:    21 July 2006
 *
 *
 *
 *    Copyright 2006,2007 Randy Smith, smithr@cs.wisc.edu
 *
 *    This file contains unpublished confidential proprietary
 *    work of Randy Smith, Department of Computer Sciences,
 *    University of Wisconsin--Madison.  No use of any sort, including
 *    execution, modification, copying, storage, distribution, or reverse
 *    engineering is permitted without the express written consent of
 *    Randy Smith.
 *
 * TODO:
 *  (1) the code is SLOW.  Lookup is O(nlogn), and it has to be performed
 *      millions of times.  If lookup can be made to be O(1), then we should
 *      see significant speedup.
 *
 *  probably want to think about encapsulating all of this in a class of
 *  some sort.
 *-----------------------------------------------------------------------------
 * History:
 * $Log: minimize.cc,v $
 * Revision 1.1  2009/06/09 18:51:22  vinodg
 * *** empty log message ***
 *
 * Revision 1.3  2008/04/23 20:35:48  smithr
 * get_designated was incorrectly adjusting the accepting list by comparing
 * to the local "accept" set (which contains accept_ids) rather than the
 * set of equivalent states.  Thanks to Daniel Luchaup for identifying and
 * narrowing down the problem.
 *
 * Revision 1.2  2008/02/13 20:33:10  smithr
 * *** empty log message ***
 *
 * Revision 1.1  2007/08/07 18:00:51  smithr
 * initial check in to CVS
 *
 * Revision 1.9  2006/12/22 00:05:18  smithr
 * Many changes to many files, mostly revolving around:
 * (1) addition of compressed edges to reduce memory cost.
 * (2) addition of returns to get detailed memory requirements.
 *
 * Revision 1.8  2006/10/02 16:26:29  smithr
 * changed a printf to a dprintf...
 *
 * Revision 1.7  2006/09/29 23:01:05  smithr
 * make_equivalent was actually broken in that it was not producing
 * equivalence classes.  This has been fixed, and check_equivalent has
 * been added to verify that equivalence classes are actually being
 * produced.
 *
 * Revision 1.6  2006/09/26 17:09:58  smithr
 * Fixed a nefarious memory bug.  During state machine combination, the
 * annotation pointers were getting copied, but it was just a pointer
 * copy, not a deep copy.  As a result, during de-allocation the
 * pointers were getting double free'd, etc.  The solution was to provide
 * copy constructors and destructors that performed deep copies "all
 * the way down" the nested actions.  This seems to solve the memory
 * leak and other memory access problems that were occurring.
 *
 * Revision 1.5  2006/07/26 22:55:50  smithr
 * minimization working, although it is not very fast.  This is due to
 * the pair-storage repository being employed.  A solution based on nodes
 * as integers might yield results, since lookup() can be reduced to an
 * O(1) operation.
 *
 * Revision 1.4  2006/07/25 17:54:38  smithr
 * using profiler, streamlined some operations.  The set<> container
 * is faster than the hash_set<> container for holding dependency
 * lists, so using that.
 *
 * Revision 1.3  2006/07/25 14:51:53  smithr
 * Minimization appears to be working now, after additional bug fixes.
 * It is a huge memory hog.  Additionally, it is rather slow.  Need to
 * try to speed up and reduce memory consumption.
 *
 * Revision 1.2  2006/07/24 22:53:56  smithr
 * minimization appears to be fully functional (that is, you call the
 * routine, and you get back a minimized dfa.
 *
 * Revision 1.1  2006/07/21 23:08:16  smithr
 * Initial check-in to CVS
 *
 *
 *---------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <list>
#include <string>
#include <set>
#include <map>
#include <vector>
#include <ext/hash_map>
#include <ext/hash_set>
#include <algorithm>
//#include <sstream>
#include <assert.h>
#include "nfa.h"
#include "globals.h"
#include "dprintf.h"

using namespace std;
using namespace __gnu_cxx;

/* Choice of internal data structures.
 * For holding dependency lists, the STL set performed better than
 * the hash_set, so the regular set is used (#undef the hash set).
 *
 * The data structures that use a regular map for the unordered pair
 * storage are broken.  DO NOT use them. */

/*-----------------------------------------------------------------------------
 *
 *  T Y P E S
 *
 *---------------------------------------------------------------------------*/
/* As with combined nodes, we need the following things in order
 * to use a hash-map:
   1.  a key (comb_state_id_key_t) 
   2.  the data (mark_state_id_data_t) 
   3.  an equality operator (comb_state_id_key_eq_t) 
   4.  a hash function */


typedef struct comb_state_id_key_t 
{
      comb_state_id_key_t(void) { first = second = 0;}
      comb_state_id_key_t(const comb_state_id_key_t& c) { 
	 first = c.first; second = c.second;}
      comb_state_id_key_t(state_id_t one, state_id_t two) {
	 first = one; second = two; }
      
      bool operator<(const comb_state_id_key_t& rhs) const
      {
	 return (first < rhs.first ? 
		 true : 
		 (first == rhs.first ? 
		    (second < rhs.second) : 
		    false));
      }

      state_id_t first;
      state_id_t second;
      
} comb_state_id_key_t;


/* Equality predicate for the pair hash_map.
 * Pairs are unordered: <a,b> and <b,a> are considered the same key. */
typedef struct comb_state_id_key_eq_no_order_t
{
      bool operator()(const comb_state_id_key_t& k1,
                      const comb_state_id_key_t& k2) const {
         const bool same_order =
            (k1.first == k2.first) && (k1.second == k2.second);
         const bool swapped_order =
            (k1.first == k2.second) && (k1.second == k2.first);

         return same_order || swapped_order;
      }
} comb_state_id_key_eq_no_order_t;


/* Hash functor for the pair hash_map.
 * The two ids are combined with XOR, which is symmetric, so <a,b> and
 * <b,a> hash to the same value -- required because the equality predicate
 * ignores order.  The result is reduced modulo 9241. */
typedef struct comb_state_id_key_hash_t
{
      size_t operator()(const comb_state_id_key_t& k1) const {
         const unsigned long lhs   = (unsigned long)k1.first;
         const unsigned long rhs   = (unsigned long)k1.second;
         const unsigned long mixed = lhs ^ rhs;

         return mixed % 9241; //4201;
      }
} comb_state_id_key_hash_t;



typedef set<comb_state_id_key_t> dep_set_t;
typedef set<comb_state_id_key_t>::iterator dep_set_iter_t;

typedef struct mark_state_id_data_t  
{
      mark_state_id_data_t(void) { marked = 0; }
      mark_state_id_data_t(const mark_state_id_data_t& m) 
      {
	 dependencies = m.dependencies;
	 marked = m.marked;
      }

      dep_set_t dependencies;
      unsigned char marked;
} mark_state_id_data_t;



/* marked_nodes is the repository for pairs of states that have been 
 * combined.  */

typedef hash_map<comb_state_id_key_t, 
	 mark_state_id_data_t, 
	 comb_state_id_key_hash_t, 
	 comb_state_id_key_eq_no_order_t> marked_nodes_t;


/* an iterator into the combined node repository */
typedef hash_map<comb_state_id_key_t, 
	 mark_state_id_data_t, 
	 comb_state_id_key_hash_t, 
	 comb_state_id_key_eq_no_order_t>::iterator marked_nodes_iter_t;


/*-----------------------------------------------------------------------------
 *
 *  F U N C T I O N   P R O T O S 
 *
 *---------------------------------------------------------------------------*/
/* entry point and the marking phase (steps 1-7 of the book's algorithm) */
void nfa_minimize(nfa_t *nfa);
void mark_list(marked_nodes_t& mn, state_id_t p, state_id_t q);
void mark(marked_nodes_t& mn, state_id_t p, state_id_t q);
void algorithm_body(nfa_t* n, marked_nodes_t& mn, state_id_t p, state_id_t q);

/* find-or-insert a pair in the repository; the 4-argument form also hands
 * back a pointer to the pair's stored data through csd */
comb_state_id_key_t lookup(marked_nodes_t& mn, state_id_t p, state_id_t q,
			     mark_state_id_data_t **csd);
comb_state_id_key_t lookup(marked_nodes_t& mn, state_id_t p, state_id_t q);

/* partitioning of states and dependency bookkeeping */
void nfa_split(nfa_t *nfa,list<state_id_t>& final, list<state_id_t>& nonfinal);
void add_to_list(marked_nodes_t& mn, comb_state_id_key_t& list_owner, 
		 state_id_t p, state_id_t q);

/* equivalence-class construction, verification, application, and dumps */
void dump_equivalent(list< set< state_id_t> >& classes);
void dump_equivalent_set(set<state_id_t>& s, unsigned int i);
void make_equivalent(marked_nodes_t& mn, list< set< state_id_t> >& classes);
void dump_hashtable(marked_nodes_t& mn);
state_id_t get_designated(nfa_t*, const set<state_id_t>& s, state_id_t start);
void apply_equivalent(nfa_t *n, list< set< state_id_t> >& classes);
void check_equivalent(list< set< state_id_t> >& classes);
void compact(nfa_t *n, set<state_id_t>& dead_states);

/* teardown and the file-local transition accessor */
void hash_table_cleanup(marked_nodes_t& mn);
static state_id_t get_transition(nfa_t* nfa, state_id_t src, unsigned i);

/*-----------------------------------------------------------------------------
 *
 *  F I L E   G L O B A L S
 *
 *---------------------------------------------------------------------------*/
/* value passed to marked_nodes.resize() at the start of nfa_minimize */
#define HASH_TABLE_SIZE 3000

/* when true, the minimizer prints step-by-step tracing with printf */
bool min_verbose = false;



/*-----------------------------------------------------------------------------
 * nfa_minimize
 *   The main routine that minimizes a machine.  Numbered comments (1), (2),
 *   etc. correspond to lines in the algorithm from the book.
 *   
 *---------------------------------------------------------------------------*/
void nfa_minimize(nfa_t *nfa)
{
   list<state_id_t> final, nonfinal;
   list<state_id_t>::iterator f_iter, n_iter, it1, it2, it3;
   marked_nodes_t marked_nodes;
   marked_nodes_iter_t m_iter;
   
   assert(nfa);

   printf("    Marking Nodes.\n");
   nfa_split(nfa, final, nonfinal);

   marked_nodes.resize(HASH_TABLE_SIZE);

   /* (1) mark the <final, non-final> pairs */
   for (f_iter = final.begin(); f_iter != final.end(); f_iter++) {
      for (n_iter = nonfinal.begin(); n_iter != nonfinal.end(); n_iter++) {
	 comb_state_id_key_t key(*f_iter, *n_iter);
	 mark_state_id_data_t data;

	 if ((m_iter = marked_nodes.find(key)) != marked_nodes.end())
	    assert(0);

	 data.marked = 1;
	 marked_nodes[key] = data;
      }
   }

   if (min_verbose) {
      printf("Dumping hash table after step 1:\n");
      dump_hashtable(marked_nodes);
      printf("done.\n");
   }

   /* (2a) - for (p,q) in FxF */
   for (it1 = final.begin(); it1 != final.end(); it1++) 
   {
      it3 = ++it1; --it1;
      for (it2 = it3; it2 != final.end(); it2++) 
      {
	 state_id_t p = *it1;
	 state_id_t q = *it2;

	 if (min_verbose) 
	    printf("Considering <%u,%u>:", p, q);
	 algorithm_body(nfa, marked_nodes, p,q);
      }
   }

   if (min_verbose) {
      printf("Dumping hash table after step 2(a):\n");
      dump_hashtable(marked_nodes);
      printf("done.\n");
   }

   /* (2b) - for (p,q) in (Q-F)x(Q-F) */
   for (it1 = nonfinal.begin(); it1 != nonfinal.end(); it1++) {
      it3 = ++it1; --it1;
      for (it2 = it3; it2 != nonfinal.end(); it2++) 
      {
	 state_id_t p = *it1;
	 state_id_t q = *it2;

	 if (min_verbose)
	    printf("Considering <%u,%u>:", p, q);
	 algorithm_body(nfa, marked_nodes, p,q);
      }
   }

   if (min_verbose) {
      printf("Dumping hash table after step 2(b):\n");
      dump_hashtable(marked_nodes);
      printf("done.\n");
   }

   printf("    Performing reduction.\n");
   list< set< state_id_t> > classes;
   make_equivalent(marked_nodes, classes);
   check_equivalent(classes);
   apply_equivalent(nfa, classes);

   printf("    Minimization cleanup.\n");
   hash_table_cleanup(marked_nodes);
}


/*-----------------------------------------------------------------------------
 * algorithm_body
 *   Body of the algorithm for one pair <p,q>, performing steps (3)-(7).
 *
 *   Steps (3)-(5): scan the symbols in order; if the successor pair
 *   <d(p,a), d(q,a)> is already marked for some symbol a, mark <p,q>
 *   (and, through mark_list, everything depending on it) and stop.
 *   Steps (6)-(7): otherwise register <p,q> as a dependent of every
 *   successor pair whose two members differ.
 *---------------------------------------------------------------------------*/
void algorithm_body(nfa_t *nfa, marked_nodes_t& mn, state_id_t p, state_id_t q)
{
   bool distinguished = false;

   /* (3) */
   for (unsigned int sym = 0; sym < MAX_SYMS; sym++)
   {
      comb_state_id_key_t succ;
      succ.first  = get_transition(nfa, p, sym);
      succ.second = get_transition(nfa, q, sym);

      marked_nodes_iter_t hit = mn.find(succ);

      if (hit == mn.end())
      {
         /* successor pair is not in the table: make sure <p,q> itself
          * is recorded, since we have now "observed" it */
         lookup(mn, p, q);
         continue;
      }

      if (!hit->second.marked)
         continue;

      /* (4) ensure <p,q> has an entry; the mark bit itself is set
       * inside mark_list */
      lookup(mn, p, q);
      if (min_verbose)
         printf(" marked, step (4) with symbol %u\n", sym);

      /* (5) */
      mark_list(mn, p, q);

      distinguished = true;
      break;
   }

   if (distinguished)
      return;

   /* (6) no pair (d(p,a), d(q,a)) was marked */
   for (unsigned int sym = 0; sym < MAX_SYMS; sym++)
   {
      comb_state_id_key_t succ;
      succ.first  = get_transition(nfa, p, sym);
      succ.second = get_transition(nfa, q, sym);

      if (succ.first == succ.second)
         continue;

      /* (7) */
      add_to_list(mn, succ, p, q);
   }
}


/*-----------------------------------------------------------------------------
 * nfa_split
 *   Partitions the state indices 0 .. states.size()-1 into `final` (those
 *   listed in nfa->accepting) and `nonfinal` (all others).  Indices are
 *   appended to the output lists in increasing order.
 *
 *   Asserts that every accepting entry was accounted for exactly once and
 *   that the two lists together cover all states.
 *---------------------------------------------------------------------------*/
void nfa_split(nfa_t *nfa,list<state_id_t>& final, list<state_id_t>& nonfinal)
{
   /* size() returns size_t; cast so the argument matches the %u specifier */
   if (min_verbose) {
      printf("orig.size = %u\n",  (unsigned int)nfa->states.size());
      printf("final.size = %u\n", (unsigned int)final.size());
      printf("nonfi.size = %u\n", (unsigned int)nonfinal.size());
   }

   /* index the accepting list once so each membership test is a set lookup
    * rather than a linear scan of the list */
   const set<state_id_t> accepting(nfa->accepting.begin(),
                                   nfa->accepting.end());

   for (unsigned int i = 0; i < nfa->states.size(); i++)
   {
      if (accepting.find(i) != accepting.end())
         final.push_back(i);
      else
         nonfinal.push_back(i);
   }

   if (min_verbose) {
      printf("orig.size = %u\n",  (unsigned int)nfa->states.size());
      printf("final.size = %u\n", (unsigned int)final.size());
      printf("nonfi.size = %u\n", (unsigned int)nonfinal.size());
   }

   assert(final.size() == nfa->accepting.size());
   assert(final.size() + nonfinal.size() == nfa->states.size());
}


/*-----------------------------------------------------------------------------
 * add_to_list
 *   Implements step (7): records the pair <p,q> as a dependent of the pair
 *   named by list_owner.  Both pairs are created in the table if they are
 *   not there yet (owner first, then dependent).
 *
 *   NOTE(review): the owner's data pointer is held across the second
 *   lookup, which may insert into the table -- this presumably relies on
 *   hash_map insertions not relocating existing values; confirm.
 *---------------------------------------------------------------------------*/
void add_to_list(marked_nodes_t& mn,
                 comb_state_id_key_t& list_owner,
                 state_id_t p, state_id_t q)
{
   mark_state_id_data_t *owner_entry;

   const comb_state_id_key_t owner_key =
      lookup(mn, list_owner.first, list_owner.second, &owner_entry);
   const comb_state_id_key_t dependent = lookup(mn, p, q);

   owner_entry->dependencies.insert(dependent);

   if (min_verbose)
      printf("Adding pair <%u,%u> to list of <%u,%u>\n",
             dependent.first, dependent.second,
             owner_key.first, owner_key.second);
}


/*-----------------------------------------------------------------------------
 * lookup
 *   Finds (or creates) the table entry for the pair <p,q> and returns its
 *   key.  Convenience form of the 4-argument lookup for callers that do
 *   not need the entry's data.
 *---------------------------------------------------------------------------*/
comb_state_id_key_t lookup(marked_nodes_t& mn, state_id_t p, state_id_t q)
{
   mark_state_id_data_t *unused_data;
   const comb_state_id_key_t stored_key = lookup(mn, p, q, &unused_data);

   return stored_key;
}


/*-----------------------------------------------------------------------------
 * lookup v2
 *   Finds the table entry for the pair <p,q>, creating an empty (unmarked,
 *   no dependencies) entry if none exists.
 *
 *   Returns the key as stored in the table -- for an existing entry this
 *   may be <q,p>, since table equality ignores order -- and sets *csd to
 *   the address of the entry's data.
 *
 *   This probably isn't the safest code: *csd points into the map, and the
 *   pointed-to object may change as the map is modified.
 *---------------------------------------------------------------------------*/
comb_state_id_key_t lookup(marked_nodes_t& mn, state_id_t p, state_id_t q,
                           mark_state_id_data_t** csd)
{
   const comb_state_id_key_t wanted(p, q);

   marked_nodes_iter_t hit = mn.find(wanted);
   if (hit != mn.end())
   {
      /* already present: hand back the stored key and its data */
      *csd = &hit->second;
      return (hit->first);
   }

   /* not present: create a fresh entry and hand back its address */
   mark_state_id_data_t& slot = mn[wanted];
   slot = mark_state_id_data_t();

   *csd = &slot;
   return wanted;
}


/*-----------------------------------------------------------------------------
 * mark
 *   Sets the mark bit on the pair <p,q>, creating the table entry first if
 *   it does not exist.  Dependencies are NOT followed (see mark_list).
 *---------------------------------------------------------------------------*/
void mark(marked_nodes_t& mn, state_id_t p, state_id_t q)
{
   mark_state_id_data_t *entry;

   lookup(mn, p, q, &entry);
   entry->marked = 1;
}


/*-----------------------------------------------------------------------------
 * mark_list
 *   Marks the pair <p,q> and, transitively, every unmarked pair on its
 *   dependency list and on the lists of pairs marked along the way.
 *
 *   Step (5) of the Hopcroft--Ullman algorithm reads:
 *   "recursively mark all unmarked pairs on the list for (p,q) and
 *   on the lists of other pairs that are marked at this step".  This
 *   means that you _cannot_ blindly mark all nodes on the list: only pairs
 *   that are still unmarked are expanded, otherwise cyclic dependencies
 *   would never terminate.
 *
 *   The traversal uses an explicit worklist instead of recursion so that
 *   a long dependency chain cannot exhaust the call stack.  Dependencies
 *   are pushed in reverse so pairs are visited in the same depth-first
 *   order as a recursive walk over each dependency set.
 *
 *   Every pair reached must already be in the table; a missing pair is
 *   reported and trips an assert.
 *---------------------------------------------------------------------------*/
void mark_list(marked_nodes_t& mn, state_id_t p, state_id_t q)
{
   vector<comb_state_id_key_t> pending;
   pending.push_back(comb_state_id_key_t(p, q));

   while (!pending.empty())
   {
      const comb_state_id_key_t key = pending.back();
      pending.pop_back();

      marked_nodes_iter_t m_iter = mn.find(key);
      if (m_iter == mn.end())
      {
         /* this is an error.  All pairs that have been seen, marked or
            not, should be in the container */
         printf("Could not find pair <%u,%u>\n", key.first, key.second);
         assert(0);
         continue;
      }

      /* already marked: its dependents were (or will be) handled */
      if (m_iter->second.marked != 0)
         continue;

      m_iter->second.marked = 1;

      dep_set_t& deps = m_iter->second.dependencies;
      for (dep_set_t::reverse_iterator d = deps.rbegin();
           d != deps.rend(); ++d)
      {
         pending.push_back(*d);
      }
   }
}


/*-----------------------------------------------------------------------------
 * hash_table_cleanup
 *   Empties the pair table.  Entries own their dependency sets by value,
 *   so clear() releases everything; no per-entry walk is needed.
 *---------------------------------------------------------------------------*/
void hash_table_cleanup(marked_nodes_t& mn)
{
   if (min_verbose)
      printf("Erasing minimization hash table...\n");

   mn.clear();

   if (min_verbose)
      printf("done\n");
}


/*-----------------------------------------------------------------------------
 * dump_hashtable
 *   Prints every pair in the table, one per line, as "<first,second>",
 *   followed by " M" when the pair is marked.  Entries are read in place;
 *   nothing is copied or modified.
 *---------------------------------------------------------------------------*/
void dump_hashtable(marked_nodes_t& mn)
{
   for (marked_nodes_iter_t i = mn.begin(); i != mn.end(); ++i)
   {
      const comb_state_id_key_t& kt = i->first;

      printf("<%u,%u>", kt.first, kt.second);

      if (i->second.marked) {
         printf(" M");
      }
      printf("\n");
   }
}


/*-----------------------------------------------------------------------------
 * make_equivalent
 *   Creates full equivalence classes from the pairwise equivalences left
 *   by the Hopcroft-Ullman marking phase: every UNMARKED pair in the table
 *   names two equivalent states.
 *
 *   For each unmarked pair <a,b>:
 *     - neither state is in a class yet  -> start a new class {a,b}
 *     - exactly one is in a class        -> add the other to that class
 *     - they are in two different classes -> merge the second class into
 *                                            the first and drop the second
 *     - both are already in the same class -> nothing to do
 *
 *   Table entries are read in place (no copy of the dependency sets), and
 *   a merged-away class is erased through its iterator rather than being
 *   searched for by value.
 *
 *   The resulting classes are handed to dump_equivalent at the end.
 *---------------------------------------------------------------------------*/
void make_equivalent(marked_nodes_t& mn, list< set< state_id_t> >& classes)
{
   typedef list< set< state_id_t> >::iterator class_iter_t;

   for (marked_nodes_iter_t m_iter = mn.begin(); m_iter != mn.end(); ++m_iter)
   {
      /* marked pairs are distinguishable: they contribute nothing */
      if (m_iter->second.marked)
         continue;

      const comb_state_id_key_t key(m_iter->first);

      /* classes.end() means "not found in any class" */
      class_iter_t first  = classes.end();
      class_iter_t second = classes.end();

      unsigned int i = 0;
      dprintf(stdout,DIAG_HUMIN,"Considering <%u,%u>\n",key.first, key.second);
      for (class_iter_t l_iter = classes.begin(); l_iter != classes.end();
           ++l_iter, ++i)
      {
         if (l_iter->find(key.first) != l_iter->end())
         {
            dprintf(stdout, DIAG_HUMIN,
                    "class %u: Found first, %u, inserting second, %u\n",
                    i, key.first, key.second);
            first = l_iter;
         }

         if (l_iter->find(key.second) != l_iter->end())
         {
            dprintf(stdout, DIAG_HUMIN,
                    "class %u: Found second, %u, inserting first, %u\n",
                    i, key.second, key.first);
            second = l_iter;
         }
      }

      const bool have_first  = (first  != classes.end());
      const bool have_second = (second != classes.end());

      if (!have_first && !have_second)
      {
         set< state_id_t> new_class;

         new_class.insert(key.first);
         new_class.insert(key.second);
         classes.push_back(new_class);
         dprintf(stdout, DIAG_HUMIN,
                 "Found neither, inserting first, %u, and  second, %u\n",
                 key.first, key.second);
         continue;
      }

      if (have_first && have_second && (first != second))
      {
         /* the two states live in different classes: fold the second
            class into the first and eliminate the second */
         first->insert(second->begin(), second->end());
         classes.erase(second);
         continue;
      }

      /* at most one insertion below actually adds an element */
      if (have_first)
         first->insert(key.second);

      if (have_second)
         second->insert(key.first);
   }
   //if (min_verbose)
      dump_equivalent(classes);
}


/*-----------------------------------------------------------------------------
 * check_equivalent
 *   Sanity check on the output of make_equivalent: every state that
 *   appears in any class must appear in exactly one class.  On a
 *   violation the offending state is reported, the classes are dumped,
 *   and an assert is tripped.
 *---------------------------------------------------------------------------*/
void check_equivalent(list< set< state_id_t> >& classes)
{
   typedef list< set< state_id_t> >::iterator class_iter_t;

   /* gather every state mentioned by any class */
   set<state_id_t> universe;
   for (class_iter_t cls = classes.begin(); cls != classes.end(); ++cls)
      universe.insert(cls->begin(), cls->end());

   /* count, for each state, how many classes contain it */
   for (set<state_id_t>::iterator member = universe.begin();
        member != universe.end(); ++member)
   {
      unsigned int occurrences = 0;
      for (class_iter_t cls = classes.begin(); cls != classes.end(); ++cls)
      {
         if (cls->count(*member) != 0)
            occurrences++;
      }

      if (occurrences == 1)
         continue;

      printf("Occurrences = %u for state %u\n", occurrences, (*member));
      diag_add_interest(DIAG_HUMIN);
      dump_equivalent(classes);
      assert(0);
   }
}


/*-----------------------------------------------------------------------------
 * apply_equivalent
 *   Rewrites the machine so that each equivalence class is represented by
 *   a single designated state:
 *     - every transition whose destination lies in a class is redirected
 *       to that class's designated state (chosen by get_designated, which
 *       also merges accept ids and fixes the accepting list);
 *     - the non-designated members of each class are collected as dead
 *       states, reported on stdout, and removed by compact().
 *---------------------------------------------------------------------------*/
void apply_equivalent(nfa_t *nfa, list< set< state_id_t> >& classes)
{
   set< state_id_t> dead_states;

   for (list< set< state_id_t> >::iterator l = classes.begin();
        l != classes.end(); ++l)
   {
      set<state_id_t>& eqcl = (*l);
      state_id_t d = get_designated(nfa, eqcl, nfa->start);

      assert(d < nfa->states.size());

      /* replace all transitions to states in this equivalence class
       * with transition to the designated state */
      for (unsigned int state_i = 0; state_i < nfa->states.size(); state_i++)
      {
         for (unsigned int sym = 0; sym < MAX_SYMS; sym++)
         {
            /* if the transition destination is in our current set, then
               replace it with d, the designated transition. */
            if (eqcl.find(get_transition(nfa, state_i, sym)) != eqcl.end())
            {
               assert(nfa->states[state_i].trans[sym].size() == 1);
               nfa->states[state_i].trans[sym].clear();
               nfa->states[state_i].trans[sym].push_back(d);
            }
         }

         /* every member of the class other than d becomes a dead state */
         if ((eqcl.find(state_i) != eqcl.end()) && (state_i != d))
         {
            dead_states.insert(state_i);
         }
      }
   }

   /* output the set of dead states */
   if (dead_states.size() > 0)
   {
      fprintf(stdout, "Dead States: ");
      for (set<state_id_t>::iterator si = dead_states.begin();
           si != dead_states.end(); ++si)
      {
         fprintf(stdout, "%u ", *si);
      }
      fprintf(stdout, "\n");
   }

   /* now, compact the result.  size() returns size_t; cast so the
    * argument matches the %u specifier */
   fprintf(stdout, "# states before compaction: %u\n",
           (unsigned int)nfa->states.size());
   compact(nfa, dead_states);
   fprintf(stdout, "# states after compaction: %u\n",
           (unsigned int)nfa->states.size());

}


/*-----------------------------------------------------------------------------
 * get_designated
 *   Chooses the state that will stand in for the whole equivalence class
 *   and prepares it for that role.
 *
 *   s     == equivalence class of states that can be replaced by a single
 *            state, the designated state.
 *   start == the machine's start state; if it is a member of s it is
 *            always the one chosen, otherwise the smallest member is.
 *
 *   Side effects on the machine:
 *     - the designated state's accept_id list becomes the de-duplicated,
 *       sorted union of the accept_ids of all members of s;
 *     - every member of s is removed from nfa->accepting and, if any was
 *       removed, the designated state is appended in their place.
 *
 *   Returns the designated state.  An empty class trips an assert and
 *   yields (state_id_t)-1.
 *---------------------------------------------------------------------------*/
state_id_t get_designated(nfa_t *nfa,const set<state_id_t>& s,state_id_t start)
{
   if (s.empty())
   {
      assert(0);
      return (state_id_t)-1;
   }

   /* prefer the start state; fall back to the first member of the set */
   const state_id_t d = (s.count(start) != 0) ? start : *(s.begin());

   /* collect the accept_ids of every member.  A set is used to ensure
    * uniqueness.  These are accept_ids, NOT accepting states! */
   set<state_id_t> merged_ids;
   for (set<state_id_t>::const_iterator member = s.begin();
        member != s.end(); ++member)
   {
      merged_ids.insert(nfa->states[*member].accept_id.begin(),
                        nfa->states[*member].accept_id.end());
   }

   /* the designated state carries the merged accept_ids */
   nfa->states[d].accept_id.clear();
   for (set<state_id_t>::iterator id = merged_ids.begin();
        id != merged_ids.end(); ++id)
   {
      nfa->states[d].accept_id.push_back(*id);
   }

   /* update the machine-wide list of accepting states: drop every member
    * of the class, remembering whether anything was dropped */
   bool erased_member = false;
   list<state_id_t>::iterator li = nfa->accepting.begin();
   while (li != nfa->accepting.end())
   {
      if (s.find(*li) == s.end())
      {
         ++li;
         continue;
      }

      li = nfa->accepting.erase(li);
      erased_member = true;
   }

   /* the class was accepting: its designated state represents it now */
   if (erased_member)
      nfa->accepting.push_back(d);

   return d;
}


/*-----------------------------------------------------------------------------
 * dump_equivalent
 *   Dumps every equivalence class, in list order, numbering them from 0.
 *---------------------------------------------------------------------------*/
void dump_equivalent(list< set< state_id_t> >& classes)
{
   unsigned int index = 0;
   list< set< state_id_t> >::iterator cls = classes.begin();

   while (cls != classes.end())
   {
      dump_equivalent_set(*cls, index);
      ++cls;
      ++index;
   }
}


/* Dumps one equivalence class through dprintf under DIAG_HUMIN: a
 * "Class <i>" header, one member per line, then a blank line. */
void dump_equivalent_set(set<state_id_t>& s, unsigned int i)
{
   dprintf(stdout, DIAG_HUMIN, "Class %u\n",i);

   set< state_id_t>::iterator member = s.begin();
   while (member != s.end())
   {
      dprintf(stdout, DIAG_HUMIN, "  %u\n", (*member));
      ++member;
   }

   dprintf(stdout, DIAG_HUMIN, "\n");
}


/*-----------------------------------------------------------------------------
 * compact
 *  After minimization has completed, there may be several "dead states"
 *  that are manifest as unused entries in the vector of states.  This
 *  routine fills in the holes by taking live entries from the end and
 *  moving them into dead slots, then shrinks the vector.
 *
 *  For each dead slot (in increasing order) the highest-numbered live
 *  state above it is copied into the slot, and every reference to the
 *  moved state -- symbol transitions, epsilon transitions, the accepting
 *  list, and the start state -- is renumbered to the slot's index.
 *  Finally dead_states.size() entries are popped from the end of the
 *  state vector and next_id is reduced by the same amount.
 *
 *  The downward scan for a live state never goes below the slot being
 *  filled, so it cannot run off the front of the vector when the
 *  lowest-numbered states are dead.
 *---------------------------------------------------------------------------*/
void compact(nfa_t *n, set<state_id_t>& dead_states)
{
   unsigned int removed = 0;  /* holes filled; used for a sanity check only */
   unsigned int last = n->states.size() - 1;

   for (set<state_id_t>::iterator li = dead_states.begin();
        li != dead_states.end(); ++li)
   {
      /* walk down until we find a live state to move, but never past the
         slot we are trying to fill */
      while (last > *li && dead_states.find(last) != dead_states.end())
         last--;

      /* at this point, we have nothing left but dead states above this
         slot, so we are done. */
      if (last <= *li)
         break;

      /* copy the state structure over */
      n->states[*li] = n->states[last];
      fprintf(stdout, "Replacing %u with %u\n", *li, last);
      removed++;

      const state_id_t from = last;
      const state_id_t to   = *li;

      /* the start state must follow the state it names */
      if (n->start == from)
         n->start = to;

      /* redirect all transitions pointing to last to point to the new
         slot.  States at or above `last` are dead or already moved. */
      for (unsigned int i = 0; i < last; i++)
      {
         /* There should be exactly 1 edge for each symbol.
          * Whether there are more than one or not, this still works. */
         for (unsigned int j = 0; j < MAX_SYMS; j++)
         {
            std::replace(n->states[i].trans[j].begin(),
                         n->states[i].trans[j].end(), from, to);
         }

         /* epsilon edges should be absent, but keep the routine general */
         std::replace(n->states[i].eps_trans.begin(),
                      n->states[i].eps_trans.end(), from, to);
      }

      /* if last was listed as an accepting state, renumber it there too */
      std::replace(n->accepting.begin(), n->accepting.end(), from, to);

      last--;
   }

   /* finally, shrink the vector.  This way avoids reallocation. */
   assert(dead_states.size() >= removed);
   assert(dead_states.size() < n->states.size());
   for (unsigned int i = 0; i < dead_states.size(); i++)
   {
      n->states.pop_back();
   }
   n->next_id -= dead_states.size();
}


/*-----------------------------------------------------------------------------
 * get_transition
 *   Returns the destination of state src on symbol i: the first entry of
 *   that state's transition list for the symbol.
 *
 *   The list must not be empty (asserted).  No fallback to a default
 *   transition or to the start state is performed here.
 *---------------------------------------------------------------------------*/
inline state_id_t get_transition(nfa_t* nfa, state_id_t src, unsigned i)
{
   const list<state_id_t>& edges = nfa->states[src].trans[i];

   assert(!edges.empty());

   return edges.front();
}
