/*-----------------------------------------------------------------------------
 * combine.c
 *   Code for combining two state machines, adjusted to handle deterministic
 *   moore machines.
 *
 * Author:  Randy Smith
 * Date:    12 July 2006
 *
 *    Copyright 2006,2007 Randy Smith, smithr@cs.wisc.edu
 *
 *    This file contains unpublished confidential proprietary
 *    work of Randy Smith, Department of Computer Sciences,
 *    University of Wisconsin--Madison.  No use of any sort, including
 *    execution, modification, copying, storage, distribution, or reverse
 *    engineering is permitted without the express written consent of
 *    Randy Smith.
 *
 *
 *---------------------------------------------------------------------------
 * History:
 *  $Log: combine_alg.cc,v $
 *  Revision 1.1  2009/06/09 18:51:21  vinodg
 *  *** empty log message ***
 *
 *  Revision 1.3  2008/04/25 19:49:40  smithr
 *  In function combine(), added code to populate the nfa_t::accepting
 *  list, which wasn't being done.  Thanks to Xiaoyong Chai for
 *  identifying a problem.
 *
 *  Revision 1.2  2008/04/17 21:11:57  smithr
 *  Clean-up algorithm further, simplified some data structures.
 *
 *  Revision 1.1  2008/04/17 14:11:57  smithr
 *  Initial check-in to CVS
 *
 *---------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <list>
#include <ext/hash_map>
#include <assert.h>
#include "nfa.h"
#include "globals.h"

/*-----------------------------------------------------------------------------
 *
 * T Y P E   D E F S   A N D   D A T A   S T R U C T U R E S
 *
 *---------------------------------------------------------------------------*/
/* Use the SGI/GNU_EXT hash_map as the repository for efficiently testing
   whether a cross-producted node has been created yet.  The hash map
   associates each pair of source states that has already been crossed
   with the id of the combined state created for it.  This requires the
   following pieces:
   1.  a key (comb_statetype_key_t)
   2.  the data (state_id_t, the id of the combined state)
   3.  an equality operator (comb_statetype_key_eq_t)
   4.  a hash function (comb_statetype_key_hash_t) */

typedef struct comb_statetype_key {

      comb_statetype_key(void) 
      { 
	 first = second = 0;
      }

      comb_statetype_key(const comb_statetype_key& c) 
      { 
	 first = c.first; second = c.second;
      }
      
      comb_statetype_key(state_id_t one, state_id_t two) 
      {
	 first = one; second = two; 
      }

      state_id_t first;
      state_id_t second;
      
} comb_statetype_key_t;


/* operator for hash_map equality
 * Note that the order of the elements IS significant here: the keys
 * <s,t> and <t,s> are treated as different  */
typedef struct comb_statetype_key_eq
{
      /* Position-wise comparison: two keys match only when their first
       * components agree AND their second components agree. */
      bool operator()(const comb_statetype_key_t* k1,
                      const comb_statetype_key_t* k2) const {
         if (k1->first != k2->first)
         {
            return false;
         }
         return k1->second == k2->second;
      }
} comb_statetype_key_eq_t;


/* operator for hash_map equality
 * Note that the order of the elements is insignificant here  */
typedef struct comb_statetype_key_eq_no_order
{
      /* Unordered comparison: the keys match if their components agree
       * either position-by-position or with the two positions swapped. */
      bool operator()(const comb_statetype_key_t* k1,
                      const comb_statetype_key_t* k2) const {
         const bool same_order =
            (k1->first == k2->first) && (k1->second == k2->second);
         const bool swapped_order =
            (k1->first == k2->second) && (k1->second == k2->first);

         return same_order || swapped_order;
      }
} comb_statetype_key_eq_no_order_t;


/* operator for hash_map hashing */
typedef struct comb_statetype_key_hash
{
      /* Hash a key from its two component ids.
       *
       * The result is deliberately symmetric in (first, second): the two
       * ids are ordered before mixing, so <s,t> and <t,s> hash alike and
       * the functor stays valid with either the ordered or the unordered
       * equality operator.
       *
       * No modulus is applied here.  The hash table reduces the value to
       * its own bucket count; capping the range in the hash function would
       * bound the number of usable buckets and lengthen the chains once
       * the table grows past that cap. */
      size_t operator()(const comb_statetype_key_t* k1) const {
         const unsigned long a = (unsigned long)k1->first;
         const unsigned long b = (unsigned long)k1->second;

         const unsigned long lo = (a < b) ? a : b;
         const unsigned long hi = (a < b) ? b : a;

         /* multiplicative mix (wraps on overflow, which is well defined
          * for unsigned arithmetic); unlike a plain XOR this does not send
          * every <s,s> key to the same value */
         return (size_t)(lo * 2654435761UL + hi);
      }
} comb_statetype_key_hash_t;


/* Queue of keys for combined states that have been created but not yet
 * expanded; combine() appends with push_back and consumes from the front. */
typedef std::list<comb_statetype_key_t*> worklist_t;


/*-----------------------------------------------------------------------------
 *
 *               F U N C T I O N   P R O T O T Y P E S
 *
 *---------------------------------------------------------------------------*/
/* public entry point */
nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2);

/* file-local helpers */
static nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2,
                      unsigned int max_states, bool& fail);
static state_id_t get_transition(nfa_t* rca, state_id_t src, unsigned i);
static void copy_accept_ids(const std::list<int>& src, std::list<int>& dest);



/*-----------------------------------------------------------------------------
 * combine - combines two state machines into one.
 *   returns a new nfa_t representing the combination of the two inputs, or
 *   0 if there was a problem.
 *   Caller is responsible for deallocating the memory of the new nfa_t.
 *---------------------------------------------------------------------------*/
nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2)
{
   /* state cap handed to the worker overload */
   const unsigned long state_cap = 4000000000UL;
   bool failed = false;

   nfa_t *merged = combine(NFA1, NFA2, state_cap, failed);

   if (!failed)
   {
      return merged;
   }

   /* The worker reported a failure: discard whatever it built and hand
    * back 0.  Deleting a null pointer is a no-op, so no guard is needed. */
   delete merged;
   return 0;
}
   
/*-----------------------------------------------------------------------------
 * combine -- combines two state machines into one.
 * This is the one that does all the work.
 *---------------------------------------------------------------------------*/
static nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2, 
                      unsigned int max_states, bool& fail)
{
   /* Builds the cross product of two deterministic machines.
    *
    *   NFA1, NFA2  -- the input machines; both must be non-null and
    *                  deterministic.
    *   max_states  -- cap on the number of combined states; 0 disables
    *                  the cap.
    *   fail        -- set to true when the inputs are rejected or the cap
    *                  is reached.  It is never reset to false here, so the
    *                  caller must initialize it.
    *
    * Returns a newly allocated machine owned by the caller, or 0 when an
    * input is not deterministic.  When the cap is reached the partially
    * built machine is still returned (with fail set) so that the caller
    * can dispose of it. */
   assert(NFA1);
   assert(NFA2);

   /* get_transition() needs exactly one destination per (state, symbol),
    * so refuse non-deterministic input up front -- before anything has
    * been allocated -- rather than printing a message and running on into
    * an assertion failure (or worse when assertions are compiled out). */
   bool inputs_ok = true;

   if (!NFA1->is_deterministic())
   {
      fprintf(stderr, 
              "Error in %s (%s:%d): NFA1 is not deterministic. Exiting...\n",
              __func__, __FILE__, __LINE__);
      inputs_ok = false;
   }

   if (!NFA2->is_deterministic())
   {
      fprintf(stderr, 
              "Error in %s (%s:%d): NFA2 is not deterministic. Exiting...\n",
              __func__, __FILE__, __LINE__);
      inputs_ok = false;
   }

   if (!inputs_ok)
   {
      fail = true;
      return 0;
   }

   /* comb_map_t is the repository of pairs that have already been
    * crossed: key <s,t> -> id of the combined state. */
   typedef __gnu_cxx::hash_map<comb_statetype_key_t*, 
                               state_id_t,
                               comb_statetype_key_hash_t, 
                               comb_statetype_key_eq_t> comb_map_t;

   /* the worklist keeps track of the nodes that we still need to process */
   worklist_t wl;
   comb_map_t comb_nodes;
   comb_map_t::iterator comb_nodes_iter;

   nfa_t *combined_rca = new nfa_t();

   /* scratch key used only for lookups; it is never stored in the map,
    * so it can live on the stack */
   comb_statetype_key_t destkey;

   unsigned int combined_state_size = 0;

   /* Seed the worklist, the hash map and the combined machine with the
    * pair of start states. */
   {
      comb_statetype_key_t *startkey =
         new comb_statetype_key_t(NFA1->start, NFA2->start);
      unsigned int st_id = combined_rca->add_state();

      copy_accept_ids(NFA1->states[NFA1->start].accept_id, 
                      combined_rca->states[st_id].accept_id);
      copy_accept_ids(NFA2->states[NFA2->start].accept_id, 
                      combined_rca->states[st_id].accept_id);

      state_id_t start_data = st_id;

      wl.push_back(startkey);
      comb_nodes[startkey] = start_data;
      combined_state_size++;
      combined_rca->start = st_id;
   }

   /* Main iteration loop: as long as the worklist is not empty, there are
    * "crossed" nodes that have been created but not fully processed yet.
    * Each iteration removes and processes one node; processed nodes are
    * never placed on the worklist again.  Several nodes may be added per
    * iteration, but the process terminates since the number of combined
    * nodes |first| X |second| is finite.  The cap is only tested between
    * worklist items, so the count may overshoot max_states by the number
    * of states created while expanding one item. */
   while (!wl.empty() && 
          !(max_states > 0 && combined_state_size >= max_states) )
   {
      /* denote key as <s, t> */
      comb_statetype_key_t *key = wl.front();
      wl.pop_front();

      /* every key on the worklist was entered in the map when created */
      comb_nodes_iter = comb_nodes.find(key);
      assert(comb_nodes_iter != comb_nodes.end());
      const state_id_t st_data_src = (*comb_nodes_iter).second;

      /* look at each symbol and build transitions to new combined
         states as necessary */
      for (unsigned int i = 0; i < MAX_SYMS; i++)
      {
         /* destination <s',t'> on the i^th symbol in each machine */
         destkey.first  = get_transition(NFA1, key->first, i);
         destkey.second = get_transition(NFA2, key->second, i);

         state_id_t st_data_dst;

         comb_nodes_iter = comb_nodes.find(&destkey);
         if (comb_nodes_iter == comb_nodes.end())
         {
            /* first time we see <s',t'>: create the combined state,
             * merge both accept lists into it, and register a heap copy
             * of the key in the worklist and the hash map */
            comb_statetype_key_t *newkey = new comb_statetype_key_t(destkey);
            unsigned int st_id = combined_rca->add_state();

            copy_accept_ids(NFA1->states[destkey.first].accept_id, 
                            combined_rca->states[st_id].accept_id);
            copy_accept_ids(NFA2->states[destkey.second].accept_id, 
                            combined_rca->states[st_id].accept_id);

            st_data_dst = st_id;
            wl.push_back(newkey);
            comb_nodes[newkey] = st_data_dst;

            combined_state_size++;
         }
         else
         {
            /* the destination node already exists, just get it. */
            st_data_dst = (*comb_nodes_iter).second;
         }

         /* create a transition from the src node in the combined
          * RCA to the dest node in the combined RCA.*/
         combined_rca->add_trans(st_data_src, st_data_dst, i);
      }
   }

   /* If the state cap was reached, report it through the fail flag. */
   if (max_states > 0 && combined_state_size >= max_states)
   {
      fail = true;
   }

   /* Clean up: every key ever allocated is in the hash map (including any
    * still on the worklist), so walking the map frees them all.  Advance
    * the iterator BEFORE deleting the key it refers to, because stepping
    * the iterator may need to read that key. */
   comb_nodes_iter = comb_nodes.begin();
   while (comb_nodes_iter != comb_nodes.end())
   {
      comb_statetype_key_t* kt = (*comb_nodes_iter).first;
      comb_nodes_iter++;
      delete kt;
   }
   comb_nodes.clear(); /* wipe out the hash map */

   combined_rca->machine_id = NFA1->machine_id + NFA2->machine_id;

   /* update the set of accepting states in the new automata: a combined
    * state accepts when it inherited at least one accept id */
   for (unsigned int i = 0; i < combined_rca->states.size(); i++)
   {
      if (!combined_rca->states[i].accept_id.empty())
      {
         combined_rca->accepting.push_back(i);
      }
   }

   return combined_rca;
}


/*-----------------------------------------------------------------------------
 * get_transition
 *   asserts that everything is within the proper limits and then returns
 *   the state id.
 *---------------------------------------------------------------------------*/
static state_id_t get_transition(nfa_t* rca, state_id_t src, unsigned i)
{
   /* Returns the single destination of state `src` on symbol `i`.
    *
    *   rca -- machine to query; must be non-null
    *   src -- source state id; must index an existing state of rca
    *   i   -- symbol; must be below MAX_SYMS
    *
    * The machine is expected to be deterministic, i.e. to hold exactly
    * one destination for (src, i).  All preconditions are enforced with
    * assert() only. */
   assert(rca);
   assert(i < MAX_SYMS);
   /* src is used as an index below, so check it like the other limits */
   assert((size_t)src < rca->states.size());
   assert(rca->states[src].trans[i].size() == 1);

   return rca->states[src].trans[i].front();
}


/*-----------------------------------------------------------------------------
 * copy_accept_ids
 *---------------------------------------------------------------------------*/
static void copy_accept_ids(const std::list<int>& src, std::list<int>& dest)
{
   /* Append every id in src to the end of dest, keeping their order;
    * src is left untouched.  The ids are duplicated into a temporary
    * list first and that list's nodes are then moved onto dest's tail. */
   std::list<int> appended(src.begin(), src.end());

   dest.splice(dest.end(), appended);
}
