/*--------------------------------------------------------------
 * mdfa_dump.cc
 *
 * Output the MDFA to a composited transition table
 * num_states = sum(number of states for individual DFA's)
 * starts = union of starts from individual DFA's
 * accepting states = union of accepting states from individual DFA's
 * accept ids = union of accept ids from individual DFA's 
 * num_transitions = sum(number of transitions for individual DFA's)
 * transitions = union of transitions from individual DFA's
 * Note: state numbers of the 2nd, 3rd, ... DFA's need to 
 * be renamed in the composited transition table
 *
 * Author: Liu Yang
 * Date:   Sep 30, 2009
 *
 * Commented the nfa_minimize() operation due to the fact that it
 * may generate multiple accept ids during the minimization.
 * -------------------------------------------------------------
 *  History:
 *  $Log: mdfa_dump.cc,v $
 *  Revision 1.2  2010/09/27 14:48:31  lyangru
 *  final synchronization
 *
 * 
 * ------------------------------------------------------------*/
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <map>
#include <set>
#include <vector>
#include <assert.h>

#include "re.h"
#include "nfa.h"
#include "minimize.h"
#include "globals.h"
#include <sys/time.h>
#include <sys/resource.h>

#include <unistd.h>
#include <errno.h>

#include "clock.h"

#define DEBUG

#define MAX_STRING_LEN	256
#define NUM_OF_TEST_ROUND	1

using namespace std;

/* this is defined in combine_alg.cc */
nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2);

size_t dfa_tab_overhead(nfa_t *nfa);


int cputime();

int main (int argc, char *argv[]) {

  FILE *f = NULL;
  ofstream out_file;
  char buf[4096];
  char *p;
  int line = 1;
  unsigned int sid = 1;
  nfa_t *NFA;
  nfa_t *DFA;
  nfa_t *com_dfa, *tmp_dfa;
  vector<dfa_tab_t> MDFA;
  unsigned int lb_num_states_per_dfa = 2500;	/* lower bound of number of states per combined DFA */

  int t_start, t_end;

  int i = 0, j, k;
  size_t mem_consump = 0;

  unsigned int mdfa_num_states = 0;
  unsigned int state_offset = 0;
  multimap<unsigned int, unsigned int> mdfa_accept_ids;
  set<unsigned int> mdfa_starts;
  set<unsigned int> mdfa_accept_states;
  unsigned int mdfa_num_trans = 0;
  vector< map<unsigned char, unsigned int> > mdfa_transitions;
   
   if (argc != 4) {
     printf("Usage: %s <regex file name> <states limit> <output MDFA file name>\n", argv[0]);
     printf("state limit: the least number of states for each combined DFA\n");
     exit(0);
   }

   if ( (f=fopen(argv[1], "r")) == NULL) {
      printf("could not open file %s\n", argv[1]);
      exit(0);
   }

   lb_num_states_per_dfa = atoi(argv[2]);

   t_start = cputime();

   memset(buf, 0, 4096);
   p = fgets(buf, 4096, f);
   p[strlen(p)-1] = '\0';  /* get rid of trailing \n */
   NFA = re_to_nfa(buf, strlen(buf), line++, sid++);
   DFA = NFA->make_dfa();
   nfa_minimize(DFA);
   tmp_dfa = DFA;
   i++;

   memset(buf, 0, 4096);
   p = fgets(buf, 4096, f);
   while ((p != NULL)) {
      cout << "i = " << i << endl;
      p[strlen(p)-1] = '\0';  /* get rid of trailing \n */
      NFA = re_to_nfa(buf, strlen(buf), line++, sid++);
      DFA = NFA->make_dfa();
      nfa_minimize(DFA);
      i++;
      if (tmp_dfa == NULL) {
	tmp_dfa = DFA;
      } else {
        com_dfa = combine(tmp_dfa, DFA);
        delete tmp_dfa;
        tmp_dfa = com_dfa;
        delete NFA;
	if (com_dfa->states.size() > lb_num_states_per_dfa) {
    	  mdfa_num_states += com_dfa->states.size();
    	  mdfa_starts.insert(com_dfa->start + state_offset);
	/* copy the transitions in com_dfa to composite mdfa */
#ifdef DEBUG
  cout << "num of acc states " << com_dfa->accepting.size() << endl;
#endif
	  for (j = 0; j < com_dfa->states.size(); j++) {
	    map<unsigned char, unsigned int> tmp_trans;
	    for (k = 0; k < MAX_SYMS; k++) {
 	      if (!com_dfa->states[j].trans[k].empty()) {
		/* add a transition to composite table */
		tmp_trans.insert(pair<unsigned char, unsigned int>(k, com_dfa->states[j].trans[k].front() + state_offset));
		mdfa_num_trans++;
	      }
      	    }
	    mdfa_transitions.push_back(tmp_trans);
	    /* add the accept ids if any */
	    if (com_dfa->states[j].accept_id.size() > 0) {
	    /* add j to mdfa_accept_states */
	      mdfa_accept_states.insert(j + state_offset);
	      std::list<int>::const_iterator ai;
	      for (ai = com_dfa->states[j].accept_id.begin();
		   ai != com_dfa->states[j].accept_id.end();
		   ai++) {
		mdfa_accept_ids.insert(pair<unsigned int, unsigned int>(j + state_offset, *ai));
	      }
 	    }
	  }
	  state_offset += com_dfa->states.size();

	  /* estimate memory consumption of this dfa */
	  mem_consump += dfa_tab_overhead(com_dfa);
	  delete com_dfa;
	  tmp_dfa = NULL;
        }
      } /* end else */
      memset(buf, 0, 4096);
      p = fgets(buf, 4096, f);
   }
   fclose(f);

   /* process the last possible combined DFA */
   if (tmp_dfa != NULL) {
    	  mdfa_num_states += tmp_dfa->states.size();
    	  mdfa_starts.insert(tmp_dfa->start + state_offset);
	/* copy the transitions in tmp_dfa to composite mdfa */
	  for (j = 0; j < tmp_dfa->states.size(); j++) {
	    map<unsigned char, unsigned int> tmp_trans;
	    for (k = 0; k < MAX_SYMS; k++) {
 	      if (!tmp_dfa->states[j].trans[k].empty()) {
		/* add a transition to composite table */
		tmp_trans.insert(pair<unsigned char, unsigned int>(k, tmp_dfa->states[j].trans[k].front() + state_offset));
		mdfa_num_trans++;
	      }
      	    }
	    mdfa_transitions.push_back(tmp_trans);
	    /* add the accept ids if any */
	    if (tmp_dfa->states[j].accept_id.size() > 0) {
	    /* add j to mdfa_accept_states */
	      mdfa_accept_states.insert(j + state_offset);
	      std::list<int>::const_iterator ai;
	      for (ai = tmp_dfa->states[j].accept_id.begin();
		   ai != tmp_dfa->states[j].accept_id.end();
		   ai++) {
		mdfa_accept_ids.insert(pair<unsigned int, unsigned int>(j + state_offset, *ai));
	      }
 	    }
	  }

     mem_consump += dfa_tab_overhead(tmp_dfa);
     delete tmp_dfa;
   }

  /* dump the composite transition table */ 
  out_file.open(argv[3]);
  assert(out_file);
  out_file << mdfa_num_states << endl;
  out_file << mdfa_accept_ids.size() << endl;
  /* output the accept id pairs */
  multimap<unsigned int, unsigned int>::iterator ait;
  for (ait = mdfa_accept_ids.begin(); ait != mdfa_accept_ids.end(); ait++) {
    out_file << ait->first << " " << ait->second << endl;
  }
  /* output accept states */
  out_file << mdfa_accept_states.size();
  set<unsigned int>::iterator its;
  for (its = mdfa_accept_states.begin(); its != mdfa_accept_states.end(); its++) {
    out_file << " " << *its;
  }
  out_file << endl;
  /* output the starts */
  out_file << mdfa_starts.size() << " ";
  for (its = mdfa_starts.begin(); its != mdfa_starts.end(); its++) {
    out_file << *its << " ";
  }
  out_file << endl;
  /* output the transitions */
  out_file << mdfa_num_trans << endl;
  assert(mdfa_num_states == mdfa_transitions.size());
  map<unsigned char, unsigned int>::iterator itm;
  for (i = 0; i < mdfa_transitions.size(); i++) {
    for (itm = mdfa_transitions[i].begin(); itm != mdfa_transitions[i].end(); itm++) {
      out_file << i << " " << (unsigned int)(itm->first) << " " << itm->second << endl;
    }
  }
  
  t_end = cputime();

  cout << "mdfa_dump: execution time is " << (double)(t_end - t_start)/(double)100 << " seconds" << endl;
  cout << "Memory consumption of MDFA is " << mem_consump << " bytes" << endl;

  return 0;
}

int cputime()
{
  struct rusage rus;

  getrusage (RUSAGE_SELF, &rus);
  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
}


/*-------------------------------------
 * dfa_tab_overhead: calculates the space overhead of nfa 
 * after being converted to a dfa_tab_t object
 *------------------------------------*/
size_t dfa_tab_overhead(nfa_t *nfa) {
  size_t sz = 0;
  unsigned int num_acc_ids = 0;
  /* space consumed by transition table */
  sz += nfa->states.size() * sizeof(unsigned int) * MAX_SYMS;
  /* space consumed by accepting ids (also counting the space consumed by null pointers) */
  for (unsigned int i = 0; i < nfa->states.size(); i++) {
    num_acc_ids += nfa->states[i].accept_id.size();
  }
  sz += num_acc_ids * sizeof(int) + nfa->states.size() * sizeof(int);
  return sz;
}
