/*------------------------------------------------------------------------
 * File:	eps_elimination_nfas.cc
 *
 * Author:	Liu Yang
 * Date:	Jan 5, 2010
 *
 * Perform epsilon elimination on the NFAs generated by res2nfas.cc
 *------------------------------------------------------------------------
 * $Log $
 *
 * */

#include <iostream>
#include <fstream>
#include <map>
#include <set>
#include <vector>
#include <string>
#include <cstdlib>
#include "nfa-multimap.h"

//#define DEBUG
#define TRACE_EPS_ELIM

int main(int argc, char** argv) {

  ifstream nfas_file;
  ofstream out_file;
  multimap_nfa *mynfa;
  int start, end, i, state_offset = 0;
  int tmp_st, tmp_acc_id, num_accepting_states, tmps, s_start;
  int **T_transitions;
  char hdr_file[1024];

  /* file header information for the combined NFA*/
  int total_num_states = 0, total_num_accept_ids = 0, total_num_starts = 0, total_num_transitions = 0;
  set<int> all_starts;
  set<int> all_accepting_states;
  multimap<int, int> all_accept_ids;

  vector<int>::iterator it;
  set<int>::iterator it2;
  multimap<int, int>::iterator ait;

  if (argc !=3) {
    cerr << "Usage: %s <NFA file name> <output file name>" << endl;
    exit(1);
  }


  nfas_file.open(argv[1]);
  if (!nfas_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }
  
  start = cputime();  
  while (!nfas_file.eof()) {
    /*---------- INPUT DATA OF ONE NFA --------------*/
    mynfa = new multimap_nfa();
    /* input number of states, number of accepting ids, and accepting ids */
    nfas_file >> mynfa->num_states >> mynfa->num_accept_ids;
    if (nfas_file.eof())
      break;
#ifdef DEBUG
  cout << "num_states " << mynfa->num_states << " num_accept_ids " << mynfa->num_accept_ids << endl;
#endif
    for (i = 0; i < mynfa->num_accept_ids; i++) {
      nfas_file >> tmp_st >> tmp_acc_id;
      mynfa->accept_ids.insert(pair<int, int>(tmp_st, tmp_acc_id));
      //all_accept_ids.insert(pair<int, int>(tmp_st + state_offset, tmp_acc_id));
    }
    /* input number of accepting states, and accepting states */
    nfas_file >> num_accepting_states;
    for (i = 0; i < num_accepting_states; i++) {
      nfas_file >> tmps;
      mynfa->accepting_states.push_back(tmps);
      //all_accepting_states.push_back(tmps + state_offset);
    }
    /* input starting state and number of transitions */
    nfas_file >> s_start >> mynfa->num_transitions;
    mynfa->starts.insert(s_start);

    /* input transitions
     * allocate space for transition table */
    T_transitions = (int **)malloc(mynfa->num_transitions * sizeof(int *));
    if (T_transitions == NULL)
      perror("out of memory"), exit(0);

    for (i = 0; i < mynfa->num_transitions; i++) {
      T_transitions[i] = (int *)malloc(NUM_COLUMN * sizeof(int));
      if (T_transitions[i] == NULL)
        perror("out of memory"), exit(0);
    }
    /* read the transitions */
    for (i = 0; i < mynfa->num_transitions; i++) {
#ifdef TRACE_EPS_ELIM
      if ((i % 10) == 0) {
        cout << "Read "<< i << "th transition ..." << endl;
      }
#endif
      nfas_file >> T_transitions[i][0] >> T_transitions[i][1] >> T_transitions[i][2];
    }

    mynfa->transitions.resize(mynfa->num_states);
    for (i = 0; i < mynfa->num_transitions; i++) {
#ifdef TRACE_EPS_ELIM
      if ((i % 10) == 0) {
        cout << "fill out "<< i << "th transition ..." << endl;
      }
#endif
      mynfa->transitions[T_transitions[i][0]].insert(pair<int, int>(T_transitions[i][1], T_transitions[i][2]));
      free(T_transitions[i]);
    }
    free(T_transitions);

    /* -----PERFORM EPSILON ELIMINATION----------- */
    mynfa->epsilon_elimination_improved();

    /* -----extract header info--------------- */
    total_num_states += mynfa->num_states;
    total_num_accept_ids += mynfa->num_accept_ids;
    ait = mynfa->accept_ids.begin();
    while (ait != mynfa->accept_ids.end()) {
      all_accept_ids.insert(pair<int, int>(ait->first + state_offset, ait->second));
      ait++;
    }

    it = mynfa->accepting_states.begin();
    while (it != mynfa->accepting_states.end()) {
      all_accepting_states.insert(*it + state_offset);
      it++;
    }
  
    for (it2 = mynfa->starts.begin(); it2 != mynfa->starts.end(); it2++) {
      if (!mynfa->transitions[*it2].empty()) 
        all_starts.insert(*it2 + state_offset);
    }
    total_num_transitions += mynfa->num_transitions;

    mynfa->out_to_file(argv[2], state_offset);
    state_offset += mynfa->num_states;
    delete mynfa;
  }

  /* -----output the header information----- */
  sprintf(hdr_file, "%s_hdr", argv[2]);
  out_file.open(hdr_file); 
  out_file << total_num_states << endl;
  out_file << total_num_accept_ids << endl;
  ait = all_accept_ids.begin();
  while (ait != all_accept_ids.end()) {
    out_file << ait->first << " " << ait->second << endl;
    ait++;
  }
  out_file << all_accepting_states.size();
  for (it2 = all_accepting_states.begin(); it2 != all_accepting_states.end(); it2++) {
    out_file << " "<< *it2;
  }
  out_file << endl;
  out_file << all_starts.size() << " ";
  for (it2 = all_starts.begin(); it2 != all_starts.end(); it2++) {
    out_file << *it2 << " ";
  } 
  out_file << endl;
  out_file << total_num_transitions << endl;
  out_file.close(); 

  end = cputime();
  cout << "Time for epsilon elimination " << (double) (end - start)/1000 << "seconds" << endl;

  return 0;
}
