/*------------------------------------------------------------------------
 * File:	kgram_combine_rdt_trans.cc
 *
 * Author:	Liu Yang
 * Date:	Oct 18, 2009
 *
 * Combine individual transition tables into a big one (state number will
 * be renamed during this process)
 *------------------------------------------------------------------------
 * $Log: kgram_combine_rdt_trans.cc,v $
 * Revision 1.3  2010/09/27 14:21:07  lyangru
 * final synchronization
 *
 *
 * */
#include <sys/types.h>
#include <sys/dir.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <cstdlib>
#include <string>
#include <fstream>
#include <cstdlib>
#include <algorithm>
#include <string.h>
#include "clock.h"
#include "kgram.h"

#define DEBUG

#define FALSE 0
#define TRUE !FALSE

using namespace std;

extern int alphasort();
int file_select(const struct dirent *entry);

namespace {

/*
 * Running totals and merged header data accumulated while the individual
 * transition tables are appended one after another.
 */
struct combine_totals {
  int offset;             /* added to every state number of the current table */
  int acc_id_offset;      /* added to every accept id of the current table */
  int num_states;         /* sum of the state counts of all tables read so far */
  int num_trans;          /* sum of the transition counts of all tables read so far */
  bool f_filled_class_c;  /* true once the alphabet mapping has been captured */
  std::set<unsigned int> starts;                          /* renumbered start states */
  std::map<std::string, unsigned int> class_c;            /* 2-byte gram -> symbol */
  std::multimap<unsigned int, unsigned int> accept_ids;   /* state -> accept id */

  combine_totals()
      : offset(0), acc_id_offset(0), num_states(0), num_trans(0),
        f_filled_class_c(false) {}
};

/* Report a table that ended early or held a non-numeric field; returns false. */
bool report_malformed(const std::string &path) {
  std::cerr << "Malformed transition table " << path << std::endl;
  return false;
}

/* Release the entry array allocated by scandir(). */
void release_dir_entries(struct dirent **files, int count) {
  for (int i = 0; i < count; i++)
    free(files[i]);
  free(files);
}

/*
 * Read one individual transition table from `path`, write its transitions
 * (with state numbers and accept ids shifted by the current offsets) to
 * `out_file`, and fold its header data into `tot`.
 *
 * `index` is the position of the table in the directory listing; it is only
 * used for the DEBUG trace.
 *
 * Returns true on success, false if the file cannot be opened or any field
 * cannot be read (a message is printed to stderr in both cases).
 */
bool append_table(const std::string &path, int index,
                  std::ofstream &out_file, combine_totals &tot) {
  (void)index;  /* only referenced when DEBUG is defined */

  /* A fresh stream per file, so no eof/fail bit left over from the previous
   * table can make the reads below fail. */
  std::ifstream in_file(path.c_str());
  if (!in_file) {
    std::cerr << "Failed to open " << path << std::endl;
    return false;
  }

  /* read num of states */
  unsigned int cur_num_states = 0, digram_alpha_size = 0;
  if (!(in_file >> cur_num_states >> digram_alpha_size))
    return report_malformed(path);
  tot.num_states += cur_num_states;
#ifdef DEBUG
  std::cout << "i = " << index << " cur_num_states = " << cur_num_states << std::endl;
#endif

  /* read the alphabet mapping; only the first table's mapping is kept, the
   * entries of later tables are parsed and discarded */
  for (unsigned int j = 0; j < digram_alpha_size; j++) {
    unsigned int char0 = 0, char1 = 0, new_alpha = 0;
    if (!(in_file >> char0 >> char1 >> new_alpha))
      return report_malformed(path);
    if (!tot.f_filled_class_c) {
      std::string kgram;
      kgram.append(1, static_cast<char>(static_cast<unsigned char>(char0)));
      kgram.append(1, static_cast<char>(static_cast<unsigned char>(char1)));
      tot.class_c.insert(std::pair<std::string, unsigned int>(kgram, new_alpha));
    }
  }
  tot.f_filled_class_c = true;

  /* read the accept ids */
  int cur_num_sig = 0;
  if (!(in_file >> cur_num_sig))
    return report_malformed(path);
  /* Accept ids of this table (here and in the transitions below) are shifted
   * by the offset that was current before this table was counted. */
  const int acc_id_base = tot.acc_id_offset;
  for (int j = 0; j < cur_num_sig; j++) {
    unsigned int st = 0, acc_id = 0;
    if (!(in_file >> st >> acc_id))
      return report_malformed(path);
    tot.accept_ids.insert(std::pair<unsigned int, unsigned int>(
        st + tot.offset, acc_id + acc_id_base));
  }
  tot.acc_id_offset += cur_num_sig;

  /* read starting states */
  int cur_num_starts = 0;
  if (!(in_file >> cur_num_starts))
    return report_malformed(path);
  for (int j = 0; j < cur_num_starts; j++) {
    unsigned int start = 0;
    if (!(in_file >> start))
      return report_malformed(path);
    tot.starts.insert(start + tot.offset);
  }

  /* read the transitions and copy them out with renumbered states */
  unsigned int cur_num_trans = 0;
  if (!(in_file >> cur_num_trans))
    return report_malformed(path);
  tot.num_trans += cur_num_trans;
  for (unsigned int j = 0; j < cur_num_trans; j++) {
    unsigned int cur_st = 0, cur_sym = 0, num_next_sts = 0;
    if (!(in_file >> cur_st >> cur_sym >> num_next_sts))
      return report_malformed(path);
    out_file << cur_st + tot.offset << " " << cur_sym << " " << num_next_sts << " ";

    for (unsigned int k = 0; k < num_next_sts; k++) {
      unsigned int next_st = 0;
      if (!(in_file >> next_st))
        return report_malformed(path);
      out_file << next_st + tot.offset << " ";
    }

    unsigned int num_acc_ids = 0;
    if (!(in_file >> num_acc_ids))
      return report_malformed(path);
    out_file << num_acc_ids << " ";
    if (num_acc_ids > 0) {
      /* One token precedes the id list in the input; it is consumed and the
       * fixed text " sig_id " is written in its place. */
      std::string sig_label;
      if (!(in_file >> sig_label))
        return report_malformed(path);
      out_file << " sig_id ";
      for (unsigned int k = 0; k < num_acc_ids; k++) {
        unsigned int cur_sig_id = 0;
        if (!(in_file >> cur_sig_id))
          return report_malformed(path);
        out_file << cur_sig_id + acc_id_base << " ";
      }
    }
    /* '\n' instead of endl: same bytes, no flush per transition */
    out_file << '\n';
  }

  tot.offset += cur_num_states;
  return true;
}

}  /* end anonymous namespace */

/*
 * kgram_combine_rdt_trans <dir of individual trans tables> <combined file>
 *
 * Appends the transitions of every table found in argv[1] (in alphasort
 * order, skipping "." and "..") to the file argv[2], renumbering states and
 * accept ids so they do not collide, then writes the merged header (state
 * count, alphabet mapping, accept ids, start states, transition count) to
 * "<argv[2]>_info_hdr".
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE on a usage error, an
 * unreadable/empty directory, a file that cannot be opened or written, or a
 * malformed input table.
 */
int main(int argc, char **argv) {
  if (argc != 3) {
    std::cerr << "Usage: kgram_combine_rdt_trans <path for individual trans tables> <file name of combined transitions>" << std::endl;
    return EXIT_FAILURE;
  }

  struct dirent **files = NULL;
  const int count = scandir(argv[1], &files, file_select, alphasort);
#ifdef DEBUG
  std::cout << "count = " << count << std::endl;
#endif
  if (count <= 0) {
    std::cerr << "Invalid path name " << argv[1] << " or empty directory" << std::endl;
    if (count == 0)
      release_dir_entries(files, 0);  /* the (empty) array may still be allocated */
    return EXIT_FAILURE;
  }

  std::ofstream out_file(argv[2]);
  if (!out_file) {
    std::cerr << "Failed to open " << argv[2] << std::endl;
    release_dir_entries(files, count);
    return EXIT_FAILURE;
  }

  /* Append every table; stop at the first one that cannot be processed. */
  combine_totals totals;
  bool ok = true;
  for (int i = 0; ok && i < count; i++) {
    /* std::string instead of a fixed buffer: no limit on the path length */
    const std::string trans_file = std::string(argv[1]) + "/" + files[i]->d_name;
    ok = append_table(trans_file, i, out_file, totals);
  }
  release_dir_entries(files, count);
  if (!ok)
    return EXIT_FAILURE;

  out_file.close();
  if (!out_file) {
    std::cerr << "Failed to write " << argv[2] << std::endl;
    return EXIT_FAILURE;
  }

  /* output the transition headers */
  const std::string trans_hdr = std::string(argv[2]) + "_info_hdr";
  std::ofstream hdr_file(trans_hdr.c_str());
  if (!hdr_file) {
    std::cerr << "Failed to open " << trans_hdr << std::endl;
    return EXIT_FAILURE;
  }

  /* output the num of states */
  hdr_file << totals.num_states << '\n';
  /* output the mapping size (for gram size of 2) */
  hdr_file << 65536 << '\n';
  /* output the alphabet mapping */
  for (std::map<std::string, unsigned int>::const_iterator itm = totals.class_c.begin();
       itm != totals.class_c.end(); ++itm) {
    hdr_file << static_cast<int>(static_cast<unsigned char>(itm->first[0])) << " "
             << static_cast<int>(static_cast<unsigned char>(itm->first[1])) << " "
             << itm->second << '\n';
  }
  /* output the accept ids */
  hdr_file << totals.accept_ids.size() << '\n';
  for (std::multimap<unsigned int, unsigned int>::const_iterator itm2 = totals.accept_ids.begin();
       itm2 != totals.accept_ids.end(); ++itm2) {
    hdr_file << itm2->first << " " << itm2->second << '\n';
  }

  /* output starting states */
  hdr_file << totals.starts.size() << " ";
  for (std::set<unsigned int>::const_iterator its = totals.starts.begin();
       its != totals.starts.end(); ++its)
    hdr_file << *its << " ";
  hdr_file << '\n';
  /* output the number of transitions */
  hdr_file << totals.num_trans << '\n';

  hdr_file.close();
  if (!hdr_file) {
    std::cerr << "Failed to write " << trans_hdr << std::endl;
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}

/*
 * Directory-entry filter passed to scandir(): returns FALSE for the "." and
 * ".." entries so they are skipped, and TRUE for every other entry name.
 */
int file_select(const struct dirent *entry) {
  const char *name = entry->d_name;

  if (!strcmp(name, "."))
    return (FALSE);
  if (!strcmp(name, ".."))
    return (FALSE);
  return (TRUE);
}

