/*------------------------------------------------------------------------
 * File:	kgram_combine_rdt_alpha.cc
 *
 * Author:	Liu Yang
 * Date:	Oct 11, 2009
 *
 * Combine multiple reduced alphabets into one reduced alphabet
 *------------------------------------------------------------------------
 * $Log: kgram_combine_rdt_alpha.cc,v $
 * Revision 1.4  2010/09/27 14:21:07  lyangru
 * final synchronization
 *
 *
 * */

#include <sys/types.h>
#include <sys/dir.h>
#include <sys/param.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <fstream>
#include <cstdlib>
#include <algorithm>

#include <set>
#include <map>
#include <vector>

#define DEBUG

#define FALSE 0
#define TRUE !FALSE


using namespace std;

/* Comparison routine handed to scandir() in main().
 * NOTE(review): the system directory headers presumably already declare
 * alphasort with its real two-argument prototype; this empty-parameter
 * redeclaration looks like a leftover -- confirm before removing. */
extern int alphasort();
/* scandir() filter, defined below: rejects the "." and ".." entries. */
int file_select(const struct dirent *entry);
/* Defined below: loads the alphabet mapping table stored in `file` into `am`. */
void fill_alpha_map(map< unsigned int, set<string> > &am, char *file);
/* Defined below: adds to dst_set the classes obtained by combining the two
 * alphabets src_set0 and src_set1. */
void alphabet_combine(set<set<string> > &dst_set, set<set<string> > src_set0, set<set<string> > src_set1);

int main(int argc, char** argv) {

  int count, i;
  struct dirent **files;
  map<unsigned int, set<string> > tmp_alpha_map;
  map<unsigned int, set<string> >::iterator itm;
  vector< map<unsigned int, set<string> > > alpha_maps;
  char trans_file[1024];
  set<set<string> > com_alpha_sets, tmp_alpha_sets, tmp_com_alpha_sets;
  vector<set<set<string> > > src_alpha_sets;
  set<set<string> >::iterator itss;
  set<string>::iterator its;
  ofstream out_file;
  unsigned int new_char = 0;

  if (argc != 3) {
    printf("Usage: %s <path of src_alpha_maps> <combined_alpha_map file name>\n", argv[0]);
    exit(0);
  }

  /* list the transition files with alphabet mapping table */
  count = scandir(argv[1], &files, file_select, alphasort);
  if (count <= 0) {
    printf("Invalid path name %s or empty directory\n", argv[1]);
    exit(0);
  }

  /* load the alphabet mapping tables */
  for (i = 0; i < count; i++) {
    memset(trans_file, 0, 1024);
    strcpy(trans_file, argv[1]);
#ifdef DEBUG
    printf("%s\n", files[i]->d_name);
#endif
    trans_file[strlen(argv[1])] = '/';
    strcpy(trans_file + strlen(argv[1]) + 1, files[i]->d_name);
    fill_alpha_map(tmp_alpha_map, trans_file);
    if (tmp_alpha_map.size() == 0) {
      printf("fill_alpha_map() failed!\n");
      exit(0);
    }
#ifdef DEBUG
  printf("size of compressed alphabet: %d\n", tmp_alpha_map.size());
#endif
    alpha_maps.push_back(tmp_alpha_map);
    tmp_alpha_map.clear();	/* clear for next use */
  }

  for (i = 0; i < alpha_maps.size(); i++) {
    for (itm = alpha_maps[i].begin(); itm != alpha_maps[i].end(); itm++) {
      tmp_alpha_sets.insert(itm->second);
    }
    src_alpha_sets.push_back(tmp_alpha_sets);
    tmp_alpha_sets.clear();
  }

  alphabet_combine(com_alpha_sets, src_alpha_sets[0], src_alpha_sets[1]);
#ifdef DEBUG
  printf("size of the first combined alphabet: %d\n", com_alpha_sets.size());
#endif


  if (alpha_maps.size() > 2) {
    for (i = 2; i < alpha_maps.size(); i++) {
      tmp_com_alpha_sets = com_alpha_sets;
      com_alpha_sets.clear();
      alphabet_combine(com_alpha_sets, tmp_com_alpha_sets, src_alpha_sets[i]);
    }
  } 
#ifdef DEBUG
  printf("size of the final combined alphabet: %d\n", com_alpha_sets.size());
#endif
  
  /* output the final combined alphabet mapping table */
  out_file.open(argv[2]);
  if (!out_file) {
    printf("Failed to open file %s for output!\n", argv[2]);
    exit(0);
  }
  out_file << 65536 << endl;	/* the size of mapping */
  for (itss = com_alpha_sets.begin(); itss != com_alpha_sets.end(); itss++) {
    for (its = itss->begin(); its != itss->end(); its++) {
      out_file << (int)(unsigned char)(*its)[0] << " " << (int)(unsigned char)(*its)[1] << " " << new_char << endl;     
    }
    new_char++;
  }
  out_file.close();

  return 0;
}

/* scandir() filter: returns FALSE for the "." and ".." entries and TRUE for
 * every other directory entry. */
int file_select(const struct dirent *entry) {
  const char *name = entry->d_name;

  if (strcmp(name, ".") == 0)
    return (FALSE);
  if (strcmp(name, "..") == 0)
    return (FALSE);
  return (TRUE);
}

/* Load the alphabet mapping table stored at the start of a reduced digram
 * (two-stride) transition table file.
 *
 * pre:  file names a text file that starts with two unsigned integers
 *       (number of states, number of mapping entries) followed by that many
 *       "<byte0> <byte1> <new char>" triples;
 *       am is empty
 * post: am holds entries of the form <new char, set of digrams>, where the
 *       set of digrams is a set of equivalent digrams (two digrams xy and
 *       uv are considered equivalent if T(q, xy) = T(q, uv) for all q in Q).
 *       Each digram is stored as a two-character string made of its two
 *       byte values.
 *
 * If the file cannot be opened, a message is printed and the program exits.
 * If the header or an entry cannot be parsed, a message is printed and
 * reading stops: am keeps only the entries read completely up to that point
 * (and is left untouched when the header itself is bad), so a caller that
 * passed in an empty map can detect a total failure by checking am.empty().
 * */
void fill_alpha_map(std::map<unsigned int, std::set<std::string> > &am, char *file) {
  std::ifstream nfa_file(file);
  if (!nfa_file) {
    printf("Failed to open file %s!\n", file);
    exit(0);
  }

  /* num_states is not used here, but it has to be consumed to reach the
   * entry count that follows it. */
  unsigned int num_states = 0;
  unsigned int old_alpha_size = 0;
  if (!(nfa_file >> num_states >> old_alpha_size)) {
    printf("Malformed header in file %s!\n", file);
    return;
  }

  for (unsigned int i = 0; i < old_alpha_size; i++) {
    unsigned int byte0 = 0, byte1 = 0, new_char = 0;
    /* Never act on a partially read triple: after a failed extraction the
     * variables do not hold values from the file. */
    if (!(nfa_file >> byte0 >> byte1 >> new_char)) {
      printf("Truncated or malformed alphabet mapping in file %s!\n", file);
      break;
    }

    std::string digram;
    digram.append(1, static_cast<char>(static_cast<unsigned char>(byte0)));
    digram.append(1, static_cast<char>(static_cast<unsigned char>(byte1)));

    /* operator[] creates the class on first use */
    am[new_char].insert(digram);
  }
  nfa_file.close();
}

/* Combine two reduced alphabets (sets of digram classes) into dst_set.
 *
 * For every class c0 of src_set0 that is not already in dst_set:
 *   - if c0 holds a single digram, c0 itself is added;
 *   - otherwise every class c1 of src_set1 is visited: a single-digram c1
 *     is added as is, and for any other c1 the non-empty intersection of c0
 *     and c1 is added.
 *
 * dst_set is not cleared first; classes are added to whatever it already
 * holds, and a class of src_set0 that is already present is skipped
 * entirely.  Empty intersections are never added: an empty class contains
 * no digram and would only consume a class index in the caller.
 *
 * NOTE(review): this appears to assume that both source alphabets partition
 * the same set of digrams, in which case the result is their common
 * refinement -- confirm against the callers.
 */
void alphabet_combine(std::set<std::set<std::string> > &dst_set, std::set<std::set<std::string> > src_set0, std::set<std::set<std::string> > src_set1) {
  typedef std::set<std::set<std::string> >::const_iterator class_iter;

  std::set<std::string> common;
  for (class_iter c0 = src_set0.begin(); c0 != src_set0.end(); ++c0) {
    if (dst_set.find(*c0) != dst_set.end()) {
      continue;	/* c0 already in dst_set */
    }
    if (c0->size() == 1) {
      dst_set.insert(*c0);
      continue;
    }
    for (class_iter c1 = src_set1.begin(); c1 != src_set1.end(); ++c1) {
      if (c1->size() == 1) {
        dst_set.insert(*c1);	/* insert() ignores duplicates */
        continue;
      }
      common.clear();
      std::set_intersection(c0->begin(), c0->end(), c1->begin(), c1->end(),
                            std::inserter(common, common.begin()));
      if (!common.empty()) {
        dst_set.insert(common);
      }
    }
  }
}
