/*--------------------------------------------------------------
 * dfa_simulation.cc
 * measure the performance of dfa simulation
 *
 * Author: Liu Yang
 * Date:   July 11, 2009
 * 
 * The nfa_minimize() calls are commented out because minimization
 * may generate multiple accept ids.
 * -------------------------------------------------------------
 *  History:
 *  $Log: dfa_simulation.cc,v $
 *  Revision 1.2  2010/09/27 14:48:30  lyangru
 *  final synchronization
 *
 * 
 * ------------------------------------------------------------*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "re.h"
#include "nfa.h"
#include "minimize.h"
#include "globals.h"
#include <sys/time.h>
#include <sys/resource.h>

#define MAX_STRING_LEN	256
#define NUM_OF_TEST_ROUND	1

/* NOTE(review): declared here, but neither defined nor called in the
 * visible part of this file -- confirm it is still needed. */
void usage_and_die(const char *msg);
/* this is defined in combine_alg.cc */
/* main() uses it to fold the per-regex DFAs into a single automaton. */
nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2);

/* CPU time used by this process, in milliseconds (defined at the end of
 * this file). */
int cputime();

int main (int argc, char *argv[]) {

  FILE *f = NULL, *str_file = NULL;
  char buf[4096];
  char *p;
  int line = 1;
  unsigned int sid = 1;
  nfa_t *NFA[7];
  nfa_t *DFA[7];
  nfa_t *com_dfa, *tmp_dfa;
  dfa_tab_t com_dt;

  int num_str, j;
  char **test_str;
  int start, end;
  size_t total_str_len = 0;
  char str_stream[2000000UL];
  size_t str_cnt = 0;

  int i = 0;

   
   if (argc != 3) {
     printf("Usage: %s <regex file name> <stream file name>\n", argv[0]);
   }

   if ( (f=fopen(argv[1], "r")) == NULL) {
      printf("could not open file %s\n", argv[1]);
      exit(0);
   }

   memset(buf, 0, 4096);
   p = fgets(buf, 4096, f);
   p[strlen(p)-1] = '\0';  /* get rid of trailing \n */
   NFA[0] = re_to_nfa(buf, strlen(buf), line++, sid++);
   DFA[0] = NFA[0]->make_dfa();
   //nfa_minimize(DFA[0]);
   tmp_dfa = DFA[0];
   i++;

   memset(buf, 0, 4096);
   p = fgets(buf, 4096, f);
   while (p != NULL) {
      p[strlen(p)-1] = '\0';  /* get rid of trailing \n */
      NFA[i] = re_to_nfa(buf, strlen(buf), line++, sid++);
      DFA[i] = NFA[i]->make_dfa();
      //nfa_minimize(DFA[i]);
      com_dfa = combine(tmp_dfa, DFA[i]);
      tmp_dfa = com_dfa;
      i++;
      memset(buf, 0, 4096);
      p = fgets(buf, 4096, f);
   }
   fclose(f);
  
   //nfa_minimize(com_dfa);
   com_dt.populate(*com_dfa);
   com_dfa->bdd_output("combined_dfa_bdd2.txt", 42);

   /* load stream file for match testing */
  str_file = fopen(argv[2], "r");
  if (str_file == NULL) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }

  /* read the number of strings to test */
  fscanf(str_file, "%d\n", &num_str);
  test_str = (char **)malloc(num_str * sizeof(char *));
  if (test_str == NULL)
    perror("out of memory"), exit(1);
  
  for (i = 0; i < num_str; i++) {
    test_str[i] = (char *)malloc(MAX_STRING_LEN * sizeof(char));
    if (test_str[i] == NULL)
      perror("out of memory"), exit(1);
  }
  /* read the strings */
  for (i = 0; i < num_str; i++) {
    fgets(test_str[i], 4096, str_file);
    //com_dt.simulate((const unsigned char *)test_str[i], strlen(test_str[i]), true);
    memcpy(str_stream + str_cnt, test_str[i], strlen(test_str[i]));
    str_cnt += strlen(test_str[i]);
    total_str_len += strlen(test_str[i]); 
  }
  str_stream[str_cnt] = '\0';

  start = cputime();
  for (j = 0; j < NUM_OF_TEST_ROUND; j++) {
    com_dt.simulate((const unsigned char *)str_stream, str_cnt, true);
  }
  end = cputime();
  cout << "execution time is " << ((double)(end - start)/1000) / ((double)total_str_len*NUM_OF_TEST_ROUND/1000000000) << " s/GB" << endl; 

  for (i = 0; i < num_str; i++) 
    free(test_str[i]);
  free(test_str);

  fclose(str_file);  
  
  return 0;


}

/*
 * Return the user-mode CPU time consumed so far by the calling process,
 * in milliseconds, or -1 if getrusage() fails.
 *
 * Only ru_utime is counted; time spent in the kernel on behalf of the
 * process (ru_stime) is not included.  The result is narrowed to int, so
 * it is only meaningful while the total fits in an int.
 */
int cputime()
{
  struct rusage rus;

  /* Do not read rus unless the call succeeded: it would be uninitialised. */
  if (getrusage(RUSAGE_SELF, &rus) != 0)
    return -1;

  return (int)(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000);
}
