/*--------------------------------------------------------------
 * mdfa_simulation.cc
 * Construct multiple DFAs and perform simulation
 *
 * Author: Liu Yang
 * Date:   Aug 8, 2009
 *
 * The nfa_minimize() call on the combined DFA is commented out because
 * minimization may generate multiple accept ids.
 * -------------------------------------------------------------
 *  History:
 *  $Log$
 * 
 * ------------------------------------------------------------*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "re.h"
#include "nfa.h"
#include "minimize.h"
#include "globals.h"
#include <sys/time.h>
#include <sys/resource.h>

#include <unistd.h>
#include <pcap.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <net/if.h>
#include <errno.h>

#include "clock.h"
#include "packet.h"
#include "decode.h"

/* NOTE(review): MAX_STRING_LEN and NUM_OF_TEST_ROUND are not referenced
 * anywhere in this file. */
#define MAX_STRING_LEN	256
#define NUM_OF_TEST_ROUND	1

/* When defined, main() prints each signature ("sig: ...") as it is read. */
#define DEBUG

using namespace std;

/* this is defined in combine_alg.cc */
/* Builds one automaton out of NFA1 and NFA2; main() uses it to merge DFAs. */
nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2);

/* One packet read from the pcap trace, together with its capture header. */
typedef struct raw_packet
{
      int                allocated; // >0 iff pkbuf points to allocated data (set by get_next_packet(), cleared by main() after free)
      unsigned char      *pkbuf;    // heap copy of the captured packet bytes (hdr.caplen bytes long)
      packet             pkt;       // NOTE(review): not referenced anywhere in this file
      unsigned long      id;        // NOTE(review): not read anywhere in this file
      struct pcap_pkthdr hdr;       // capture header filled in by pcap_next()
} raw_packet_t;


/* Path of the pcap trace to replay; assigned from argv[3] in main(). */
string trace_file;

/* Handle of the opened trace; set through init_pcap() and used by main(). */
pcap_t *pcap_handle;

/*-----------------------------------------------
 * Function Prototypes
 *----------------------------------------------*/

/* Opens the offline trace named fname and stores the handle in *handle. */
void init_pcap(pcap_t **handle, string& fname);
/* Copies the next packet of the trace into pkt; returns 1 when a packet was
 * delivered and 0 when none could be. */
int get_next_packet(pcap_t *handle, raw_packet_t* pkt);
/* Estimates the number of bytes nfa occupies once stored as a dfa_tab_t. */
size_t dfa_tab_overhead(nfa_t *nfa);


/* Returns the user CPU time of this process in milliseconds. */
int cputime();

int main (int argc, char *argv[]) {

  FILE *f = NULL;
  char buf[4096];
  char *p;
  int line = 1;
  unsigned int sid = 1;
  nfa_t *NFA;
  nfa_t *DFA;
  nfa_t *com_dfa, *tmp_dfa;
  vector<dfa_tab_t> MDFA;
  unsigned int lb_num_states_per_dfa = 2500;	/* lower bound of number of states per combined DFA */

  int start, end;
  size_t total_str_len = 0;

  int i = 0;
  unsigned long long starttm = 0;
  unsigned long long stoptm = 0;
  unsigned long long total_cycles = 0;
  size_t total_bytes = 0;
  size_t mem_consump = 0;
   
   if (argc != 4) {
     printf("Usage: %s <regex file name> <states limit> <trace file name>\n", argv[0]);
     printf("state limit: the least number of states for each combined DFA\n");
     exit(0);
   }

   if ( (f=fopen(argv[1], "r")) == NULL) {
      printf("could not open file %s\n", argv[1]);
      exit(0);
   }

   lb_num_states_per_dfa = atoi(argv[2]);

   memset(buf, 0, 4096);
   p = fgets(buf, 4096, f);
   p[strlen(p)-1] = '\0';  /* get rid of trailing \n */
   NFA = re_to_nfa(buf, strlen(buf), line++, sid++);
   DFA = NFA->make_dfa();
   nfa_minimize(DFA);
   tmp_dfa = DFA;
   i++;

   memset(buf, 0, 4096);
   p = fgets(buf, 4096, f);
   while ((p != NULL)) {
      cout << "i = " << i << endl;
      p[strlen(p)-1] = '\0';  /* get rid of trailing \n */
#ifdef DEBUG
      printf("sig: %s\n", p);
#endif
      NFA = re_to_nfa(buf, strlen(buf), line++, sid++);
      DFA = NFA->make_dfa();
      nfa_minimize(DFA);
      i++;
      if (tmp_dfa == NULL) {
	tmp_dfa = DFA;
      } else {
        com_dfa = combine(tmp_dfa, DFA);
	//nfa_minimize(com_dfa);
        delete tmp_dfa;
        tmp_dfa = com_dfa;
        delete NFA;
	if (com_dfa->states.size() > lb_num_states_per_dfa) {
	  dfa_tab_t dta;
	  dta.populate(*com_dfa);
	  MDFA.push_back(dta);
	  /* estimate memory consumption of this dfa */
	  mem_consump += dfa_tab_overhead(com_dfa);
	  delete com_dfa;
	  tmp_dfa = NULL;
        }
      } /* end else */
      memset(buf, 0, 4096);
      p = fgets(buf, 4096, f);
   }
   fclose(f);

   /* process the last possible combined DFA */
   if (tmp_dfa != NULL) {
     dfa_tab_t dta;
     dta.populate(*tmp_dfa);
     MDFA.push_back(dta);
     mem_consump += dfa_tab_overhead(tmp_dfa);
     delete tmp_dfa;
   }

   cout << "finish building MDFA, num of DFAs is " << MDFA.size() << endl;
 
  /* match test */
  trace_file = argv[3];
  init_pcap(&pcap_handle, trace_file);

  start = cputime();
  raw_packet_t rp;
  unsigned char *iphdr, *tcphdr, *payload;
  size_t tcphdr_len = 20;	/* default length of tcp header */
  size_t iphdr_len = 20;
  int payload_sz = 0;
  size_t total_payload_bytes = 0;

  while (get_next_packet(pcap_handle, &rp)) {
    /* locate the payload */
    iphdr = rp.pkbuf + 14;	/* skip the link hdr */
    iphdr_len = (*iphdr & 0x0f) * 4;	/* extract the length of ip hdr */
    tcphdr = iphdr + iphdr_len;
    tcphdr_len = ((*(tcphdr + 12) >> 4) & 0x0f) * 4;
    payload = tcphdr + tcphdr_len;
    payload_sz = rp.hdr.caplen - 14 - iphdr_len - tcphdr_len;
    if (payload_sz > 0) {
      rdtsc(starttm);
      for (i = 0; i < MDFA.size(); i++) {
        MDFA[i].simulate(payload, payload_sz, true);
      }
      rdtsc(stoptm);
      total_payload_bytes += payload_sz;	
      total_cycles += (stoptm - starttm);
      if (rp.allocated) {
        free(rp.pkbuf);
        rp.allocated = 0;
      }
    }
    total_bytes += rp.hdr.caplen;
  }

  end = cputime();

  cout << "Total payload bytes: " << total_payload_bytes << endl;
  cout << "Total bytes including headers: " << total_bytes << endl;  
  cout << "MDFA simulation: execution time is " << (double)total_cycles/(double)total_payload_bytes << " cycles/byte" << endl;
  cout << "Memory consumption of MDFA is " << mem_consump << " bytes" << endl;

  return 0;
}

/*--------------------------------
 * cputime
 * Returns the user-mode CPU time reported by getrusage(RUSAGE_SELF)
 * for this process, converted to milliseconds.
 * -------------------------------*/
int cputime()
{
  struct rusage usage;
  const struct timeval *user_time = &usage.ru_utime;

  getrusage (RUSAGE_SELF, &usage);

  /* whole seconds scaled up, microseconds scaled down, both to ms */
  return user_time->tv_sec * 1000 + user_time->tv_usec / 1000;
}

/*--------------------------------
 * init_pcap
 *
 * Opens the pcap trace file named fname for offline reading and stores
 * the resulting handle in *handle.
 *
 *   handle - out parameter receiving the opened pcap handle
 *   fname  - path of the trace file
 *
 * On failure the pcap error text is printed to stderr and the process
 * exits with EXIT_FAILURE, so on return *handle is never NULL.
 * -------------------------------*/
void init_pcap(pcap_t **handle, string& fname)
{
   char errbuf[PCAP_ERRBUF_SIZE];

   errbuf[0] = '\0';
   *handle = pcap_open_offline(fname.c_str(), errbuf);
   /* test the handle that was just stored, not the address of the out
    * parameter (which is never NULL for a valid caller) */
   if (*handle == NULL)
   {
      fprintf(stderr, "pcap_open failed: %s\n", errbuf);
      exit(EXIT_FAILURE);
   }
   fprintf(stdout, "Opened trace file %s\n", fname.c_str());
}

/*--------------------------------------
 * get_next_packet
 *
 * Reads the next packet from handle with pcap_next(), which fills
 * pkt->hdr, and stores a heap copy of the hdr.caplen captured bytes in
 * pkt->pkbuf. The caller owns that copy and must free() it; pkt->allocated
 * is set to 1 to signal this. pkt->pkt and pkt->id are left untouched.
 *
 * Returns 1 when a packet was delivered, 0 when pcap_next() returned no
 * packet or when the copy could not be allocated (pkt->allocated is then 0
 * and an error is printed to stderr).
 *-------------------------------------*/
int get_next_packet(pcap_t *handle, raw_packet_t* pkt)
{
   const u_int8_t *data;
   size_t len;

   data = pcap_next(handle, &(pkt->hdr));
   if (!data)
      return 0;

   len = pkt->hdr.caplen;
   /* never request 0 bytes: malloc(0) may legitimately return NULL */
   pkt->pkbuf = (unsigned char *)malloc(len ? len : 1);
   if (pkt->pkbuf == NULL)
   {
      fprintf(stderr, "get_next_packet: out of memory (%lu bytes)\n",
              (unsigned long)len);
      pkt->allocated = 0;
      return 0;
   }

   /* the whole buffer is overwritten, so no zero-fill is needed */
   memcpy(pkt->pkbuf, data, len);
   pkt->allocated = 1;
   return 1;
}

/*-------------------------------------
 * dfa_tab_overhead: estimates the number of bytes nfa takes up
 * once it has been converted to a dfa_tab_t object.
 *
 * The estimate is the sum of
 *   - the transition table: one unsigned int per state and symbol
 *     (MAX_SYMS symbols per state), and
 *   - the accepting ids: one int per accept id over all states, plus
 *     one int per state (the original comment attributes this last
 *     term to the space taken by null pointers).
 *------------------------------------*/
size_t dfa_tab_overhead(nfa_t *nfa) {
  unsigned int accept_total = 0;

  /* count the accept ids attached to every state */
  for (unsigned int s = 0; s < nfa->states.size(); ++s)
    accept_total += nfa->states[s].accept_id.size();

  size_t bytes = nfa->states.size() * sizeof(unsigned int) * MAX_SYMS;
  bytes += accept_total * sizeof(int) + nfa->states.size() * sizeof(int);
  return bytes;
}
