
/*---------------------------------------------------------------------------
 * File:	multi_byte_simul_no_val.cc
 *
 * Author:	Liu Yang
 * Date:	Jan 27, 2009
 *
 * Implementation of the multi-byte NFA simulation using multiple threads.
 * The main difference between this implementation and multi_thread_simul is
 * that no validation phase is involved in this approach.
 *------------------------------------------------------------------------
 * $Log: multi_byte_simul_no_val.cc,v $
 * Revision 1.2  2010/09/27 14:21:07  lyangru
 * final synchronization
 *
 *
 *-----------------------------------------------------------------------*/


#include <iostream>
#include <fstream>
#include <map>
#include <set>
#include <vector>
#include <string>
#include <cstdlib>
#include <string.h>
#include <stdio.h>

#include <unistd.h>
#include <pcap.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <net/if.h>
#include <errno.h>
#include <pthread.h>

#include "nfa-multimap.h"
#include "clock.h"
#include "packet.h"
#include "decode.h"

#define BUF_SIZE	200000
#define NUM_THREADS	2


using namespace std;

/* One captured packet plus the bookkeeping needed to release its buffer. */
struct raw_packet
{
      /* non-zero while pkbuf points to heap memory that must be free()d */
      int                allocated;
      /* private copy of the captured bytes (hdr.caplen bytes long) */
      unsigned char      *pkbuf;
      /* decoded packet view */
      packet             pkt;
      /* packet identifier */
      unsigned long      id;
      /* capture header filled in by libpcap for this packet */
      struct pcap_pkthdr hdr;
};
typedef struct raw_packet raw_packet_t;


/* Path of the pcap trace to replay; main() sets it from argv[2]. */
string trace_file;

/* Capture handle opened by init_pcap() and read via get_next_packet(). */
pcap_t *pcap_handle;

/*-----------------------------------------------
 * Function Prototypes
 *----------------------------------------------*/

/* Open the trace file `fname` for offline reading and store the handle in *handle. */
void init_pcap(pcap_t **handle, string& fname);
/* Copy the next packet of the trace into *pkt; returns 1 on success, 0 when no packet is available. */
int get_next_packet(pcap_t *handle, raw_packet_t* pkt);

/*-----------------------------------------------------------
 * Global variables and functions for multi-threads simulation
 *----------------------------------------------------------*/
  /* Number of NFA states, read from the NFA file by fill_ptrans_tab(). */
  unsigned int num_states;
  /* Start states of the NFA, read from the NFA file; the head thread begins from these. */
  set<unsigned int> starts; 
  //multimap<unsigned int, unsigned int> big_starts;	// contains all states except for accepting states
  /* Initial set of the second thread: main() inserts the pair (k, k) for every
   * state k that has no accept ids. */
  set<pair<unsigned int, unsigned int> > big_starts;
  //set<unsigned int> ending_frontiers;
  /* Points at the set holding the states the head thread ended in;
   * updated by head_simul() and read by main() when filtering candidate matches. */
  set<unsigned int> *p_ending_frontiers;
  /* Transition table: ptrans_tab[state][symbol] is the set of successor states,
   * or NULL when the state has no transition on that symbol. */
  set<unsigned int> ***ptrans_tab;
  /* Second, identically filled transition table built by fill_ptrans_tab(). */
  set<unsigned int> ***ptrans_tab_copy;
  /* acc_ids[state] is a -1 terminated array of signature ids, or 0 for a
   * state without accept ids; acc_ids_copy is built the same way. */
  int **acc_ids, **acc_ids_copy;	/* keep the signature id */
  //unsigned char *g_payload;
  /* Candidate matches recorded by simul_with_src(); main() checks and clears them per packet. */
  vector<validation_info> vec_validation;
  /* Scratch record filled by simul_with_src() before each push_back into vec_validation. */
  validation_info tmp_val;
  /* The two state sets head_simul() alternates between (current / next). */
  set<unsigned int> t1_cur_st, t1_next_st;
  /* The two state sets simul_with_src() alternates between.
   * Each element is a pair <current state, source state it was reached from>. */
  set<pair<unsigned int, unsigned int> > t2_cur_st, t2_next_st;

  /* Load the NFA description file and build the transition / accept-id tables. */
  void fill_ptrans_tab(char *f_nfa);
  /* Thread body: simulate the NFA from its start states over the first part of a payload. */
  void *head_simul(simul_thread_data *payload_and_starts);
  /* Thread body: simulate the NFA from big_starts over the rest of a payload,
   * remembering the source state of every active state. */
  void *simul_with_src(simul_thread_data2 *payload_and_starts);

  /* Debug aids; only referenced from commented-out code in main(). */
  char *testpayload = "/calendar";
  int payload_cnt = 0;

  /* Cycle counters of the head thread (start stamp, stop stamp, running total). */
  unsigned long long t1_starttm = 0;
  unsigned long long t1_stoptm = 0;
  unsigned long long t1_total_cycles = 0;

  /* Cycle counters of the second thread (start stamp, stop stamp, running total). */
  unsigned long long t2_starttm = 0;
  unsigned long long t2_stoptm = 0;
  unsigned long long t2_total_cycles = 0;


/*--------------------------------
 * main
 *
 * Usage: <program> <NFA file name> <trace file>
 *
 * Loads the NFA, then for every packet of the trace that carries payload
 * bytes splits the payload in two parts and matches them concurrently:
 * head_simul() runs over the first part from the NFA start states, and
 * simul_with_src() runs over the second part from every non-accepting
 * state.  Candidate matches of the second thread are reported only if
 * their source state is among the states the head thread ended in.
 * Cycle counts and timing statistics are printed at the end.
 * -------------------------------*/
int main(int argc, char** argv) {

  const size_t LINK_HDR_LEN = 14;      /* bytes skipped in front of the IP header */
  const size_t TCP_DATA_OFF_POS = 12;  /* TCP header byte holding the data offset */
  const double HEAD_FRACTION = 0.65;   /* share of the payload given to the head thread */

  unsigned long long starttm = 0;
  unsigned long long stoptm = 0;
  unsigned long long total_cycles = 0;
  size_t total_bytes = 0;
  size_t total_payload_bytes = 0;
  unsigned int t_start, t_end;
  int rc, i, j, val_size = 0;
  unsigned int k;
  pthread_t threads[NUM_THREADS];
  simul_thread_data payload_and_starts;
  simul_thread_data2 payload_and_big_starts;

  if (argc != 3) {
    cerr << "Usage: " << (argc > 0 ? argv[0] : "multi_byte_simul_no_val")
         << " <NFA file name> <trace file>" << endl;
    cout << "The NFA file should contains transitions of an NFA with epsilon transitions eliminated!" << endl;
    exit(1);
  }

  /* fill out the transition table */
  fill_ptrans_tab(argv[1]);

  /* fill out the starting states in thread arguments */
  payload_and_starts.starts = &starts;
  /* the second thread starts from every state that has no accept ids */
  for (k = 0; k < num_states; k++) {
    if (!acc_ids[k])
      big_starts.insert(pair<unsigned int, unsigned int>(k, k));
  }

  /* ----read stream for match test---- */
  trace_file = argv[2];

  init_pcap(&pcap_handle, trace_file);
  if (pcap_handle == NULL) {
    /* defensive: never hand a NULL handle to pcap_next() */
    cerr << "Could not open trace file " << trace_file << endl;
    exit(1);
  }

  t_start = cputime();

  raw_packet_t rp;
  rp.pkbuf = NULL;
  rp.allocated = 0;

  unsigned char *iphdr, *tcphdr, *payload;
  size_t tcphdr_len, iphdr_len;
  int payload_sz;

  while (get_next_packet(pcap_handle, &rp)) {
    const size_t caplen = rp.hdr.caplen;
    total_bytes += caplen;

    /* Locate the payload.  Every header byte is checked against caplen
     * before it is read, so truncated frames are skipped instead of
     * being read out of bounds.
     * NOTE(review): the frame is assumed to be IPv4/TCP behind a 14-byte
     * link header; no protocol fields are inspected - confirm the traces
     * are pre-filtered. */
    payload = NULL;
    payload_sz = 0;
    if (caplen > LINK_HDR_LEN) {
      iphdr = rp.pkbuf + LINK_HDR_LEN;	/* skip the link hdr */
      iphdr_len = (*iphdr & 0x0f) * 4;	/* extract the length of ip hdr */
      if (caplen > LINK_HDR_LEN + iphdr_len + TCP_DATA_OFF_POS) {
        tcphdr = iphdr + iphdr_len;
        tcphdr_len = ((*(tcphdr + TCP_DATA_OFF_POS) >> 4) & 0x0f) * 4;
        if (caplen > LINK_HDR_LEN + iphdr_len + tcphdr_len) {
          payload = tcphdr + tcphdr_len;
          payload_sz = static_cast<int>(caplen - LINK_HDR_LEN - iphdr_len - tcphdr_len);
        }
      }
    }

    if (payload_sz > 0) {
      /* split the payload between the two threads */
      const int head_len = (int)(payload_sz * HEAD_FRACTION);
      payload_and_starts.payload = payload;
      payload_and_starts.len = head_len;
      payload_and_big_starts.payload = payload + head_len;
      payload_and_big_starts.len = payload_sz - head_len;

      /* reset the per-packet working sets */
      p_ending_frontiers = &t1_cur_st;
      p_ending_frontiers->clear();
      t1_next_st.clear();
      t2_next_st.clear();
      t2_cur_st.clear();

      rdtsc(starttm);
      rc = pthread_create(&threads[0], NULL, (void*(*)(void*))head_simul, (void *) &payload_and_starts);
      if (rc) {
        printf("ERROR; return code from pthread_create() is %d\n", rc);
        exit(-1);
      }
      rc = pthread_create(&threads[1], NULL, (void*(*)(void*))simul_with_src, (void *) &payload_and_big_starts);
      if (rc) {
        printf("ERROR; return code from pthread_create() is %d\n", rc);
        exit(-1);
      }

      /* Wait for both threads.  Their exit value is not used, so NULL is
       * passed: pthread_join stores a void*, which does not fit an int. */
      for (k = 0; k < NUM_THREADS; k++) {
        rc = pthread_join(threads[k], NULL);
        if (rc) {
          printf("ERROR; return code from pthread_join() is %d\n", rc);
          exit(-1);
        }
      }

      rdtsc(stoptm);
      total_cycles += (stoptm - starttm);

      /* Keep only the candidates whose source state was actually reached
       * by the head thread at the end of its part.
       * NOTE(review): the printed offset is relative to the start of the
       * second part, not to the start of the payload - confirm intended. */
      val_size = vec_validation.size();
      if (val_size > 0) {
        rdtsc(starttm);
        for (i = 0; i < val_size; i++) {
          if (p_ending_frontiers->find(vec_validation[i].src_st) != p_ending_frontiers->end()) {
            printf("Matched at offset %d sig ", vec_validation[i].offset);
            j = 0;
            while (acc_ids[vec_validation[i].acc_st][j] != -1) {
              printf("%d ", acc_ids[vec_validation[i].acc_st][j]);
              j++;
            }
            printf("\n");
          }
        }
        rdtsc(stoptm);
        total_cycles += (stoptm - starttm);
        vec_validation.clear();
      }

      total_payload_bytes += payload_sz;
    }

    /* release the packet copy for every packet, with or without payload */
    if (rp.allocated) {
      free(rp.pkbuf);
      rp.pkbuf = NULL;
      rp.allocated = 0;
    }
  }

  t_end = cputime();
  pcap_close(pcap_handle);
  pcap_handle = NULL;

  cout << "Total payload bytes: " << total_payload_bytes << endl;
  cout << "Total bytes including packet headers: " << total_bytes << endl;
  cout << "Total cycles: " << total_cycles << endl;
  cout << "Simulation: execution time is " << (double)total_cycles/(double)total_payload_bytes << " cycles/byte" << endl;
  cout << "Head thread execution time is " << (double)t1_total_cycles/(double)total_payload_bytes << " cycles/byte" << endl;
  cout << "Second thread execution time is " << (double)t2_total_cycles/(double)total_payload_bytes << " cycles/byte" << endl;
  cout << "Total time spent on simulation: " << (double)(t_end - t_start)/(double)1000 << " seconds" << endl;

  return 0;
}

/*--------------------------------
 * init_pcap
 *
 * Opens the capture file `fname` for offline reading and stores the
 * resulting handle in *handle.  On failure the libpcap error text is
 * written to stderr and the process exits with a non-zero status.
 * -------------------------------*/
void init_pcap(pcap_t **handle, string& fname)
{
   char errbuf[PCAP_ERRBUF_SIZE];

   errbuf[0] = '\0';
   *handle = pcap_open_offline(fname.c_str(), errbuf);
   /* test the handle that was just opened, not the address of the out-parameter */
   if (*handle == NULL)
   {
      fprintf(stderr, "pcap_open failed: %s\n", errbuf);
      exit(1);
   }
   fprintf(stdout, "Opened trace file %s\n", fname.c_str());
}

/*--------------------------------------
 * get_next_packet
 *
 * Reads the next packet from `handle` and stores a private heap copy of
 * its captured bytes in pkt->pkbuf (pkt->hdr.caplen bytes).  pkt->hdr is
 * filled by libpcap, pkt->allocated is set to 1 and pkt->id receives a
 * running packet number starting at 0.  The caller owns pkt->pkbuf and
 * must free() it.
 *
 * Returns 1 when a packet was stored, 0 when pcap_next() delivered no
 * packet (pkbuf / allocated / id are left untouched in that case).
 * Exits the process if the copy cannot be allocated.
 *-------------------------------------*/
int get_next_packet(pcap_t *handle, raw_packet_t* pkt)
{
   static unsigned long next_id = 0;
   const u_int8_t *data;
   size_t len;

   data = pcap_next(handle, &(pkt->hdr));
   if (!data)
      return 0;

   len = pkt->hdr.caplen;
   /* never request 0 bytes: malloc(0) may legitimately return NULL */
   pkt->pkbuf = (u_int8_t *)malloc(len > 0 ? len : 1);
   if (pkt->pkbuf == NULL)
   {
      fprintf(stderr, "get_next_packet: out of memory (%lu bytes)\n", (unsigned long)len);
      exit(1);
   }
   memcpy(pkt->pkbuf, data, len);
   pkt->allocated = 1;
   pkt->id = next_id++;
   return 1;
}

/*----------------------------------------------
 * fill_ptrans_tab
 * ------------------------------------------ */
void fill_ptrans_tab(char *f_nfa) {

  unsigned int num_accept_ids, num_accept_states, num_starts, num_transitions;
  unsigned int tmp_st, tmp_acc_id, tmps;
  multimap<unsigned int, unsigned int> accept_ids;
  vector<unsigned int> accepting_states;
  pair< multimap<unsigned int, unsigned int>::iterator, multimap<unsigned int,unsigned int>::iterator > ret;
  multimap<unsigned int, unsigned int>::iterator itv;
  unsigned int cur_state, next_state, sym, n_acc;
  unsigned int num_filled = 0, trans_tab_sz = 0;
  unsigned int i, j;

  ifstream nfa_file;
  nfa_file.open(f_nfa);
  if (!nfa_file) {
    cerr << "File could not be openned" << endl;
    exit(1);
  }   

  /* read num of states and num of accept ids (signature ids) */
  nfa_file >> num_states >> num_accept_ids;
  for (i = 0; i < num_accept_ids; i++) {
    nfa_file >> tmp_st >> tmp_acc_id;
    accept_ids.insert(pair<int, int>(tmp_st, tmp_acc_id));
  }
  
  /* read the accepting states */
  nfa_file >> num_accept_states;
  for (i = 0; i < num_accept_states; i++) {
    nfa_file >> tmps;
    //accepting_states.push_back(tmps);
  }

  /* read the start states (may be multiple for NFA) */
  nfa_file >> num_starts;
  if (num_starts == 0) {
    cerr << "Invalid number of starts" << endl;
    exit(1);
  }
  for (i = 0; i < num_starts; i++) {
    nfa_file >> tmps;
    starts.insert(tmps);
  }
   
  /* read transitions and put them in a lookup-table */
  nfa_file >> num_transitions;
  ptrans_tab = new set<unsigned int> **[num_states];
  ptrans_tab_copy = new set<unsigned int> **[num_states];
   
  for (i = 0; i < num_states; i++) {
    ptrans_tab[i] = new set<unsigned int> *[ALPHABET_SIZE];
    ptrans_tab_copy[i] = new set<unsigned int> *[ALPHABET_SIZE];
    for (j = 0; j < ALPHABET_SIZE; j++) {
      ptrans_tab[i][j] = NULL;
      ptrans_tab_copy[i][j] = NULL;
    }
  }

  for (i = 0; i < num_transitions; i++) {
    /* read one transition */
    nfa_file >> cur_state >> sym >> next_state;
    if (ptrans_tab[cur_state][sym] == NULL) {
      ptrans_tab[cur_state][sym] = new set<unsigned int>;
      ptrans_tab_copy[cur_state][sym] = new set<unsigned int>;
    }
    
    ptrans_tab[cur_state][sym]->insert(next_state);
    ptrans_tab_copy[cur_state][sym]->insert(next_state);
  }
  nfa_file.close();

  /* measure the size of transition table */
  for (i = 0; i < num_states; i++) {
    for (j = 0; j < ALPHABET_SIZE; j++) {
      if (ptrans_tab[i][j] != NULL) {
        trans_tab_sz += sizeof(set<unsigned int>) + ptrans_tab[i][j]->size() * sizeof(unsigned int);
        num_filled++;
      }
    }
  }

  cout << "Memory usage of trans tab " << trans_tab_sz + (num_states*ALPHABET_SIZE - num_filled) * sizeof(set<unsigned int> *) << " bytes" << endl;

  /* fill out the accepting id's table */
  acc_ids = new int *[num_states];
  acc_ids_copy = new int *[num_states];
  for (i = 0; i < num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */	
      for (itv = ret.first; itv != ret.second; ++itv) {
	n_acc++;
      }
      acc_ids[i] = new int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        acc_ids[i][j++] = itv->second;
      }
      acc_ids[i][j] = -1;
    } else {
      acc_ids[i] = 0;
    }
  }

  // for the copy of acc_ids
  for (i = 0; i < num_states; i++) {
    j = 0;
    n_acc = 0;
    ret = accept_ids.equal_range(i);
    if (ret.second != ret.first) {
      /* get the number of acc ids before allocating space */	
      for (itv = ret.first; itv != ret.second; ++itv) {
	n_acc++;
      }
      acc_ids_copy[i] = new int[n_acc + 1];
      for (itv = ret.first; itv != ret.second; ++itv) {
        acc_ids_copy[i][j++] = itv->second;
      }
      acc_ids_copy[i][j] = -1;
    } else {
      acc_ids_copy[i] = 0;
    }
  }


}


/*----------------------------------------------
 * head_simul
 *
 * Thread body of the head thread.  Runs the NFA over the bytes described
 * by payload_and_starts (payload / len), beginning in the global start
 * states, and prints every signature id of every accepting state reached
 * after each byte.  When done, p_ending_frontiers points at the set of
 * states that are active after the last byte, the elapsed cycles are
 * added to t1_total_cycles, and the thread terminates via pthread_exit().
 * ---------------------------------------------*/
void *head_simul(simul_thread_data *payload_and_starts) {
  rdtsc(t1_starttm);

  unsigned char *input = payload_and_starts->payload;
  unsigned int input_len = payload_and_starts->len;

  /* the frontier starts out as the NFA's start states */
  t1_cur_st = starts;
  set<unsigned int> *frontier = &t1_cur_st;
  set<unsigned int> *successors = &t1_next_st;

  for (unsigned int pos = 0; pos < input_len; pos++) {
    const unsigned char sym = input[pos];
    set<unsigned int>::iterator st;

    /* this is the most time consuming part: follow every transition on sym */
    for (st = frontier->begin(); st != frontier->end(); st++) {
      set<unsigned int> *targets = ptrans_tab[*st][sym];
      if (targets == NULL)
        continue;
      successors->insert(targets->begin(), targets->end());
    }

    /* report each accepting state that has just become active */
    for (st = successors->begin(); st != successors->end(); st++) {
      int *sig_ids = acc_ids[*st];
      if (!sig_ids)
        continue;
      printf("Matched at offset %u: ", pos);
      for (unsigned int n = 0; sig_ids[n] != -1; n++)
        printf("%d ", sig_ids[n]);
      printf("\n"); 
    }

    /* the successors become the frontier; the old frontier is emptied and reused */
    set<unsigned int> *recycled = frontier;
    frontier = successors;
    successors = recycled;
    successors->clear();
  }

  /* publish the states active after the last byte */
  p_ending_frontiers = frontier;

  rdtsc(t1_stoptm);
  t1_total_cycles += (t1_stoptm - t1_starttm);
  pthread_exit(NULL);
}



/* Thread body of the non-head thread in multibyte NFA matching.
 * Runs the NFA over the bytes described by payload_and_starts
 * (payload / len), beginning in big_starts, and carries along with every
 * active state the state of big_starts it was reached from.  Every time
 * an accepting state is active after a byte, a validation_info record
 * (accepting state, source state, byte offset) is appended to
 * vec_validation.  The elapsed cycles are added to t2_total_cycles and
 * the thread terminates via pthread_exit(). */
void *simul_with_src(simul_thread_data2 *payload_and_starts) {
  /* an element is <current state, source state> */
  typedef pair<unsigned int, unsigned int> tagged_state;
  typedef set<tagged_state> tagged_set;

  rdtsc(t2_starttm);

  unsigned char *input = payload_and_starts->payload;
  unsigned int input_len = payload_and_starts->len;

  /* big_starts is prepared by the caller so that building it is not timed here */
  t2_cur_st = big_starts;
  tagged_set *frontier = &t2_cur_st;
  tagged_set *successors = &t2_next_st;

  for (unsigned int pos = 0; pos < input_len; pos++) {
    const unsigned char sym = input[pos];
    tagged_set::iterator cur;

    /* this is the most time consuming part: follow every transition on
     * sym and hand the source tag on to each successor */
    for (cur = frontier->begin(); cur != frontier->end(); cur++) {
      set<unsigned int> *targets = ptrans_tab[cur->first][sym];
      if (targets == NULL)
        continue;
      set<unsigned int>::iterator nxt;
      for (nxt = targets->begin(); nxt != targets->end(); nxt++) {
        successors->insert(tagged_state(*nxt, cur->second));
      }
    }

    /* remember every accepting state together with its source for the
     * later check against the head thread's final states */
    for (cur = successors->begin(); cur != successors->end(); cur++) {
      if (!acc_ids[cur->first])
        continue;
      tmp_val.acc_st = cur->first;
      tmp_val.src_st = cur->second;
      tmp_val.offset = pos;
      vec_validation.push_back(tmp_val);
    }

    /* the successors become the frontier; the old frontier is emptied and reused */
    tagged_set *recycled = frontier;
    frontier = successors;
    successors = recycled;
    successors->clear();
  }

  rdtsc(t2_stoptm);
  t2_total_cycles += (t2_stoptm - t2_starttm);
  pthread_exit(NULL);
}


