/*-----------------------------------------------------------------------------
 *re2dfa.cc
 * converts a regular expression to a minimized dfa.
 *
 * Author:  Randy Smith
 * Date:    19 May 2007
 *
 *
 *    Copyright 2006,2007 Randy Smith, smithr@cs.wisc.edu
 *
 *    This file contains unpublished confidential proprietary
 *    work of Randy Smith, Department of Computer Sciences,
 *    University of Wisconsin--Madison.  No use of any sort, including
 *    execution, modification, copying, storage, distribution, or reverse
 *    engineering is permitted without the express written consent of
 *    Randy Smith.
 *
 *-----------------------------------------------------------------------------
 * History:
 * $Log: re2dfa.cc,v $
 * Revision 1.2  2010/09/27 14:48:31  lyangru
 * final synchronization
 *
 * Revision 1.1  2009/06/09 18:51:22  vinodg
 * *** empty log message ***
 *
 * Revision 1.6  2008/09/12 17:48:05  smithr
 * Added a demo mode.
 *
 * Revision 1.5  2008/04/23 20:45:44  smithr
 * commented out demo() function call.
 *
 * Revision 1.4  2008/04/17 23:08:34  smithr
 * Added code to convert from dfa_tab_t to nfa_t
 *
 * Revision 1.3  2008/04/17 21:25:19  smithr
 * added the "demo" function to demonstrate the use of the APIs.
 *
 * Revision 1.2  2008/02/13 20:33:10  smithr
 * *** empty log message ***
 *
 * Revision 1.1  2007/08/07 18:00:52  smithr
 * initial check in to CVS
 *
 *
 *---------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "re.h"
#include "nfa.h"
#include "minimize.h"
#include "globals.h"

/* Prints "error: <msg>" and the usage text to stderr, then terminates the
 * process; it does not return to the caller. */
void usage_and_die(const char *msg);
/* Runs the built-in demonstration of the DFA manipulation APIs; main() calls
 * it when too few command-line arguments are supplied. */
void demo(void);

/* this is defined in combine_alg.cc
 * demo() uses it to merge two minimized DFAs and treats a NULL result as a
 * failure. NOTE(review): ownership of the inputs appears to stay with the
 * caller (demo() deletes both inputs and the result) -- confirm. */
nfa_t* combine(nfa_t* NFA1, nfa_t* NFA2);


/*-----------------------------------------------------------------------------
 * main
 *   usage: re2dfa <regex> <id> [outfilename]
 *
 *   Builds an NFA from <regex>, writes it to "negate.nfa", converts it to a
 *   DFA and minimizes it.  When exactly three arguments are given the
 *   minimized DFA is also written to <outfilename>.
 *
 *   With fewer than two arguments the demo is run and the usage text is
 *   printed instead.
 *
 *   Returns EXIT_SUCCESS when the DFA was built, EXIT_FAILURE otherwise.
 *---------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
   char buf[16384];
   unsigned int id;
   int status = EXIT_FAILURE;

   nfa_t *NFA = NULL;
   nfa_t *DFA = NULL;

   if (argc < 3)
   {
      demo();
      usage_and_die("Missing input data.");
   }

   /* Refuse over-long input rather than silently truncating it: a truncated
    * regex would yield a DFA for a different pattern than the one requested. */
   if (strlen(argv[1]) >= sizeof(buf))
   {
      fprintf(stderr, "error: regex is too long (limit is %lu bytes).\n",
              (unsigned long)(sizeof(buf) - 1));
      return EXIT_FAILURE;
   }

   memset(buf, '\0', sizeof(buf));
   strncpy(buf, argv[1], sizeof(buf) - 1);
   id = atoi(argv[2]);
   printf("regex is: %s\n", buf);

   NFA = re_to_nfa(buf, strlen(buf), 1, id);
   if (NFA == NULL)
   {
      /* NOTE(review): assumes re_to_nfa() signals failure with NULL --
       * confirm against re.h. */
      fprintf(stderr, "error: could not build an NFA from the regex.\n");
      return EXIT_FAILURE;
   }

   NFA->xfa_output("negate.nfa", id);
   DFA = NFA->make_dfa();

   if (DFA == NULL)
   {
      fprintf(stderr, "error: could not convert the NFA to a DFA.\n");
   }
   else
   {
      /* do you want to minimize? */
      nfa_minimize(DFA);

      if (argc == 4)
      {
         DFA->xfa_output(argv[3], id);
      }

      status = EXIT_SUCCESS;
   }

   /* deleting a NULL pointer is a no-op, so both paths share this cleanup */
   delete NFA;
   delete DFA;

   return status;
}


/*-----------------------------------------------------------------------------
 * usage_and_die
 *   Reports msg as an error on stderr, prints the command-line usage and an
 *   example of the expected regex syntax, then terminates the process.
 *
 *   msg: short description of what went wrong (printed after "error: ").
 *
 *   Does not return.  The process exits with EXIT_FAILURE so that shells and
 *   scripts can tell the invocation did not succeed.
 *---------------------------------------------------------------------------*/
void usage_and_die(const char *msg)
{
   fprintf(stderr, "error: %s\n", msg);
   fprintf(stderr, "\nusage: re2dfa <regex> <id> <outfilename>\n");

   fprintf(stderr, "\n"
           "re2dfa builds a minimized DFA from a regular expression.\n");

   fprintf(stderr, "\n"
   "An example input file is the following:\n"
   "\n"
   "       /[\\d./*]{5,}\\wx+-y|^(\\d:\\d:\\d\\/)/   \n"
   "       /([^\\n]*?<){55}/  \n");

   fprintf(stderr, "\n\n\n");
   exit(EXIT_FAILURE);
}


/*-----------------------------------------------------------------------------
 * demo
 *   demonstrates the use of the DFA manipulation functions. 
 *---------------------------------------------------------------------------*/
void demo(void)
{
   nfa_t *NFA1 = NULL;
   nfa_t *DFA1 = NULL;
   nfa_t *NFA2 = NULL;
   nfa_t *DFA2 = NULL;
   nfa_t *NFA3 = NULL;
   nfa_t *DFA3 = NULL;
   nfa_t *cDFA = NULL;
  
   printf("Executing the demo.\n"
	  "The file 'demo_output.dfa' will be written to the current dir.\n");
   printf("--------------------------------------------------------------\n");

   /* build a couple of DFAs and minimize them. */
   NFA1 = re_to_nfa("/.*abc/", 7, 1, 20);
   DFA1 = NFA1->make_dfa();
   nfa_minimize(DFA1);

   NFA2 = re_to_nfa("/.*def/", 7, 2, 30);
   DFA2 = NFA2->make_dfa();
   nfa_minimize(DFA2);
   
   NFA3 = re_to_nfa("/.*1*10000/", 11, 3, 40);
   DFA3 = NFA3->make_dfa();
   nfa_minimize(DFA2);
   nfa_minimize(DFA3);

   /* now combine them */
   cDFA = combine(DFA1, DFA2);

   {
     dfa_tab_t dta;

     dta.populate(*cDFA);
     nfa_t* n = dta.to_nfa_t();

     nfa_minimize(n);

     n->xfa_output("n_output.dfa", 66); 

   }

   nfa_minimize(cDFA);

   if (cDFA)
   {
     cDFA->xfa_output("demo_output.dfa", 66);
   }
   else
   {
      fprintf(stderr,
	      "Error in %s (%s:%d): combine() failed. Exiting...\n",
	      __func__, __FILE__, __LINE__);
   }

   /* you can convert to a big table representation */
   dfa_tab_t dt, dt2;
   dt.populate(*cDFA);
   dt2.populate(*DFA3);
   


   /* here's how to match to input.
    * "true" means you want to match all intermediate positions, not
    * just at the end of the buffer. "false" will match only the
    * end of the buffer. */
   dt.simulate( (const unsigned char *)"xxabcxdefxxdef", 14, true);
   dt.simulate( (const unsigned char *)"xxdefxabc", 9, true);
   dt2.simulate( (const unsigned char *)"010000100001", 12, true);
   


   /* convert back to nfa_t format */
   nfa_t* n = dt.to_nfa_t();

   if (n)
   {
      n->xfa_output("n.dfa", 77);
   }

   dt.cleanup();


   delete n;
   delete NFA1;
   delete DFA1;
   delete NFA2;
   delete DFA2;
   delete cDFA;
}
