/*-----------------------------------------------------------------------------
 *nodes.cc
 * Implements the nodes used for building the pcre parse tree.
 *
 * Author:  Randy Smith
 * Date:    10 August 2006
 *
 *
 *    Copyright 2006,2007 Randy Smith, smithr@cs.wisc.edu
 *
 *    This file contains unpublished confidential proprietary
 *    work of Randy Smith, Department of Computer Sciences,
 *    University of Wisconsin--Madison.  No use of any sort, including
 *    execution, modification, copying, storage, distribution, or reverse
 *    engineering is permitted without the express written consent of
 *    Randy Smith.
 *
 *-----------------------------------------------------------------------------
 * History:
 * $Log: nodes.cc,v $
 * Revision 1.1  2009/06/09 18:51:22  vinodg
 * *** empty log message ***
 *
 * Revision 1.2  2008/02/13 20:33:10  smithr
 * *** empty log message ***
 *
 * Revision 1.1  2007/08/07 18:00:51  smithr
 * initial check in to CVS
 *
 * Revision 1.3  2007/05/14 20:48:04  smithr
 * Added:
 * (1) disallowing greedy operators *? +? ??
 * (2) Added tree-printing code.
 *
 * Revision 1.2  2006/08/10 18:35:34  smithr
 * Several additions and bug fixes, including:
 * (1) user defined char classes implemented.
 * (2) escape sequences fully handled
 * (3) memory management addressed (destructors free mem as necessary)
 * (4) many other miscellaneous bugs, etc.
 *
 * Revision 1.1  2006/08/10 14:23:00  smithr
 * Initial check-in to CVS
 *
 *
 *---------------------------------------------------------------------------*/
#include <list>
#include <assert.h>
#include "nodes.h"
#include "escape_sequences.h"
#include "globals.h"

#define UNUSED_ARG(x)  (void)(x)

/* Indentation hook called by the unparse routines.  It deliberately writes
 * nothing; both arguments are accepted and ignored. */
void indent_(FILE *out, int amount)
{
   (void)out;
   (void)amount;
}

/* Writes `amount` space characters to `out`.
 *
 *   out    - stream to write to
 *   amount - number of spaces to emit; zero or a negative value writes
 *            nothing (a negative value used to be cast to unsigned and
 *            turned into billions of iterations)
 */
void indent_tree(FILE *out, int amount)
{
   if (amount <= 0)
      return;

   /* "%*s" right-justifies the empty string in a field `amount` wide,
    * which yields exactly `amount` spaces in a single call. */
   fprintf(out, "%*s", amount, "");
}


/*-----------------------------------------------------------------------------
 * base_tree implementation
 *---------------------------------------------------------------------------*/
/* The constructor body is empty: no work is done here. */
base_tree::base_tree()
{
}

/* The destructor body is empty: nothing is released here. */
base_tree::~base_tree()
{
}


/*-----------------------------------------------------------------------------
 * disjunction implementation
 *---------------------------------------------------------------------------*/
/* The constructor does no explicit work. */
disjunction::disjunction()
{
}

/* Deletes every alternative pointer stored in the `alternatives` list. */
disjunction::~disjunction()
{
   typedef std::list<alternative *>::iterator alt_iter;

   for (alt_iter cur = alternatives.begin(); cur != alternatives.end(); ++cur)
   {
      delete *cur;
   }
}

void disjunction::unparse(FILE *out, int indent)
{
   std::list<alternative *>::iterator li;

   bool first = true;
   for (li = alternatives.begin(); li != alternatives.end(); li++)
   {
       if (!first)
	   fprintf(out, "|");

      indent_(out, indent);
      (*li)->unparse(out, indent+2);
      first = false;
   }
}


void disjunction::printtree(FILE *out, int indent)
{
   std::list<alternative *>::iterator li;

   indent_tree(out, indent);
   fprintf(out, "<DIS>\n");
   for (li = alternatives.begin(); li !=alternatives.end(); li++)
   {
      (*li)->printtree(out, indent+2);
      //fprintf(out,"\n");
   }
}
   

/*-----------------------------------------------------------------------------
 * add_star
 *   Adds an implicit .* to the front of the parse: the existing
 *   alternatives are wrapped in a group and prefixed with .*
 *---------------------------------------------------------------------------*/
void disjunction::add_star(void)
{
   /* build the .* */
   char_class *cc = new char_class();
   cc->chars_.set();
   cc->named_class_ = '.';

   atom *a = new atom(cc);
   quantifier *q = new quantifier(quantifier::STAR);

   term *dotstar = new term(a, q);

   /* now, build up a spot for the new term */
   disjunction *d = new disjunction();
   d->alternatives = this->alternatives;
   this->alternatives.clear();

   atom *a2 = new atom(d);
   term *t2 = new term(a2, NULL);

   alternative *alt = new alternative();
   alt->terms.push_back(dotstar);
   alt->terms.push_back(t2);

   this->alternatives.push_back(alt);
}


/*-----------------------------------------------------------------------------
 * alternative implementation
 *---------------------------------------------------------------------------*/
/* The constructor does no explicit work. */
alternative::alternative()
{
}

/* Deletes every term pointer stored in the `terms` list. */
alternative::~alternative()
{
   typedef std::list<term *>::iterator term_iter;

   for (term_iter cur = terms.begin(); cur != terms.end(); ++cur)
   {
      delete *cur;
   }
}

void alternative::unparse(FILE *out, int indent)
{
   std::list<term *>::iterator li;


   //if (terms.size() > 1)
   //{
   //   fprintf(out, "(");
   //}

   for (li = terms.begin(); li != terms.end(); li++)
   {
      indent_(out, indent);
      (*li)->unparse(out, indent+2);
   }

   //if (terms.size() > 1)
   //{
   //   fprintf(out, ")");
   //}
}

void alternative::printtree(FILE *out, int indent)
{
   std::list<term *>::iterator li;

   indent_tree(out, indent);
   fprintf(out, "<TERM>\n");
   for (li = terms.begin(); li != terms.end(); li++)
   {
      (*li)->printtree(out, indent+2);
      //fprintf(out,"\n");
   }
}


/*-----------------------------------------------------------------------------
 * term implementation
 *---------------------------------------------------------------------------*/
/* Stores the atom and quantifier pointers.  Either may be NULL.  Both are
 * deleted by the destructor, so the term owns whatever it is given. */
term::term(atom *child, quantifier *q)
   : a_(child),
     quant_(q)
{
}

/* Releases the atom and the quantifier. */
term::~term()
{
   /* deleting a null pointer is a no-op, so no guards are required */
   delete a_;
   delete quant_;
}

/* Unparses the atom followed by the quantifier; whichever of the two is
 * absent is skipped.  Both children receive indent + 2. */
void term::unparse(FILE *out, int indent)
{
   const int child_indent = indent + 2;

   if (a_ != NULL)
      a_->unparse(out, child_indent);

   if (quant_ != NULL)
      quant_->unparse(out, child_indent);
}

/* Prints this term's subtree.
 *
 * Writes an indented "<ATOM>" tag followed by the atom's subtree, or
 * "**NO ATOM**" when the term has no atom.  When a quantifier is present,
 * an indented "<QUANT>" tag and the quantifier's subtree follow; a term
 * without a quantifier prints nothing further.
 *
 * The former "**NO QUANT**" branch was nested inside `if (quant_)` and so
 * could never execute; it has been removed without changing the output.
 */
void term::printtree(FILE *out, int indent)
{
   indent_tree(out, indent);
   fprintf(out, "<ATOM>");
   if (a_)
      a_->printtree(out, indent+2);
   else
   {
      indent_tree(out, indent+2);
      fprintf(out, "**NO ATOM**\n");
   }

   if (quant_)
   {
      indent_tree(out, indent);
      fprintf(out, "<QUANT>");
      quant_->printtree(out, indent+2);
   }
}


/*-----------------------------------------------------------------------------
 * assertion implementation
 *---------------------------------------------------------------------------*/
/* An assertion is a term with neither atom nor quantifier; only the
 * assertion kind `a` is recorded. */
assertion::assertion(assert_t a)
   : term(NULL, NULL),
     at(a)
{
}

/* The destructor body is empty. */
assertion::~assertion()
{
}

/* Writes the assertion's pattern character: '^' for CIRCUM, '$' for
 * DOLLAR.  Any other kind trips an assert.  `indent` is ignored. */
void assertion::unparse(FILE *out, int indent)
{
   (void)indent;

   switch (at)
   {
      case CIRCUM:
         fputs("^", out);
         break;

      case DOLLAR:
         fputs("$", out);
         break;

      default:
         assert(0);
   }
}


/* Prints one indented line tagging the assertion: "<ASSERT> ^" or
 * "<ASSERT> $".  Any other kind trips an assert after the indent has
 * been written. */
void assertion::printtree(FILE *out, int indent)
{
   indent_tree(out, indent);

   switch (at)
   {
      case CIRCUM:
         fputs("<ASSERT> ^\n", out);
         break;

      case DOLLAR:
         fputs("<ASSERT> $\n", out);
         break;

      default:
         assert(0);
   }
}


/*-----------------------------------------------------------------------------
 * quantifier implementation
 *---------------------------------------------------------------------------*/
/* The destructor body is empty. */
quantifier::~quantifier()
{
}

/* Records the quantifier kind.  Both bounds start at -1 (the unparse
 * routines print a high bound of -1 as an open-ended range) and the
 * greedy flag starts set. */
quantifier::quantifier(q_t a)
   : quant_type_(a)
{
   high_ = -1;
   low_ = high_;
   greedy_ = 1;
}

/* Sets the greedy flag: zero clears it, any non-zero value sets it. */
void quantifier::set_greedy(unsigned int val)
{
   if (val == 0)
      greedy_ = 0;
   else
      greedy_ = 1;
}

/* Returns the current greedy flag. */
unsigned int quantifier::get_greedy(void)
{
   const unsigned int flag = greedy_;
   return flag;
}

/* Writes the quantifier in pattern syntax: "*", "+", "?", or a brace
 * range ({n}, {n,} or {n,m}).  A trailing "?" is appended when the
 * greedy flag is clear.  An unknown kind trips an assert.  `indent` is
 * ignored. */
void quantifier::unparse(FILE *out, int indent)
{
   (void)indent;

   if (quant_type_ == STAR)
      fputs("*", out);
   else if (quant_type_ == PLUS)
      fputs("+", out);
   else if (quant_type_ == QM)
      fputs("?", out);
   else if (quant_type_ == RANGE)
   {
      /* equal bounds are tested first, so they always print as {n} */
      if (low_ == high_)
         fprintf(out, "{%d}", low_);
      else if (high_ == -1)
         fprintf(out, "{%d,}", low_);
      else
         fprintf(out, "{%d,%d}", low_, high_);
   }
   else
      assert(0);

   if (!greedy_)
      fputs("?", out);
}

/* Prints one indented line describing the quantifier: a tagged symbol for
 * STAR, PLUS and QM, or the bare brace form for RANGE.  An unknown kind
 * trips an assert.  The line is always terminated with a newline. */
void quantifier::printtree(FILE *out, int indent)
{
   indent_tree(out, indent);

   const char *label = NULL;
   switch (quant_type_)
   {
      case STAR:  label = "<STAR> *"; break;
      case PLUS:  label = "<PLUS> +"; break;
      case QM:    label = "<QM> ?";   break;

      case RANGE:
         /* equal bounds are tested first, so they always print as {n} */
         if (low_ == high_)
            fprintf(out, "{%d}", low_);
         else if (high_ == -1)
            fprintf(out, "{%d,}", low_);
         else
            fprintf(out, "{%d,%d}", low_, high_);
         break;

      default:
         assert(0);
   }

   if (label != NULL)
      fputs(label, out);

   fputs("\n", out);
}


/*-----------------------------------------------------------------------------
 * atom implementation
 *---------------------------------------------------------------------------*/
/* Wraps a character class; the atom is tagged CHAR_CLASS. */
atom::atom(char_class *cc)
   : obj(cc)
{
   atom_type_ = CHAR_CLASS;
}

/* Wraps a subtree; the atom is tagged DISJUNCT, which makes unparse
 * surround it with parentheses. */
atom::atom(base_tree *dis)
   : obj(dis)
{
   atom_type_ = DISJUNCT;
}

/* Releases the wrapped object. */
atom::~atom()
{
   /* deleting a null pointer is a no-op, so no guard is required */
   delete obj;
}

/* Unparses the wrapped object.  A DISJUNCT atom is enclosed in
 * parentheses; any other atom is written as-is.  `indent` is passed
 * through unchanged. */
void atom::unparse(FILE *out, int indent)
{
   const bool grouped = (atom_type_ == DISJUNCT);

   if (grouped)
      fputs("(", out);

   obj->unparse(out, indent);

   if (grouped)
      fputs(")", out);
}

/* Prints the wrapped object's subtree at indent + 2.  The atom itself
 * contributes no output of its own. */
void atom::printtree(FILE *out, int indent)
{
   const int child_indent = indent + 2;

   obj->printtree(out, child_indent);
}


/*-----------------------------------------------------------------------------
 * char_class implementation
 *---------------------------------------------------------------------------*/
/* The destructor body is empty. */
char_class::~char_class()
{
}

/* Builds an empty, non-negated class with no named-class tag ('\0') and
 * every character bit cleared. */
char_class::char_class()
   : named_class_('\0'),
     negated_(false)
{
   chars_.reset();
}

/* Named-class predicates: each reports whether named_class_ holds the
 * corresponding tag character. */
bool char_class::is_w()
{
   return 'w' == named_class_;
}

bool char_class::is_W()
{
   return 'W' == named_class_;
}

bool char_class::is_d()
{
   return 'd' == named_class_;
}

bool char_class::is_D()
{
   return 'D' == named_class_;
}

bool char_class::is_s()
{
   return 's' == named_class_;
}

bool char_class::is_S()
{
   return 'S' == named_class_;
}

bool char_class::is_dot()
{
   return '.' == named_class_;
}


/* Applies a range to this class.
 *
 * A range whose low_ and high_ are both -1 stands for an escaped class
 * and is forwarded to set_chars() with its cclass_ name.  Otherwise every
 * character from low_ to high_ inclusive is set, after first clearing the
 * class when `mode` is SET.
 */
void char_class::add_chars(range *r, add_t mode)
{
   assert(r);

   const bool is_escaped_class = (r->low_ == -1 && r->high_ == -1);
   if (is_escaped_class)
   {
      set_chars(r->cclass_, mode);
      return;
   }

   if (mode == char_class::SET)
      chars_.reset();

   for (int ch = r->low_; ch <= r->high_; ++ch)
      chars_.set(ch);
}

/*-----------------------------------------------------------------------------
 * set_chars
 *  Sets characters in the character class according to the supplied
 *  escaped class name (e.g., \s, \S, \d, \D, \w, \W).
 *  The 'mode' parameter specifies whether the escaped class 'clname'
 *  replaces the current contents of the instance (SET) or is added to
 *  them (ADD).
 *---------------------------------------------------------------------------*/
void char_class::set_chars(unsigned char clname, add_t mode)
{
   /* Purpose: build the character set for an escaped class name and
    * either install it (SET) or merge it into the class (ADD).
    *
    *   clname - one of 's', 'S', 'd', 'D', 'w', 'W'; anything else
    *            trips an assert
    *   mode   - SET replaces chars_, ADD ORs into chars_; anything else
    *            trips an assert
    *
    * The upper-case names are the complements of the lower-case ones, so
    * the lower-case set is built first and flipped when required.
    */
   bitset<256> tchars;   /* default-constructed with every bit clear */

   const bool complement =
      (clname == 'S' || clname == 'D' || clname == 'W');

   switch(clname)
   {
      case 's':
      case 'S':
         tchars.set(' ');
         tchars.set('\t');
         tchars.set('\r');
         tchars.set('\n');
         tchars.set('\f');
         break;

      case 'd':
      case 'D':
         for (int i='0'; i <= '9'; i++) tchars.set(i);
         break;

      case 'w':
      case 'W':
         for (int i='A'; i <= 'Z'; i++) tchars.set(i);
         for (int i='a'; i <= 'z'; i++) tchars.set(i);
         for (int i='0'; i <= '9'; i++) tchars.set(i);
         /* PCRE defines a word character as a letter, a digit or an
          * underscore; the underscore was previously left out, which put
          * it in \W instead of \w. */
         tchars.set('_');
         break;

      default:
         assert(0);
   }

   if (complement)
      tchars.flip();

   if (mode == SET)
      chars_ = tchars;
   else if (mode == ADD)
      chars_ |= tchars;
   else
      assert(0);
}


/* Writes the class in pattern syntax.
 *
 * A class tagged with a named class is written as that escape (or "."
 * for the dot class).  Otherwise: a non-negated class with all 256
 * characters set is written as "."; a negated class is written as
 * "[^...]"; a class with more than one member as "[...]"; and a class
 * with at most one member as its members without brackets.  `indent` is
 * ignored.
 */
void char_class::unparse(FILE *out, int indent)
{
   (void)indent;

   switch(named_class_)
   {
      case 'w': fputs("\\w", out); return;
      case 'W': fputs("\\W", out); return;
      case 'd': fputs("\\d", out); return;
      case 'D': fputs("\\D", out); return;
      case 's': fputs("\\s", out); return;
      case 'S': fputs("\\S", out); return;
      case '.': fputs(".", out);   return;
      default:  break;
   }

   /* the member count does not change below, so take it once */
   const size_t member_count = chars_.count();

   /* every one of the 256 characters present and not negated: a dot */
   if (!negated_ && member_count == 256)
   {
      fputs(".", out);
      return;
   }

   const bool bracketed = negated_ || (member_count > 1);

   if (negated_)
      fputs("[^", out);
   else if (bracketed)
      fputs("[", out);

   for (unsigned int i=0; i < 256; i++)
   {
      if (!chars_.test(i))
         continue;

      char outstr[10];
      fprintf(out, "%s", output_char(outstr, 10, i, bracketed));
   }

   if (bracketed)
      fputs("]", out);
}


/* Prints one indented line describing the class: the escape for a named
 * class, otherwise its members, bracketed when the class is negated or
 * has more than one member.  The line always ends with a newline. */
void char_class::printtree(FILE *out, int indent)
{
   indent_tree(out, indent);

   const char *escape = NULL;
   switch(named_class_)
   {
      case 'w': escape = "\\w"; break;
      case 'W': escape = "\\W"; break;
      case 'd': escape = "\\d"; break;
      case 'D': escape = "\\D"; break;
      case 's': escape = "\\s"; break;
      case 'S': escape = "\\S"; break;
      case '.': escape = ".";   break;
      default:  break;
   }

   if (escape != NULL)
   {
      fputs(escape, out);
   }
   else
   {
      /* the member count does not change below, so take it once */
      const bool bracketed = negated_ || (chars_.count() > 1);

      if (negated_)
         fputs("[^", out);
      else if (bracketed)
         fputs("[", out);

      for (unsigned int i=0; i < 256; i++)
      {
         if (!chars_.test(i))
            continue;

         char outstr[10];
         fprintf(out, "%s", output_char(outstr, 10, i, bracketed));
      }

      if (bracketed)
         fputs("]", out);
   }

   fputs("\n", out);
}


/*-----------------------------------------------------------------------------
 * range implementation
 *---------------------------------------------------------------------------*/
/* Records the two bounds of the range as given. */
range::range(int low, int high)
   : low_(low),
     high_(high)
{
}


/*-----------------------------------------------------------------------------
 * modifiers implementation
 *---------------------------------------------------------------------------*/
/* The constructor does no explicit work. */
modifiers::modifiers()
{
}

/* Appends one modifier character to the end of the pending list. */
void modifiers::append(char x)
{
  mods.insert(mods.end(), x);
}

void modifiers::process(void) 
{
  std::list<char>::iterator li;
  
  for (li = mods.begin(); li != mods.end(); li++)
    {
      switch(*li) 
	{
	case 'i':
	  g_flags.case_insensitive = 1; 
	  fprintf(stdout, "Setting case_insensitive g_flag\n");
	  break;

	case 'm':
	  g_flags.multi_line = 1; 
	  fprintf(stdout, "Setting multi_line g_flag\n");
	  break;

	case 't':
	  g_flags.implicit_star = 1; 
	  fprintf(stdout, "Setting implicit_star g_flag\n");
	  break;

	default:
	  fprintf(stdout, "unrecognized modifier %c\n", *li);
	  break;
	}
    }
}
