/*-----------------------------------------------------------------------------
 *nodes.h
 * Implements the nodes used for building the pcre parse tree.
 *
 * Author:  Randy Smith
 * Date:    10 August 2006
 *
 *
 *    Copyright 2006,2007 Randy Smith, smithr@cs.wisc.edu
 *
 *    This file contains unpublished confidential proprietary
 *    work of Randy Smith, Department of Computer Sciences,
 *    University of Wisconsin--Madison.  No use of any sort, including
 *    execution, modification, copying, storage, distribution, or reverse
 *    engineering is permitted without the express written consent of
 *    Randy Smith.
 *
 *-----------------------------------------------------------------------------
 * History:
 * $Log: nodes.h,v $
 * Revision 1.1  2009/06/09 18:51:22  vinodg
 * *** empty log message ***
 *
 * Revision 1.2  2008/02/13 20:33:10  smithr
 * *** empty log message ***
 *
 * Revision 1.1  2007/08/07 18:00:53  smithr
 * initial check in to CVS
 *
 * Revision 1.3  2007/05/14 20:48:04  smithr
 * Added:
 * (1) disallowing greedy operators *? +? ??
 * (2) Added tree-printing code.
 *
 * Revision 1.2  2006/08/10 18:35:36  smithr
 * Several additions and bug fixes, including:
 * (1) user defined char classes implemented.
 * (2) escape sequences fully handled
 * (3) memory management addressed (destructors free mem as necessary)
 * (4) many other miscellaneous bugs, etc.
 *
 * Revision 1.1  2006/08/10 14:23:01  smithr
 * Initial check-in to CVS
 *
 *
 *---------------------------------------------------------------------------*/
#ifndef NODES_H
#define NODES_H

#include <bitset>
#include <cstdio>
#include <list>
#include <string>


/* Declared here, defined in some other translation unit.
   NOTE(review): presumably the scanner's current input line number --
   confirm against the lexer. */
extern int lineno;

/* NOTE(review): a using-directive at header scope is visible to every file
   that includes this header.  Unqualified standard-library names used by
   declarations in this header (and possibly by includers) may depend on
   it, so it is left in place. */
using namespace std;

/* Forward declarations, so that the node classes can refer to one another
   through pointers before their full definitions appear. */
class base_tree;
class disjunction;
class alternative;
class term;
class assertion;
class quantifier;
class atom;
class char_class;
class range;
class modifiers;

/* regex_tree_t is simply another name for disjunction. */
typedef disjunction regex_tree_t;

/*-----------------------------------------------------------------------------
 * base_tree
 *   Abstract base class of every parse-tree node declared in this header.
 *   It cannot be instantiated (all of its virtual members are pure) and it
 *   cannot be copied or assigned from outside the class.
 *---------------------------------------------------------------------------*/
class base_tree
{
   public:
      base_tree();

      /* Both output routines take the destination stream and an indent
         value that defaults to 0.
         NOTE(review): unparse presumably writes the node back out as
         regex text and printtree dumps the tree structure -- confirm in
         the implementation file. */
      virtual void unparse(FILE *out, int indent = 0) = 0;
      virtual void printtree(FILE *out, int indent = 0) = 0;

      /* Pure virtual destructor: keeps the class abstract while still
         allowing deletion through a base_tree pointer.  A definition
         must nevertheless be supplied out of line. */
      virtual ~base_tree() = 0;

   private:
      /* Private copy operations: nodes are not copyable by client code. */
      base_tree(base_tree &);
      base_tree &operator=(const base_tree &T);
};



class disjunction : public base_tree
{
   public:
      disjunction();
      ~disjunction();
      void unparse(FILE *out, int indent=0);
      void printtree(FILE *out, int indent=0);
      void add_star(void);
      list<alternative *> alternatives;
};


class alternative : public base_tree
{
   public:
      alternative();
      ~alternative();
      void unparse(FILE *out, int indent=0);
      void printtree(FILE *out, int indent=0);
      list<term *> terms;
};

/*-----------------------------------------------------------------------------
 * term
 *   A term pairs an atom with a quantifier.  Assertions are also terms:
 *   the assertion class derives from this one.  This header declares no
 *   separate class per kind of repetition; the quantifier object pointed
 *   to by quant_ carries that information.
 *---------------------------------------------------------------------------*/
class term : public base_tree
{
   public:
      /* child: the atom of this term; q: the quantifier attached to it.
         NOTE(review): whether either pointer may be null, and whether
         the term takes ownership of them, is not visible from this
         header -- confirm. */
      term(atom *child, quantifier *q);
      virtual ~term();

      /* base_tree output interface */
      virtual void unparse(FILE *out, int indent=0);
      virtual void printtree(FILE *out, int indent=0);

      atom       *a_;      /* the atom */
      quantifier *quant_;  /* the quantifier attached to the atom */
};


/*-----------------------------------------------------------------------------
 * assertion
 *   A term that stands for an assertion rather than an atom/quantifier
 *   pair.
 *---------------------------------------------------------------------------*/
class assertion : public term
{
   public:
      /* The kinds of assertion.
         NOTE(review): CIRCUM and DOLLAR presumably correspond to the ^
         and $ anchors -- confirm. */
      enum assert_t {CIRCUM, DOLLAR};

      assertion(assert_t a);
      virtual ~assertion();

      /* base_tree output interface */
      virtual void unparse(FILE *out, int indent=0);
      virtual void printtree(FILE *out, int indent=0);

      assert_t at;  /* which kind of assertion this node is */
};


/*-----------------------------------------------------------------------------
 * quantifier
 *   Node describing the quantifier attached to a term.
 *---------------------------------------------------------------------------*/
class quantifier : public base_tree
{
   public:
      /* The kinds of quantifier.
         NOTE(review): presumably *, +, ? and a counted range
         respectively -- confirm. */
      enum q_t {STAR, PLUS, QM, RANGE};

      quantifier(q_t q);
      virtual ~quantifier();

      /* base_tree output interface */
      virtual void unparse(FILE *out, int indent=0);
      virtual void printtree(FILE *out, int indent=0);

      /* Accessors for greedy_.
         NOTE(review): the meaning of the stored value is defined by the
         implementation file -- confirm. */
      void set_greedy(unsigned int);
      unsigned int get_greedy();
      unsigned int greedy_;

      /* which kind of quantifier this node is */
      q_t quant_type_;

      /* NOTE(review): presumably the lower and upper repetition bounds,
         meaningful for RANGE quantifiers -- confirm. */
      int low_;
      int high_;
};


/*-----------------------------------------------------------------------------
 * atom
 *   Node wrapping either a character class or a disjunction.
 *---------------------------------------------------------------------------*/
class atom : public base_tree
{
   public:
      /* Tag recording what kind of object obj points to. */
      enum a_t {CHAR_CLASS, DISJUNCT};

      /* One constructor per kind of wrapped object.
         NOTE(review): the base_tree* overload is presumably meant to
         receive a disjunction (its parameter is named dis) -- confirm. */
      atom(char_class *cc);
      atom(base_tree* dis);
      virtual ~atom();

      /* base_tree output interface */
      virtual void unparse(FILE *out, int indent=0);
      virtual void printtree(FILE *out, int indent=0);

      /* obj can be either a disjunction or a character class;
         atom_type_ says which. */
      a_t atom_type_;
      base_tree *obj;
};




/*-----------------------------------------------------------------------------
 * char_class
 *   char_class is a unified representation of atoms: a set over the 256
 *   possible byte values, together with a negation flag.
 *---------------------------------------------------------------------------*/
class char_class : public base_tree
{
   public:
      /* Mode argument accepted by set_chars and add_chars.
         NOTE(review): presumably ADD merges into the existing set while
         SET replaces it -- confirm. */
      enum add_t {ADD, SET};

      char_class();
      virtual ~char_class();

      /* base_tree output interface */
      virtual void unparse(FILE *out, int indent=0);
      virtual void printtree(FILE *out, int indent=0);

      /* Populate the set from a named class (clname) or from a range
         object produced during parsing. */
      void set_chars(unsigned char clname, add_t mode);
      void add_chars(range *r, add_t mode);

      /* NOTE(review): presumably each predicate reports whether this
         object is the corresponding predefined class (\w, \W, \d, \D,
         \s, \S, or .) -- confirm. */
      bool is_w();
      bool is_W();
      bool is_d();
      bool is_D();
      bool is_s();
      bool is_S();
      bool is_dot();

      /* NOTE(review): presumably the letter of the named class this
         object was built from, if any -- confirm. */
      unsigned char named_class_;

      /* if negated_ is true, then chars_ contains the
         inverse of what we want to match */
      bool negated_;
      std::bitset<256> chars_;
};


/*-----------------------------------------------------------------------------
 * range
 *   Describes one range (or one single character) inside a user-defined
 *   character class.  Objects of this type never appear in the parse tree;
 *   they only carry information around while parsing, which is why range
 *   does not derive from base_tree.
 *
 *   Perl accepts things like [\D] (equivalent to \D) and so do we; cclass_
 *   exists to support that.
 *---------------------------------------------------------------------------*/
class range
{
   public:
      range(int low, int high);

      int low_;               /* one end of the range */
      int high_;              /* the other end of the range */
      unsigned char cclass_;  /* supports forms such as [\D] */
};


/*-----------------------------------------------------------------------------
 * modifiers
 *   Collects single-character modifiers in a list.  Like range, this
 *   class is not derived from base_tree.
 *---------------------------------------------------------------------------*/
class modifiers
{
   public:
      modifiers();

      /* NOTE(review): presumably append records x in mods and process
         acts on the collected characters -- confirm in the
         implementation file. */
      void append(char x);
      void process();

      /* the modifier characters */
      std::list<char> mods;
};

#endif
