/*-----------------------------------------------------------------------------
 * file:    escape_sequences.cc
 * author:  Randy Smith
 * date:    14 July 2006
 * descr:   routines for manipulating and resolving escape sequences
 *
 *
 *
 *    Copyright 2006,2007 Randy Smith, smithr@cs.wisc.edu
 *
 *    This file contains unpublished confidential proprietary
 *    work of Randy Smith, Department of Computer Sciences,
 *    University of Wisconsin--Madison.  No use of any sort, including
 *    execution, modification, copying, storage, distribution, or reverse
 *    engineering is permitted without the express written consent of
 *    Randy Smith.
 *
 * History:
 *-----------------------------------------------------------------------------
 * $Log: escape_sequences.cc,v $
 * Revision 1.1  2009/06/09 18:51:21  vinodg
 * *** empty log message ***
 *
 * Revision 1.2  2008/02/13 20:33:10  smithr
 * *** empty log message ***
 *
 * Revision 1.1  2007/08/07 18:00:50  smithr
 * initial check in to CVS
 *
 * Revision 1.3  2006/08/10 22:03:20  smithr
 * Fixed handling of the '/' symbol
 *
 * Revision 1.2  2006/08/10 18:35:33  smithr
 * Several additions and bug fixes, including:
 * (1) user defined char classes implemented.
 * (2) escape sequences fully handled
 * (3) memory management addressed (destructors free mem as necessary)
 * (4) many other miscellaneous bugs, etc.
 *
 * Revision 1.1  2006/08/10 14:22:59  smithr
 * Initial check-in to CVS
 *
 * Revision 1.2  2006/07/17 17:43:42  smithr
 * replaced 0x output with \x output
 *
 * Revision 1.1  2006/07/15 04:54:01  smithr
 * Extended the grammar to accept negated symbols lists and concatenated
 * lists of symbols, e.g. "ACE" can be used as an alternative to 'A','C','E'
 *
 * Revision 1.2  2006/07/14 20:08:54  smithr
 * CC classes implemented
 *
 * Revision 1.1  2006/07/13 17:13:17  smithr
 * Initial check-in to CVS.  Code is function for KL and KKL patterns
 * in FTP rules.
 *
 *
 *---------------------------------------------------------------------------*/
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "escape_sequences.h"
#include "errmsg.h"


/* errno comes from <errno.h>, which is allowed to define it as a macro;
 * it must not be redeclared by hand, so no declaration appears here. */

/* line number handed to errf/warnf when reporting problems; defined in
 * another translation unit */
extern int lineno;

/*-----------------------------------------------------------------------------
 * convert_escape_sequences
 *   Rewrites str in place so that every backslash escape sequence is
 *   replaced by the single character it denotes.  Each escape consumes at
 *   least as many characters as it produces, so the result always fits in
 *   the original buffer.
 *
 *   str - NUL-terminated, writable string to convert (modified in place)
 *   len - if non-NULL, receives the number of characters written, not
 *         counting the final terminator.  This may differ from strlen(str)
 *         when an escape such as \0 or \x00 produces an embedded NUL.
 *
 *   An escape that parse_char_constant rejects (negative return value) is
 *   replaced by the placeholder character 'X'.  A lone backslash that is the
 *   last character of the string is copied through unchanged.
 *
 *   Returns str.
 *---------------------------------------------------------------------------*/
char *convert_escape_sequences(char *str, int *len)
{
   char *read, *write;
   int actual_read, val;

   write = read = str;

   while (*read != 0) {
      /* Only hand complete escapes (backslash plus at least one more
       * character) to the parser.  A backslash directly in front of the
       * terminator would otherwise be reported as two characters consumed,
       * moving the read cursor past the end of the string. */
      if (*read == '\\' && read[1] != 0) {
         val = parse_char_constant(read, &actual_read);
         if (val < 0) {
            *write++ = 'X';
         } else {
            *write++ = (char) val;
         }
         read += actual_read;
      } else {
         *write++ = *read++;
      }
   }
   *write = 0;
   if (len)
       *len = (int) (write - str);
   return(str);
}

/*-----------------------------------------------------------------------------
 * parse_char_constant
 *   Decodes the single escape sequence that starts at s.
 *
 *   s           - points at the backslash that introduces the escape
 *                 (asserted)
 *   actual_read - receives the number of characters of s that make up the
 *                 escape, including the backslash
 *
 *   Recognized forms:
 *     \f \n \r \t \v            the usual C control characters
 *     \| \( \) \{ \} \[ \] \^ \$ \\ \+ \* \? \/ \' \"
 *                               the character itself
 *     \xH...                    hex value, all following hex digits are used
 *     \o, \oo, \ooo             octal value, one to three digits
 *   Any other character after the backslash produces a warning and is
 *   returned as if it had not been escaped.  A backslash that is followed
 *   directly by the string terminator produces a warning, consumes only the
 *   backslash, and yields a literal backslash.
 *
 *   Returns the character value (0..255), or -1 after reporting an error
 *   through errf when a numeric escape is malformed or out of range.
 *---------------------------------------------------------------------------*/
int parse_char_constant(const char *s, int *actual_read)
{
   assert(s[0] == '\\');
   *actual_read = 2;

   switch (s[1]) {
      case '\0':
         /* Nothing follows the backslash: report only one character as
          * consumed so callers never step over the terminator. */
         *actual_read = 1;
         warnf(lineno, "lone \"\\\" at end of input.  "
               "Interpreting as a literal backslash");
         return '\\';
      case 'f': return '\f';
      case 'n': return '\n';
      case 'r': return '\r';
      case 't': return '\t';
      case 'v': return '\v';
      case '|': return '|';
      case '(': return '(';
      case ')': return ')';
      case '{': return '{';
      case '}': return '}';
      case '[': return '[';
      case ']': return ']';
      case '^': return '^';
      case '$': return '$';
      case '\\': return '\\';
      case '+': return '+';
      case '*': return '*';
      case '?': return '\?';
      case '/': return '/';
      case '\'': return '\'';
      case '\"': return '\"';
      case 'x': {
          int val=0, tmp, i;

          for (i=2; (tmp = hex_digit(s[i])) >= 0; i++) {
              /* Every hex digit is consumed, but the accumulator saturates
               * instead of overflowing on very long digit runs. */
              if (val <= (INT_MAX - 15) / 16) {
                  val = 16 * val + tmp;
              } else {
                  val = INT_MAX;
              }
          }
          *actual_read = i;
          if (i==2) {
              errf(lineno, "expecting hex digits after \\x");
              return(-1);
          }
          if (val > 255) {
              errf(lineno,
                   "explicit hex character (\\x) out of range with value %d",
                   val);
              return(-1);
          }
          return(val);
      }
      default:
        if (octal_digit(s[1]) >= 0) {  /* octal constant */
            int val=0, tmp, i;

            /* at most three octal digits belong to the escape */
            for (i=1; i<4 && (tmp = octal_digit(s[i])) >= 0; i++) {
                val = 8 * val + tmp;
            }
            /* Record the consumed length before the range check so that an
             * out-of-range escape is skipped as a whole, as in the hex
             * case. */
            *actual_read = i;
            if (val > 255) {
                errf(lineno, "explicit octal character out of range");
                return(-1);
            }
            return(val);
        } else {
           /* perl warns that the special character is unknown, but
            * it treats it as though it were just a plain (unescaped)
            * character. So, we'll do the same */
            warnf(lineno, "unknown special character \"\\%c\".  "
                  "Interpreting as \"%c\"", s[1], s[1]);
            /* Go through unsigned char so bytes >= 0x80 are not returned
             * as negative values, which callers treat as errors. */
            return ((unsigned char) s[1]);
        }
    }
    /* unreachable */
}

/*-----------------------------------------------------------------------------
 * hex_digit
 *   Maps one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
 *   numeric value 0..15.  Any other character yields -1.
 *---------------------------------------------------------------------------*/
int hex_digit(char in)
{
    int value = -1;

    if ('0' <= in && in <= '9') {
        value = in - '0';
    } else if ('a' <= in && in <= 'f') {
        value = 10 + (in - 'a');
    } else if ('A' <= in && in <= 'F') {
        value = 10 + (in - 'A');
    }

    return value;
}

/*-----------------------------------------------------------------------------
 * octal_digit
 *   Maps one octal digit character ('0'-'7') to its numeric value 0..7.
 *   Any other character yields -1.
 *---------------------------------------------------------------------------*/
int octal_digit(char in)
{
    const int offset = in - '0';

    return (offset >= 0 && offset <= 7) ? offset : -1;
}


/*-----------------------------------------------------------------------------
 * output_char
 *   Writes a printable, escaped representation of the value c into str and
 *   returns str.
 *
 * str - buffer that receives the NUL-terminated output
 * n   - size of str in bytes (including space for the terminating \0).
 *       Must be at least 4 (asserted); 5 bytes are needed to hold the
 *       longest form, a \x escape with two hex digits, without truncation.
 * c   - value to convert; must be <= 255 (asserted)
 * inside_char_class - true when the output is destined for the inside of
 *       []'s, where several metacharacters are emitted without a backslash
 *
 *   Characters with a dedicated escape are emitted as that escape, other
 *   printable ASCII characters (0x20..0x7e) are emitted unchanged, and
 *   everything else is emitted as \x followed by the value in lowercase hex
 *   without zero padding.  At most n bytes of str are ever written; output
 *   that does not fit is truncated.
 *---------------------------------------------------------------------------*/
char * output_char(char *str, unsigned int n, unsigned int c,
                   bool inside_char_class)
{
   assert (c <= 255);
   assert ( n >= 4);

   bool icc = inside_char_class;

   const char *s = NULL;
   switch (c)
   {
      /* note that the following items do not
       * need to be escaped inside []'s: |(){}[+*?^$.   */

      case '\f': s = "\\f"; break;
      case '\n': s = "\\n"; break;
      case '\r': s = "\\r"; break;
      case '\t': s = "\\t"; break;
      case '\v': s = "\\v"; break;
      case '|':  s = (icc ? "|" : "\\|"); break;
      case '(':  s = (icc ? "(" : "\\("); break;
      case ')':  s = (icc ? ")" : "\\)"); break;
      case '{':  s = (icc ? "{" : "\\{"); break;
      case '}':  s = (icc ? "}" : "\\}"); break;
      case '[':  s = (icc ? "[" : "\\["); break;
      case ']':  s = "\\]"; break;
      case '^':  s = (icc ? "^" : "\\^"); break;
      case '$':  s = (icc ? "$" : "\\$"); break;
      case '\\': s = "\\\\"; break;
      case '+':  s = (icc ? "+" : "\\+"); break;
      case '*':  s = (icc ? "*" : "\\*"); break;
      case '?':  s = (icc ? "?" : "\\?"); break;
      case '/':  s = (icc ? "/" : "\\/"); break;
      case '\'': s = "\\'"; break;
      case '\"': s = "\\\""; break;
      /* NOTE(review): '-' is escaped outside a character class and left
       * bare inside one, which looks like the reverse of the usual regex
       * convention.  Behavior kept as-is -- confirm against callers. */
      case '-':  s = (icc ? "-" : "\\-"); break;
      default:
         break;
   }

   /* Every write goes through snprintf with the caller-supplied size so
    * that a 4-byte buffer can never be overrun by a 5-byte \x escape. */
   if (s != NULL)
   {
      snprintf(str, n, "%s", s);
   }
   else if (c >= 0x20 && c <= 0x7e)
   {
      snprintf(str, n, "%c", (int) c);
   }
   else
   {
      snprintf(str, n, "\\x%x", c & 0xffu);
   }

   return str;
}
