/***************************************************************************
* Token.h                                                                  *
*                                                                          *
* The Token class ecapsulates a lexical analyzer for C++-like syntax.      *
* A token instance is associated with one or more text files, and          *
* grabs C++ tokens from them sequentially.  There are many member          *
* functions designed to make parsing easy, such as "==" operators for      *
* strings and characters, and automatic conversion of numeric tokens       *
* into numeric values.                                                     *
*                                                                          *
* Files can be nested via #include directives, and both styles of C++      *
* comments are supported.                                                  *
*                                                                          *
*   HISTORY                                                                *
*      Name    Date        Description                                     *
*                                                                          *
*      arvo    10/05/99    Fixed bug in TokFrame string allocation.        *
*      arvo    01/15/95    Added ifdef, ifndef, else, and endif.           *
*      arvo    02/13/94    Added Debug() member function.                  *
*      arvo    01/22/94    Several sections rewritten.                     *
*      arvo    06/19/93    Converted to C++                                *
*      arvo    07/15/89    Rewritten for scene description parser.         *
*      arvo    01/22/89    Initial coding.                                 *
*                                                                          *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo                                           *
*                                                                          *
* This program is free software; you can redistribute it and/or modify it  *
* under the terms of the GNU General Public License as published by the    *
* Free Software Foundation.  See http://www.fsf.org/copyleft/gpl.html      *
*                                                                          *
* This program is distributed in the hope that it will be useful, but      *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for    *
* any particular purpose.  See the GNU General Public License for more     *
* details.                                                                 *
*                                                                          *
***************************************************************************/
#include <stdlib.h>
#include <string.h>
#include <util/Token.h>
#include <util/Char.h>

FILE*  Token::debug = NULL;  // Static data member of Token class.
int    Token::argc  = 0;
char** Token::argv  = NULL;

typedef TokMacro *TokMacroPtr;

static const int True      = 1;
static const int False     = 0;
static const int HashConst = 217;  // Size of hash-table for macros.


TokFrame::TokFrame()
    {
    next   = NULL;
    source = NULL;
    fname  = NULL;
    line   = 0;
    column = 0;
    }

TokFrame::~TokFrame()
    {
    if( fname != NULL ) delete[] fname;
    if( source != NULL ) fclose( source );
    }

void TokFrame::operator=( const TokFrame &frame )
    {
    next   = frame.next;
    source = frame.source;
    fname  = strdup( frame.fname );
    line   = frame.line;
    column = frame.column;
    }

static int HashName( const char *str )
    {
    static int prime[5] = { 7, 11, 17, 23, 3 };
    int k = 0;
    int h = 0;
    while( *str != NullChar )
        {
        h += (*str++) * prime[k++];
        if( k == 5 ) k = 0;
        }
    if( h < 0 ) h = 0;  // Check for overflow.
    return h % HashConst;
    }

TokMacro *Token::MacroLookup( const char *str ) const
    {
    if( table == NULL ) return NULL;
    int i = HashName( str );
    for( TokMacro *m = table[i]; m != NULL; m = m->next )
        {
        if( strcmp( str, m->macro ) == 0 ) return m;
        }
    return NULL;
    }

int Token::MacroReplace( char *str, int &length, TokType &type ) const
    {
    TokMacro *m = MacroLookup( str );
    if( m == NULL ) return 0;
    strcpy( str, m->repl );
    length = strlen( str );
    type   = m->type;
    return 1;
    }

/*-------------------------------------------------------------------------*
 *  D e b u g  P r i n t                                                   *
 *                                                                         *
 *  This routine is used to record the entire token stream in a file to    *
 *  use as a debugging aid.  It does not affect the action of the lexer;   *
 *  it merely records a "shadow" copy of all the tokens that are read by   *
 *  ANY Token instance.  The data that is written to the file is           *
 *                                                                         *
 *  <Line number>  <Column number>  <File name>  <Token>                   *
 *                                                                         *
 *-------------------------------------------------------------------------*/
static void DebugPrint( const Token &tok, FILE *fp )
    {
    fprintf( fp, "%3d %3d  ", tok.Line(), tok.Column() );
    fprintf( fp, "%s  "     , tok.FileName() ); 
    fprintf( fp, "%s\n"     , tok.Spelling() );
    fflush ( fp );
    }

/*-------------------------------------------------------------------------*
 *  T o k e n   (Constructors)                                             *
 *                                                                         *
 *-------------------------------------------------------------------------*/
Token::Token( const char *file_name )
    {
    Init();
    Open( file_name );
    }

Token::Token( FILE *fp )
    {
    Init();
    Open( fp );
    }

Token::Token( )
    {
    Init();
    }

/*-------------------------------------------------------------------------*
 *  T o k e n   (Destructor)                                               *
 *                                                                         *
 *  Close all files and deletes all frames and paths.                      *
 *                                                                         *
 *-------------------------------------------------------------------------*/
Token::~Token( )
    {
    // Don't try to delete "frame" as its a member of this class, not 
    // something that we've allocated.
    TokFrame *f = frame.next;
    while( f != NULL )
        {
        TokFrame *n = f->next;
        delete f;
        f = n;
        }
    ClearPaths();
    }

/*-------------------------------------------------------------------------*
 *  O p e n                                                                *
 *                                                                         *
 *  Establish a new file to read from, either by name, or by pointer.      *
 *                                                                         *
 *-------------------------------------------------------------------------*/
void Token::Open( const char *file_name )
    {
    FILE *fp = fopen( file_name, "r" );
    if( fp == NULL ) return;
    Open( fp );
    frame.fname = strdup( file_name );
    }

void Token::Open( FILE *fp )
    {
    frame.source = fp;
    frame.line   = 1;
    frame.column = 0;
    pushed       = NullChar;
    }

/*-------------------------------------------------------------------------*
 *  O p e r a t o r  ==                                                    *
 *                                                                         *
 *  A token can be compared with a string, a single character, or a type.  *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::operator==( const char *s ) const
    {
    const char *t = spelling;
    if( case_sensitive )
        {
        do { if( *s != *t ) return False; } 
        while( *s++ && *t++ );
        }
    else
        {
        do { if( ToUpper(*s) != ToUpper(*t) ) return False; } 
        while( *s++ && *t++ );
        }
    return True;
    }

int Token::operator==( char c ) const
    {
    if( length != 1 ) return False;
    if( case_sensitive ) return spelling[0] == c;
    else return ToUpper(spelling[0]) == ToUpper(c);
    }

int Token::operator==( TokType _type_ ) const 
    {
    int match = 0;
    switch( _type_ )
        { 
        case T_char   : match = ( type == T_string  && Len() == 1      ); break;
        case T_numeric: match = ( type == T_integer || type == T_float ); break;
        default       : match = ( type == _type_                       ); break;
        }
    return match;
    }
 
/*-------------------------------------------------------------------------*
 *  O p e r a t o r  !=                                                    *
 *                                                                         *
 *  Define negations of the three types of "==" tests.                     *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::operator!=( const char *s ) const { return !( *this == s ); }
int Token::operator!=( char        c ) const { return !( *this == c ); }
int Token::operator!=( TokType     t ) const { return !( *this == t ); }

/*-------------------------------------------------------------------------*
 *  E r r o r                                                              *
 *                                                                         *
 *  Print error message to "stderr" followed by optional "name".           *
 *                                                                         *
 *-------------------------------------------------------------------------*/
void Token::Error( TokError error, const char *name )
    {
    char *s;
    switch( error )
        {
        case T_malformed_float   : s = "malformed real number   "; break;
        case T_unterm_string     : s = "unterminated string     "; break;
        case T_unterm_comment    : s = "unterminated comment    "; break;
        case T_file_not_found    : s = "include file not found: "; break;
        case T_unknown_directive : s = "unknown # directive     "; break;
        case T_string_expected   : s = "string expected         "; break;
        case T_putback_error     : s = "putback overflow        "; break;
        case T_name_too_long     : s = "file name is too long   "; break;
        case T_no_endif          : s = "#endif directive missing"; break;
        case T_extra_endif       : s = "#endif with no #ifdef   "; break;
        case T_extra_else        : s = "#else with no #ifdef    "; break;
        default                  : s = "unknown error type      "; break;
        }
    fprintf( stderr, "LEXICAL ERROR, line %d, column %d: %s", 
        frame.line, frame.column, s );
    if( name == NULL )
         fprintf( stderr, "  \n"       );
    else fprintf( stderr, "%s\n", name );
    exit( 1 );
    }

/*-------------------------------------------------------------------------*
 *  G e t c                                                                *
 *                                                                         *
 *  This routine fetches one character at a time from the current file     *
 *  being read.  It is responsible for keeping track of the column number  *
 *  and for handling single characters that have been "put back".          *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::Getc( int &c )
    {
    if( pushed != NullChar )  // Return the pushed character.
        {
        c = pushed;
        pushed = NullChar;
        }
    else  // Get a new character from the source file.
        {
        c = getc( frame.source );
        frame.column++;
        }
    return c;
    }

/*-------------------------------------------------------------------------*
 *  N o n W h i t e                                                        *
 *                                                                         *
 *  This routine implements a simple finite state machine that skips       *
 *  white space and recognizes the two styles of comments used in C++.     *
 *  It returns the first non-white character not part of a comment.        *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::NonWhite( int &c )
    {
    start_state:
        Getc( c );
        if( c == Space   ) goto start_state;
        if( c == Tab     ) goto start_state;
        if( c == NewLine ) goto start_new_line;
        if( c == Slash   ) goto start_comment;
        goto return_char;

    start_comment:
        Getc( c );
        if( c == Star    ) goto in_comment1;  
        if( c == Slash   ) goto in_comment2;  
        Unget( c );
        c = Slash;
        goto return_char;

    in_comment1:
        Getc( c );
        if( c == Star    ) goto end_comment1;
        if( c == NewLine ) goto newline_in_comment;
        if( c == EOF     ) goto return_char;
        goto in_comment1;

    end_comment1:
        Getc( c );
        if( c == Slash   ) goto start_state;
        if( c == NewLine ) goto newline_in_comment;
        if( c == EOF     ) goto unterm_comment;
        goto in_comment1;

    in_comment2:
        Getc( c );
        if( c == NewLine ) goto start_new_line;
        if( c == EOF     ) goto return_char;
        goto in_comment2;

    unterm_comment:
        Error( T_unterm_comment );
        c = EOF;
        goto return_char;

    start_new_line:
        frame.line++;
        frame.column = 0;
        goto start_state;

    newline_in_comment:
        frame.line++;
        frame.column = 0;
        goto in_comment1;

    return_char:
        Tcolumn = frame.column;  // This is where the token starts.
        return c;
    }

/*-------------------------------------------------------------------------*
 *  N e x t R a w T o k                                                    *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::NextRawTok( )
    {
    static int Trans0[] = { 0, 1, 3, 3, 3 };  // Found a digit.
    static int Trans1[] = { 5, 6, 4, 6, 7 };  // Found a sign.
    static int Trans2[] = { 1, 6, 7, 6, 7 };  // Found decimal point.
    static int Trans3[] = { 2, 2, 7, 6, 7 };  // Found an exponent.
    static int Trans4[] = { 5, 6, 7, 6, 7 };  // Found something else.
    char       *tok     = spelling;
    int        state;
    int        c;

    length = 0;
    type   = T_null;

    // Skip comments and whitespace.

    if( NonWhite( c ) == EOF ) goto endtok;

    // Is this the beginning of an identifier?  If so, get the rest. 

    if( isAlpha( c ) )
        {
        type = T_ident;
        do  {
            *tok++ = c;
            length++;
            if( Getc( c ) == EOF ) goto endtok;
            }
        while( isAlpha( c ) || isDigit( c ) || c == Underscore );
        Unget( c );
        goto endtok;
        }

    // Is this the beginning of a number?

    else if( isDigit( c ) || c == Minus || c == Period )
        {
        char c1 = c;
        state = 0;
        for(;;)
            {
            *tok++ = c;
            length++;
            switch( Getc( c ) )
                {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9': state = Trans0[ state ]; break;
                case '+': 
                case '-': state = Trans1[ state ]; break;
                case '.': state = Trans2[ state ]; break;
                case 'e':
                case 'E': state = Trans3[ state ]; break;
                default : state = Trans4[ state ]; break;
                }
            switch( state )
                {
                case 5 : Unget( c ); 
                         type = ( c1 == Period ) ? T_float : T_integer; 
                         goto endtok;
                case 6 : Unget( c ); type = T_float  ; goto endtok;
                case 7 : Error( T_malformed_float ) ; break;
                default: continue;
                }
            } // for
        } // if numeric 

    // Is this the beginning of an operator?

    if( c == '*' || c == '>' || c == '<' || c == '+' || c == '-' || c == '!' )
        {
        char oldc = c;
        type = T_other;
        *tok++ = c;
        length++;
        if( Getc( c ) == EOF ) goto endtok;
        if( c == oldc || c == EqualSign )
            {
            *tok++ = c;
            length++;
            }
        else Unget( c );
        goto endtok;
        }

    // Is this the beginning of a string?

    else if( c == DoubleQuote )
        {
        type = T_string;
        while( Getc( c ) != EOF && length < MaxTokenLen )
            {
            if( c == DoubleQuote ) goto endtok;
            *tok++ = c;
            length++;
            }
        Error( T_unterm_string );
        }

    // Is this the beginning of a "#" directive?

    else if( c == Hash )
        {
        type = T_directive;
        NonWhite( c );
        while( isAlpha( c ) )
            {
            *tok++ = c;
            length++;
            Getc( c );
            }
        Unget( c );
        goto endtok;
        }

    // This must be a one-character token. 
    
    else
        {
        *tok++ = c;
        length = 1;
        type   = T_other;
        }

    endtok: // Jump to here when token is completed.

    *tok = NullChar;  // Terminate the string.
    if( debug != NULL ) DebugPrint( *this, debug );

    return length;
    }

/*-------------------------------------------------------------------------*
 *  N e x t T o k                                                          *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::NextTok( )
    {
    NextRawTok();

    // If the token is an identifier, see if it's a macro.
    // If the macro substitution is null, get another token.

    if( type == T_ident )
        {
        if( table != NULL )
            {
            if( MacroReplace( spelling, length, type ) && debug != NULL ) 
                DebugPrint( *this, debug );
            }
        if( type == T_nullmacro ) NextTok();
        }
    return length;
    }

/*-------------------------------------------------------------------------*
 *  O p e r a t o r  - -                                                   *
 *                                                                         *
 *  Puts back the last token found.  Only one token can be put back.       *
 *                                                                         *
 *-------------------------------------------------------------------------*/
Token & Token::operator--( )  // Put the last token back. 
    {
    if( put_back ) Error( T_putback_error );  // Can only handle one putback.
    put_back = 1; 
    return *this;
    }

Token & Token::operator--( int )  // Postfix decrement.
    {
    fprintf( stderr, "Postfix decrement is not implemented for the Token class.\n" );
    return *this;
    }

/*-------------------------------------------------------------------------*
 *  H a n d l e   D i r e c t i v e                                        *
 *                                                                         *
 *  Directive beginning with "#" must be handled by the lexer, as they     *
 *  determine the current source file via "#include", etc.                 *
 *                                                                         *
 *  Returns 1 if, after handling this directive, we now have the next      *
 *  token.                                                                 *
 *                                                                         *
 *-------------------------------------------------------------------------*/
int Token::HandleDirective( )
    {
    FILE *fp;
    char name[128];
    if( *this == "define" )
        {
        NextRawTok(); 
        strcpy( tempbuff, Spelling() );  // This is the macro name.
        int line = Line();
        NextRawTok();
        if( Line() == line )
            AddMacro( tempbuff, Spelling(), Type() );
        else
            {
            // If next token is on a different line; we went too far.
            AddMacro( tempbuff, "", T_nullmacro );
            return 1;  // Signal that we already have the next token.
            }
        }
    else if( *this == "include" )
        {
        NextRawTok();
        if( *this == "<" )
            {
            GetName( name, sizeof(name) );
            PushFrame( ResolveName( name ), name );
            }
        else if( type == T_string )
            {
            fp = fopen( spelling, "r" );
            if( fp == NULL ) Error( T_file_not_found, spelling );
            else PushFrame( fp, spelling );
            }
        else Error( T_string_expected );
        }
    else if( *this == "ifdef" )
        {
        NextRawTok();
        TokMacro *m = MacroLookup( Spelling() );
        if( m == NULL )  // Skip until else or endif.
            {
            while( *this != T_null )
                {
                NextRawTok();
                if( *this != T_directive ) continue;
                if( *this == "endif" ) break;
                if( *this == "else"  ) { if_nesting++; break; }  // Like m != NULL.
                }
            if( *this == T_null ) Error( T_no_endif );
            return 0; // Ready to get the next token.
            }
        else if_nesting++;
        }
    else if( *this == "ifndef" )
        {
        NextRawTok();
        TokMacro *m = MacroLookup( Spelling() );
        if( m != NULL )  // Skip until else or endif.
            {
            while( *this != T_null )
                {
                NextRawTok();
                if( *this != T_directive ) continue;
                if( *this == "endif" ) break;
                if( *this == "else"  ) { if_nesting++; break; }  // Like m == NULL.
                }
            if( *this == T_null ) Error( T_no_endif );
            return 0; // Ready to get the next token.
            }
        else if_nesting++;
        }
    else if( *this == "else" )  // Skip until #endif.
        {
        if( if_nesting == 0 ) Error( T_extra_else );
        while( *this != T_null )
            {
            NextRawTok();
            if( *this == T_directive && *this == "endif" ) break;
            }
        if( *this == T_null ) Error( T_no_endif );
        if_nesting--;
        return 0; // Ready to get next token.
        }
    else if( *this == "endif" )
        {
        if( if_nesting == 0 ) Error( T_extra_endif );
        if_nesting--;
        return 0; // Ready to get next token.
        }
    else if( *this == "error" )
        {
        int line = Line();
        NextTok(); // Allow macro substitution.
        if( Line() == line )
            {
            fprintf( stderr, "(preprocessor, line %d) %s\n", line, Spelling() );
            return 0; // Ready to get next token.
            }
        else
            {
            // If next token is on a different line; we went too far.
            fprintf( stderr, "(null preprocessor message, line %d)\n", line );
            return 1;  // Signal that we already have the next token.
            }
        }
    return 0;
    }


/*-------------------------------------------------------------------------*
 *  O p e r a t o r  + +                                                   *
 *                                                                         *
 *  Grab the next token from the current source file.  If at end of file,  *
 *  pick up where we left off in the previous file.  If there is no        *
 *  previous file, return "T_null".                                        *
 *                                                                         *
 *-------------------------------------------------------------------------*/
Token & Token::operator++( )
    {
    if( put_back ) 
        {
        put_back = 0;
        return *this;
        }

    // If we've reached the end of an include file, pop the stack.

    for(;;)
        {
        NextTok();  
        if( type == T_directive ) 
            {
            if( HandleDirective() ) break;
            }
        else if( type == T_null ) 
            {
            fclose( frame.source );
            if( !PopFrame() ) break;
            }
        else break;  // We have a real token.
        }

    // Now fill in the value fields if the token is a number. 

    switch( type )
        {
        case T_integer : ivalue = atoi( spelling ); break;
        case T_float   : fvalue = atof( spelling ); break;
        case T_null    : if( if_nesting > 0 ) Error( T_no_endif ); break;
        default        : break;
        }

    return *this;
    }

Token & Token::operator++( int )
    {
    fprintf( stderr, "Postfix increment is not implemented for the Token class.\n" );
    return *this;
    }

/*-------------------------------------------------------------------------*
 *  T o k e n   Push & Pop Frame                                           *
 *                                                                         *
 *  These functions are used to create and destroy the context "frames"    *
 *  that are used to handle nested files (via "include").                  *
 *                                                                         *
 *-------------------------------------------------------------------------*/
void Token::PushFrame( FILE *fp, char *fname )
    {
    // Create a copy of the current (top-level) frame.

    TokFrame *n = new TokFrame;
    *n = frame;

    // Now overwrite the top-level frame with the new state.

    frame.next   = n;
    frame.source = fp;
    frame.line   = 1;
    frame.column = 0;
    frame.fname  = strdup( fname );
    pushed       = NullChar;
    }

int Token::PopFrame()
    {
    if( frame.next == NULL ) return 0;
    TokFrame *old = frame.next;
    frame = *old;
    delete   old;  // Delete the frame that we just copied from.
    return 1;
    }

/*-------------------------------------------------------------------------*
 *  Miscellaneous Functions                                                *
 *                                                                         *
 *-------------------------------------------------------------------------*/
void Token::Init()
    {
    case_sensitive = 1;
    put_back       = 0;
    pushed         = NullChar;
    if_nesting     = 0;
    frame.source   = NULL;
    frame.next     = NULL;
    frame.fname    = NULL;
    first          = NULL;
    last           = NULL;
    table          = NULL;
    pushed         = NullChar;
    SearchArgs();  // Search command-line args for macro definitions.
    }

const char* Token::Spelling() const 
    { 
    return spelling;    
    }

char Token::Char() const 
    { 
    return spelling[0];
    }

const char* Token::FileName() const
    { 
    static char *null_string = "";
    if( frame.fname == NULL ) return null_string;
    else return frame.fname; 
    }

float Token::Fvalue() const
    {
    float val = 0.0;
    if( type == T_float   ) val = fvalue;
    if( type == T_integer ) val = ivalue;
    return val;
    }

void Token::GetName( char *name, int max )
    {
    int c;
    for( int i = 1; i < max; i++ )
        {
        if( NonWhite(c) == '>' ) 
            { 
            *name = NullChar; 
            return; 
            }
        *name++ = c;
        }
    Error( T_name_too_long );
    }

void Token::AddPath( const char *new_path )
    {
    char *name = strdup( new_path );
    if( name == NULL ) return;
    TokPath *p = new TokPath;
    p->next = NULL;
    p->path = name;
    if( first == NULL ) first = p;
    else last->next = p;
    last = p;
    }

void Token::ClearPaths()
    {
    TokPath *p = first;
    while( p != NULL )
        {
        TokPath *q = p->next;
        delete[] p->path;  // delete the string.
        delete   p;        // delete the path structure.
        p = q;
        }
    first = NULL;
    last  = NULL;
    }

FILE *Token::ResolveName( const char *name )
    {
    char resolved[128];
    for( const TokPath *p = first; p != NULL; p = p->next )
        {
        strcpy( resolved, p->path );
        strcat( resolved, "/"     );
        strcat( resolved, name    );
        FILE *fp = fopen( resolved, "r" );
        if( fp != NULL ) return fp;
        }
    Error( T_file_not_found, name );
    return NULL;
    }

void Token::CaseSensitive( int on_off = 1 ) 
    { 
    case_sensitive = on_off; 
    }

void Token::Debug( FILE *fp ) 
    { 
    debug = fp;
    }

void Token::AddMacro( const char *macro, const char *repl, TokType t )
    {
    if( table == NULL ) // Create and initialize the table.
        {
        table = new TokMacroPtr[ HashConst ];
        for( int j = 0; j < HashConst; j++ ) table[j] = NULL;
        }
    int i = HashName( macro );    
    TokMacro *m = new TokMacro;
    m->next   = table[i];
    m->macro  = strdup( macro );
    m->repl   = strdup( repl  );
    m->type   = t;
    table[i]  = m;
    }

void Token::Args( int argc_, char *argv_[] )
    {
    argc = argc_;  // Set the static variables.
    argv = argv_;
    }

void Token::SearchArgs( )
    {
    TokType type = T_null;
    for( int i = 1; i < argc; i++ )
        {
        if( strcmp( argv[i], "-macro" ) == 0 )
            {
            if( i+2 >= argc ) 
                {
                fprintf( stderr, "(Token) ERROR macro argument(s) missing\n" );
                return;
                }
            char *macro = argv[i+1];
            char *repl  = argv[i+2];
            if( isAlpha  ( repl[0] ) ) type = T_ident  ; else
            if( isInteger( repl    ) ) type = T_integer; else
                                       type = T_float  ;
            AddMacro( macro, repl, type );
            i += 2;
            }
        }
    }



