
/* *********** */
/* Definitions */
/* *********** */

/* References */
/* Introduction to Flex: https://web.eecs.utk.edu/~bvz/teaching/cs461Sp11/notes/flex/#states  */
/* Comments in Lex and Yacc: */
/* (ref.1) https://stackoverflow.com/questions/37266322/creating-comments-in-lex-and-yacc */
/* (ref.2) https://stackoverflow.com/questions/40564020/how-can-we-add-comments-in-flex-bison-source-code */

/* This definition part comes in the first part of lex.yy.c */
/* The subroutine part comes at the end of lex.yy.c */ 

/* 1-1. Lines to be included at the beginning of generated scanner. */
%{
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "parser_state.h"
#include "common_string.h"
#include "node.h"


/* Generated by yacc. Variable values for tokens defined in parse.y. */
/* y.tab.h should be included after the headers that declare the node and string types (node.h, common_string.h). */
#include "y.tab.h"

// LEX_RETURN(c) returns token c to the parser via lex_return(), the hook where parser_state (e.g. a line number) can be updated before returning.
#define LEX_RETURN(c) return  lex_return( yylloc_param, yytext, p, yyscanner, c)

/* lexerror is defined in subroutine section. */
/* But here you need prototypes to use in rules appropriately. */
extern void lexerror( parser_state* , yyscan_t ,  char);
extern void update_loc(YYLTYPE *loc, char *txt);

/* Location information needs to be updated manually. (Flex and bison provide framework and initialize variables, but do not update them.) */
/* https://stackoverflow.com/questions/656703/how-does-flex-support-bison-location-exactly See Brent's answer.*/
/* Usually, this update is done just before lexer action is performed by defining YY_USER_ACTION macro. */
#define YY_USER_ACTION update_loc( yylloc_param , yytext );


/* This function enables some modification of parameters before returning value to parser. */
static int
lex_return(YYLTYPE* loc, char* txt, parser_state *p, yyscan_t scanner, int c)
{
  /* Hook point: location, matched text, parser state and scanner are accepted
     so that per-token bookkeeping can be added here; none of them is used yet. */
  (void) loc;
  (void) txt;
  (void) p;
  (void) scanner;

  /* The token code is passed through unchanged. */
  return c;
}

/* Prevent implicit declaration of fileno() warning*/
// (ref) https://stackoverflow.com/questions/46213840/get-rid-of-warning-implicit-declaration-of-function-fileno-in-flex
int fileno(FILE *stream);
%}

/* IF YOU CANNOT LINK FUNCTIONS WHEN CREATING A REENTRANT PARSER, MAKE SURE THE CORRESPONDING OPTIONS ARE ENABLED. */
/* 1-2. Options to be enabled in code */
/* Generated yylex()'s function prototype. */
/* The reentrant option adds an additional parameter, "yyscan_t yyscanner", to yylex(); bison-bridge adds "YYSTYPE* yylval". */
/* lex-param option in parser.y adds additional parameters, in this case, yyscan_t scanner and parser_state *p */

/* (ref) http://gensoft.pasteur.fr/docs/flex/2.6.1/Bison-Bridge.html */
/* %option bison-bridge => the global variable yylval becomes a YYSTYPE* argument of yylex(). */
/* %option reentrant => yylex() is expected to have a yyscan_t scanner in its arguments, and the scanner functions take one additional yyscan_t argument. */
/* https://www.cs.virginia.edu/~cr4bd/flex-manual/Extra-Reentrant-Argument.html#Extra-Reentrant-Argument */
%option bison-bridge reentrant 
%{
/* By putting the appropriate declaration here, compile-time conflicts are resolved. The parameter names can be used in the rules section because the actions are expanded inside yylex(). */
#define YY_DECL int yylex(YYSTYPE* yylval_param, YYLTYPE* yylloc_param, parser_state* p, yyscan_t yyscanner )
%}

/* Track line number */
%option yylineno

/* Usually you do not need the yywrap() function. */
%option noyywrap


/* Enable (start) state stack. */
/* Functions that become available: yy_push_state(int) , yy_pop_state(). I don't know the reason, but yy_top_state() caused segmentation error,  but YY_START works. */
/* Using start state in reentrant parser. http://www.stackoverflow.com/flex-reentrant-with_start-conditions  */
%option stack

/* 2. Macros that are expanded in rules */

/* CHAR: one identifier character - an ASCII letter or underscore, or a multi-byte sequence made of a lead byte in \302-\375 followed by one to five continuation bytes in \200-\277 (the count depends on the lead byte). */
/* CHNUM: CHAR or a decimal digit. CHNUMP: the same, plus the dot. */
/* WORD: an identifier - it starts with CHAR and continues with CHNUMP, so digits and dots are allowed after the first character. */
CHAR   [a-zA-Z_]|[\302-\337][\200-\277]|[\340-\357][\200-\277][\200-\277]|[\360-\367][\200-\277][\200-\277]|[\370-\373][\200-\277][\200-\277][\200-\277][\200-\277]|[\374-\375][\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]
CHNUM  ({CHAR}|[0-9])
CHNUMP ({CHAR}|[0-9.])
WORD {CHAR}{CHNUMP}*

/* 3. (Start) states that can become active when BEGIN <state> is called in actions, but I use yy_push_state(int) and yy_pop_state(). BEGIN does not seem to work with state stack. */

%start  COMM COMMONE IFSTATE ELSESTATE



%%

	/***************/
	/* Rules            */
	/*                  */
	/* Rule priorities  */
	/* 1) The longest match is preferred. 2) Among rules that match the same number of characters, the rule given first is preferred. */
	/* (ref.) Sec. 5, Ambiguous Source Rules, in http://dinosaur.compilertools.net/lex/index.html */
	/*                  */
	/* Operators in Lex */
	/* These should be escaped or enclosed with "". */
	/* " \ [ ] ^ - ? . * + | ( ) $ / { } % < > */
	/* **************** */

	/*  For comments */
<INITIAL,IFSTATE,ELSESTATE>"/*"				{ /* Enter block-comment mode; the state active here is restored by the pop below. */ yy_push_state(COMM, yyscanner) ; };

<COMM>"*/"				{ 
						/* End of the comment: return to the state that was active before it. */
						yy_pop_state(yyscanner) ;
						};

<COMM>\n				{ /* Newlines inside a comment are dropped; no TERMIN is produced. */ };

<COMM>[^*]				{
						/* Any single character other than a star is skipped. */
						};

<COMM>"*"/[^/]			{
						/* A star that is not followed by a slash is ordinary comment text. */
						};

<COMM>.					{ /* Fallback for whatever the rules above leave over, e.g. a star with nothing after it. */ };

<INITIAL,IFSTATE,ELSESTATE>"//"					{ 
							/* A line comment acts as a statement terminator: TERMIN is returned as soon as the comment starts. */
							yy_push_state(COMMONE, yyscanner);
							LEX_RETURN( TERMIN );
						};
<COMMONE>\n				{ 
							/* The newline that ends the comment is consumed without a token, since TERMIN was already returned. */
							yy_pop_state(yyscanner); 
						};
<COMMONE>.*				{ /* Comment text up to, but not including, the newline is discarded. */ };


	/* Keywords for DATA Step */
<INITIAL,IFSTATE,ELSESTATE>if		{		
				/* An identifier that merely starts with if is still scanned by the identifier rule, because the longer match wins. */
				/* For else-if, the pending ELSESTATE is replaced by IFSTATE instead of being left on the stack beneath it. */
				if( YY_START == ELSESTATE){
					yy_pop_state(yyscanner);
				}
				/* IFSTATE stays active until one of the opening-brace or newline rules for that state pops it. */
				yy_push_state(IFSTATE, yyscanner);
				LEX_RETURN( KEY_IF );};
<INITIAL,IFSTATE,ELSESTATE>[\t \n]*else[\t \n]+	{
				/* The else keyword followed by whitespace. Surrounding blanks and newlines are swallowed so that a line break around else does not produce TERMIN. */
				yy_push_state(ELSESTATE, yyscanner);
				LEX_RETURN( KEY_ELSE );};
<INITIAL,IFSTATE,ELSESTATE>[\t \n]*else/[^a-zA-Z0-9_.\200-\377]	{
				/* The else keyword directly followed by a character that cannot continue an identifier, such as an opening brace. */
				/* The trailing context stops an identifier that merely begins with else (for example elsewhere after a blank or newline) from being split into KEY_ELSE plus an identifier; such text now falls through to the whitespace and identifier rules. */
				/* NOTE: an else that is the very last text of the input, with nothing after it, matches neither rule and is scanned as an identifier. */
				yy_push_state(ELSESTATE, yyscanner);
				LEX_RETURN( KEY_ELSE );};
	/* Ignore \n once. */
<IFSTATE,ELSESTATE>"{"" "*\n?	{
	/* Opening brace right after if/else: the brace, any blanks behind it and at most one newline are consumed as a single brace token, and the if/else state ends. */
	yy_pop_state(yyscanner);
	LEX_RETURN('{');
};
<IFSTATE,ELSESTATE>\n" "*"{"" "*\n?	{
	/* Same as above, but the brace sits on the following line; the newline in front of it is swallowed instead of becoming TERMIN. */
	yy_pop_state(yyscanner);
	LEX_RETURN('{');
};
<IFSTATE,ELSESTATE>\n	{
	/* A single newline with no brace after it: ignore it once and leave the if/else state. */
	yy_pop_state(yyscanner);
};

	/* Ignore complete space lines */
<INITIAL>^[ \t]*\n					{ /* Consumes a line made up solely of spaces or tabs (possibly none) plus its newline without producing a token; applies only in INITIAL and only at the start of a line. */ };

	/* For operators */
<INITIAL,IFSTATE,ELSESTATE>"+"	{ LEX_RETURN( OP_PLUS ); }
<INITIAL,IFSTATE,ELSESTATE>"-"	{ LEX_RETURN( OP_SUB ); }
<INITIAL,IFSTATE,ELSESTATE>"*"	{ LEX_RETURN( OP_MULT ); }
<INITIAL,IFSTATE,ELSESTATE>"/"	{ LEX_RETURN( OP_DIV ); }
<INITIAL,IFSTATE,ELSESTATE>"%"	{ LEX_RETURN( OP_MOD ); }
<INITIAL,IFSTATE,ELSESTATE>"!"	{ LEX_RETURN( FACTOR ); }
	/* Power has two spellings that map to the same token. */
<INITIAL,IFSTATE,ELSESTATE>"^"	{ LEX_RETURN( OP_POWER ); }
<INITIAL,IFSTATE,ELSESTATE>"**"	{ LEX_RETURN( OP_POWER ); }

	/* Comparison operators. Two-character forms beat their one-character prefixes through the longest-match rule. */
<INITIAL,IFSTATE,ELSESTATE>"=="	{ LEX_RETURN( OP_EQ ); }
<INITIAL,IFSTATE,ELSESTATE>"!="	{ LEX_RETURN( OP_NEQ ); }
<INITIAL,IFSTATE,ELSESTATE>">"	{ LEX_RETURN( OP_GT ); }
<INITIAL,IFSTATE,ELSESTATE>"<"	{ LEX_RETURN( OP_LT ); }
<INITIAL,IFSTATE,ELSESTATE>">="	{ LEX_RETURN( OP_GE ); }
<INITIAL,IFSTATE,ELSESTATE>"<="	{ LEX_RETURN( OP_LE ); }

	/* Logical operators, in word form and in symbol form. */
<INITIAL,IFSTATE,ELSESTATE>"and"	{ LEX_RETURN( AND ); }
<INITIAL,IFSTATE,ELSESTATE>"or"	{ LEX_RETURN( OR ); }
<INITIAL,IFSTATE,ELSESTATE>"&&"	{ LEX_RETURN( AND ); }
<INITIAL,IFSTATE,ELSESTATE>"||"	{ LEX_RETURN( OR ); }

	/* Assignment: the arrow and the single equals sign are equivalent. */
<INITIAL,IFSTATE,ELSESTATE>"<-"	{ LEX_RETURN( ASSIGN ); }
<INITIAL,IFSTATE,ELSESTATE>"="	{ LEX_RETURN( ASSIGN ); }

	/* Regular expression matching */
<INITIAL,IFSTATE,ELSESTATE>"=~"	{ LEX_RETURN( REXP_MATCH ); }

	/* Parentheses and block braces are returned as their own character codes. */
<INITIAL,IFSTATE,ELSESTATE>"("	{ LEX_RETURN( '(' ); }
<INITIAL,IFSTATE,ELSESTATE>")"	{ LEX_RETURN( ')' ); }
<INITIAL,IFSTATE,ELSESTATE>"{"	{ LEX_RETURN( '{' ); }
<INITIAL,IFSTATE,ELSESTATE>"}"	{ LEX_RETURN( '}' ); }

	/* For missing numbers */
<INITIAL,IFSTATE,ELSESTATE>"."	{
	/* A dot that is not part of a longer token is the missing-number literal; its node comes from new_node_nan_double(). */
	yylval_param->nd = new_node_nan_double( );
	LEX_RETURN( NA_NUM );
}

	/* Macro delimiters (planned feature) */
<INITIAL,IFSTATE,ELSESTATE>"%{"	{ LEX_RETURN( PLCUR ); }
<INITIAL,IFSTATE,ELSESTATE>"%}"	{ LEX_RETURN( PRCUR ); }

	/* List separator (planned feature) */
<INITIAL,IFSTATE,ELSESTATE>","	{ LEX_RETURN( COMMA ); }

	/* Identifier such as variable names, and function names */
<INITIAL,IFSTATE,ELSESTATE>{WORD}		{
	/* Hand the parser its own heap-allocated, NUL-terminated copy of the identifier, because yytext is overwritten by the next match. */
	/* NOTE(review): whoever receives yylval_param->id presumably frees it - confirm in the parser. */
	/* yyleng already holds the length of the match, so the text is not scanned again. */
	size_t ident_len = (size_t) yyleng;
	char* new_str = (char*) malloc(ident_len + 1);
	if (new_str == NULL) {
		/* Same policy as lexerror(): report on stderr and stop, rather than copying through a null pointer. */
		fprintf(stderr, "%s:%d:out of memory while copying identifier.\n", p->fname, yyget_lineno(yyscanner));
		exit(1);
	}
	memcpy(new_str, yytext, ident_len);
	new_str[ident_len] = '\0';
	yylval_param->id = new_str ;
	LEX_RETURN( IDENT );
};

	/* Number */
<INITIAL,IFSTATE,ELSESTATE>([1-9][0-9]*)|0	{
	/* Integer literal: a single 0, or digits without a leading zero. The matched text is handed to new_node_int() as is. */
	yylval_param->nd = new_node_int(yytext);
	LEX_RETURN( LIT_NUM );
};

<INITIAL,IFSTATE,ELSESTATE>(([1-9][0-9]*)|0)\.[0-9]*	{
	/* Decimal literal: integer part, a dot, then optional fraction digits, so a trailing dot with no digits is accepted. */
	/* There is no exponent form and no form that starts with the dot. */
	yylval_param->nd = new_node_double(yytext);
	LEX_RETURN( LIT_NUM );
};

	/* String literals */
	/* (ref.)  https://stackoverflow.com/questions/2039795/regular-expression-for-a-string-literal-in-flex-lex   */
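	/* \"       Beginning with a double quote. */
	/* [^\\\"]  Any character other than a backslash or a double quote. */
	/* \\.      A backslash followed by any character except newline (an escape). */
	/* \"       Ending with a double quote. */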


<INITIAL,IFSTATE,ELSESTATE>\"([^\\\"]|\\.)*\"	{
	/* Double-quoted literal: the text between the quotes is passed through string_new_unescaped_and_delete_ori() together with p->rexp_encoding. */
	/* NOTE(review): presumably that helper resolves backslash escapes and releases the intermediate string - confirm in common_string. */
	yylval_param->str = string_new_unescaped_and_delete_ori( string_new_with_len((yytext + 1), yyleng - 2), p->rexp_encoding);
	LEX_RETURN( LIT_STR );
};

<INITIAL,IFSTATE,ELSESTATE>\'([^\\\']|\\.)*\'	{
	/* Single-quoted literal: the text between the quotes is copied as written; no unescaping helper is called here. */
	yylval_param->str = string_new_with_len((yytext+1), yyleng - 2);
	LEX_RETURN( LIT_STR );
};

	/* Regular expression literals */
	/* re"/"[^("/")]([^("/")\n]|\\/)*"/" */
<INITIAL,IFSTATE,ELSESTATE>re[/][^/]([^/\n]|\\[/])*[/]	{
	/* Regex literal: the letters re, a slash, the body, and a closing slash. The +3 skips the leading re and slash; the -4 additionally drops the closing slash. */
	/* The body cannot be empty (its first character must not be a slash) and may contain a backslash-escaped slash. */
	/* The body is stored as written; backslashes are left in place at this point. */
	yylval_param->str = string_new_with_len((yytext+3), yyleng - 4);
	LEX_RETURN( LIT_REXP ); 
};

	/*  Terminators */
	/* Sequential terminators are dealt as a single terminator. */
<INITIAL,IFSTATE,ELSESTATE>[;\n]+	{
	/* A run of semicolons and/or newlines collapses into one TERMIN. */
	LEX_RETURN( TERMIN );
};

	/* Spaces */
<INITIAL,IFSTATE,ELSESTATE>[ \t]*	{ /* Blanks between tokens are skipped. The pattern also admits the empty string, but in these states some other rule always matches at least one character, so an empty match is never selected. */ };

	/* Other characters */
<INITIAL,IFSTATE,ELSESTATE>. {
	/* Catch-all: a single character that no earlier rule accepted is a lexical error. */
	/* lexerror() expects the scanner handle (yyscan_t) as its second argument and passes it to yyget_lineno(), so it must receive yyscanner and not the location pointer. */
	char c = yytext[0];
	lexerror(p, yyscanner, c);
};

%%
/* ************* */
/* User Code     */
/* ************* */

/* 'Functions and Macros Available in Reentrant C Scanners' may be helpful */
/* https://www.cs.virginia.edu/~cr4bd/flex-manual/Reentrant-Functions.html */

/*
 * Report an unrecognized input character on stderr and terminate.
 *
 *   p       - parser state; its fname field is printed in front of the message.
 *   scanner - reentrant scanner handle, used to look up the current line
 *             number through yyget_lineno().
 *   c       - the offending character.
 *
 * Does not return: the process exits with status 1.
 */
void lexerror(parser_state* p, yyscan_t scanner, char c){
	const int line_no = yyget_lineno(scanner);

	/* Adjacent literals are concatenated by the compiler; the output is one line. */
	fprintf(stderr, "%s:%d:lexical error" "('%c').\n", p->fname, line_no, c);
	exit(1);
}

/*
 * Advance a location record over the text of the token just matched.
 *
 * The new span begins where the previous one ended. Each '\n' in txt
 * increments last_line and resets last_column to 0; every other byte
 * increments last_column (columns therefore count bytes, not characters).
 */
void update_loc(YYLTYPE *loc, char *txt){
    loc->first_line = loc->last_line;
    loc->first_column = loc->last_column;

    for (const char *cur = txt; *cur != '\0'; ++cur) {
        if (*cur != '\n') {
            loc->last_column++;
            continue;
        }
        loc->last_line++;
        loc->last_column = 0;
    }
}

