
/*
 * scan.y
 *
 * (C) Copyright 1995 Archie L. Cobbs
 *
 */

/*
	The assembler makes two passes. The first pass is just to
	collect the defined symbols and their values (either known
	values or segment base relative offsets). So this all
	happens twice.
*/

/********************************************************************
 *						DEFINITIONS									*
 ********************************************************************/

/* Header stuff	*/

%{

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include "util.h"
#include "error.h"
#include "names.h"
#include "asm.h"
#include "parse.tab.h"

/* Macros: the scanner reads from gInFile, one LoadBuf() call per refill */

	#define	yyin			gInFile
	#define	YY_INPUT(buf,result,max_size)								\
							((result) = LoadBuf((buf), (max_size)));

/* Action for a recognised pseudo-op: switch to Args and return its token */

	#define	Pseudo(tok)		do {											\
								BEGIN(Args);								\
								return(tok);								\
							} while (0)

/*
 * Append character c to the literal being collected in textbuf. When h
 * is non-zero the character is OR-ed with gHiBit first. A full buffer
 * is reported through uerror() and the character is dropped.
 */

	#define	add_char(c,h)	do {											\
								if (textptr - textbuf < MAX_STRING)			\
									*textptr++ = (h) ? ((c) | gHiBit) : (c);\
								else										\
									uerror("string too long");				\
							} while (0)

/*
 * What to do when we get an unexpected character: report it through
 * uerror() and skip the rest of the line by switching to Ignore. A
 * non-printable character is reported by its byte value; otherwise the
 * caller's message s is used.
 *
 * The character is converted to unsigned char before it reaches
 * isprint() or the format arguments: a negative plain char is not a
 * valid <ctype.h> argument and would print as a sign-extended value.
 */

	#define	bad_char(s)		{												\
								if (!isprint((unsigned char) *yytext))		\
									uerror("illegal character: $%02X"		\
										" '%c'",							\
										(unsigned char) *yytext,			\
										(unsigned char) *yytext);			\
								else										\
									uerror(s);								\
								BEGIN(Ignore);								\
							}

/* Collection buffer for the current char/string literal and its write cursor */

	static char				textbuf[MAX_STRING];
	static char				*textptr;

/* Input hook behind YY_INPUT; defined after the rules section */

	static int				LoadBuf(char *buf, int max);

%}

/*
 * Start conditions, or "states":
 *
 * INITIAL	Before anything happens
 * Newline	We're at the very beginning of a line
 * Opcode	Looking for an opcode
 * Args		Looking for arguments to the opcode
 * Dquote	We're in the middle of a string
 * Squote	We're in the middle of a quoted character
 * Ignore	We're commenting till the end of the line
 * Line1,2	Reading a #line preprocessor directive
 */

%x	Newline Opcode Args
%x	Dquote Squote
%x	Ignore
%x	Line1 Line2

/* Named patterns: single-character classes, their one-or-more forms, and identifiers */

LET					[a-zA-Z]
DIG					[0-9]
DIGS				[0-9]+
BDIG				[01]
BDIGS				[01]+
ODIG				[0-7]
ODIGS				[0-7]+
HDIG				[0-9A-Fa-f]
HDIGS				[0-9A-Fa-f]+
WS					[ \t\r\f\v]
ID					[a-zA-Z_][a-zA-Z0-9_]*

%%

	/********************************************************************
	 *						RULES										*
	 ********************************************************************/

	/****** INITIAL MODE *******/

	/* First character of the input: give it back and rescan it in Newline */

<INITIAL>.|\n					{
									BEGIN(Newline);
									yyless(0);
								}

	/****** END OF LINE, END OF FILE, COMMENTS (CONSOLIDATED) ******/

	/* A newline ends the line: return it as a token and restart in Newline */

<Newline,Opcode,Args,Ignore>\n	{
									BEGIN(Newline);
									return('\n');
								}

	/*
	 * Input ended in the middle of a line or #line directive: return the
	 * newline token the line never had. The scanner is left in Newline,
	 * so presumably the next call reaches the <Newline><<EOF>> rule
	 * below and terminates.
	 */

<Opcode,Args,Ignore,Line1,Line2><<EOF>>		{
									BEGIN(Newline);
									return('\n');
								}

	/* End of input at the start of a line: scanning is finished */

<Newline><<EOF>>				yyterminate();

	/* A semicolon starts a comment: skip the rest of the line via Ignore */

<Newline,Opcode,Args>;			BEGIN(Ignore);

	/****** NEWLINE MODE *******/

	/* Leading whitespace: no label on this line, go look for an opcode */

<Newline>{WS}+					BEGIN(Opcode);

	/* An asterisk as the first character makes the whole line a comment */

<Newline>\*						BEGIN(Ignore);

	/* Label, with or without a trailing colon (the colon is stripped) */

<Newline>{ID}:?					{
									if (yytext[yyleng - 1] == ':')
										yytext[yyleng - 1] = 0;
									yylval.id = NameID(gNames, yytext);
									BEGIN(Opcode);
									return(LABEL);
								}

	/* Line directive: "#" alone, or "#" followed by "line" */

<Newline>#({WS}*line)?			BEGIN(Line1);

	/* Anything else cannot start a line */

<Newline>.						bad_char("badly formed label");

	/****** LINE DIRECTIVES *******/

	/* The "line" keyword itself is skipped */

<Line1>line						;

	/* Line number: record it, then look for the file name */

<Line1>{DIGS}					{
									gLineNumber = atoi(yytext);
									BEGIN(Line2);
								}

	/* Quoted file name: drop the closing quote, skip the opening one */

<Line2>\".*\"					{
									yytext[yyleng - 1] = '\0';
									gFileName = Name(gNames,
											NameID(gNames, &yytext[1]));
								}

	/* Everything else on the directive line is skipped */

<Line1,Line2>.					;

	/* End of the directive; no newline token is returned for it */

<Line1,Line2>\n					BEGIN(Newline);

	/****** OPCODE MODE *******/

	/* Skip whitespace in front of the opcode */

<Opcode>{WS}+					;

	/* Instruction or macro name: folded to upper case before lookup */

<Opcode>{ID}					{
									char	*p;

									for (p = yytext; *p != '\0'; p++)
										*p = toupper((unsigned char) *p);
									yylval.id = NameID(gNames, yytext);
									BEGIN(Args);
									return(IDENT);
								}

	/* Assignment */

<Opcode>"="						{
									BEGIN(Args);
									return('=');
								}

	/* Pseudo-opcodes (lower case only), listed alphabetically */

<Opcode>".align"				Pseudo(ALIGN);
<Opcode>".ascii"				Pseudo(ASCII);
<Opcode>".asciz"				Pseudo(ASCIZ);
<Opcode>".assert"				Pseudo(ASSERT);
<Opcode>".begin"				Pseudo(BEGNN);
<Opcode>".bss"					Pseudo(BSS);
<Opcode>".byte"					Pseudo(BYTE);
<Opcode>".code"					Pseudo(CODE);
<Opcode>".data"					Pseudo(DATA);
<Opcode>".dbyt"					Pseudo(DBYT);
<Opcode>".dci"					Pseudo(DCI);
<Opcode>".ds"					Pseudo(SPACE);
<Opcode>".else"					Pseudo(ELSE);
<Opcode>".end"					Pseudo(END);
<Opcode>".endif"				Pseudo(ENDIF);
<Opcode>".endm"					Pseudo(ENDM);
<Opcode>".equ"					Pseudo('=');
<Opcode>".expand"				Pseudo(EXPAND);
<Opcode>".export"				Pseudo(GLOBAL);
<Opcode>".extern"				Pseudo(IMPORT);
<Opcode>".float"				Pseudo(FLOAT);
<Opcode>".global"				Pseudo(GLOBAL);
<Opcode>".hibit"				Pseudo(HIBIT);
<Opcode>".if"					Pseudo(IF);
<Opcode>".import"				Pseudo(IMPORT);
<Opcode>".int"					Pseudo(LONG);
<Opcode>".long"					Pseudo(LONG);
<Opcode>".macro"				Pseudo(MACRO);
<Opcode>".space"				Pseudo(SPACE);
<Opcode>".word"					Pseudo(WORD);
<Opcode>".zero"					Pseudo(ZERO);
<Opcode>".zimport"				Pseudo(ZIMPORT);

	/*
	 * Any other dot-name is an unknown pseudo-op. This rule must stay
	 * after the pseudo-op rules so that they win equal-length matches.
	 */

<Opcode>"."{ID}					{
									uerror("undefined pseudo-op");
									BEGIN(Ignore);
								}

	/* Whatever is left cannot be an opcode */

<Opcode>.						bad_char("bad opcode");

	/****** ARGS MODE *******/

	/* Whitespace between operands is skipped */

<Args>{WS}+						;

	/* Integer constants	*/

	/* Binary: "%" followed by 0/1 digits, accumulated most significant first */

<Args>%{BDIGS}					{
									int		k;

									yylval.intval = 0;
									for (k = 1; k < yyleng; k++)
										yylval.intval = (yylval.intval << 1)
														| (yytext[k] == '1');
									return(INTLIT);
								}

	/* Octal: leading zero followed by octal digits */

<Args>0{ODIGS}					{
									sscanf(&yytext[1], "%o", &yylval.intval);
									return(INTLIT);
								}

	/* Decimal */

<Args>{DIGS}					{
									sscanf(yytext, "%d", &yylval.intval);
									return(INTLIT);
								}

	/* Hexadecimal: "0x" prefix */

<Args>0x{HDIGS}					{
									sscanf(&yytext[2], "%x", &yylval.intval);
									return(INTLIT);
								}

	/* Hexadecimal: "$" prefix */

<Args>\${HDIGS}					{
									sscanf(&yytext[1], "%x", &yylval.intval);
									return(INTLIT);
								}

	/* Floating constants: digits, a dot, optional fraction and exponent */

<Args>{DIG}+"."{DIG}*([eE][+-]?{DIG}*)?	{
									yylval.floatval = atof(yytext);
									return(FLOATLIT);
								}

	/* Opening single quote: reset the collection buffer, collect in Squote */

<Args>\'						{
									textptr = textbuf;
									*textptr = '\0';
									BEGIN(Squote);
								}

	/* Opening double quote: reset the collection buffer, collect in Dquote */

<Args>\"						{
									textptr = textbuf;
									*textptr = '\0';
									BEGIN(Dquote);
								}

	/* Identifier used as an operand */

<Args>{ID}						{
									yylval.id = NameID(gNames, yytext);
									return(IDENT);
								}

	/* Special symbols	*/

	/* Single-character operators and punctuation are their own token */

<Args>[(),&|<>?:]				|
<Args>[-+*/%^~!]				|
<Args>[\[\]]					return(*yytext);

	/* "#" alone and "#<" both give LSB; "#>" gives MSB */

<Args>"#""<"?					return(LSB);
<Args>"#>"						return(MSB);

	/* Shifts */

<Args>"<<"						return(LSH);
<Args>">>"						return(RSH);

	/* Logical and / or */

<Args>"&&"						return(LAND);
<Args>"||"						return(LOR);

	/* Comparisons */

<Args>"=="						return(EQU);
<Args>"!="						return(NEQ);
<Args>"<="						return(LE);
<Args>">="						return(GE);

	/* Anything else is not a valid operand character */

<Args>.							bad_char("bad operand");

	/***** CHARACTER AND STRING CONSTANTS ******/

	/*
	 * Closing double quote: copy the collected bytes into memory obtained
	 * from GetMem() and return them, with their length, as TEXTLIT.
	 */

<Dquote>\"						{
									yylval.textval.len = textptr - textbuf;
									yylval.textval.chars =
										GetMem(yylval.textval.len);
									memcpy(yylval.textval.chars,
										textbuf, yylval.textval.len);
									BEGIN(Args);
									return(TEXTLIT);
								}

	/*
	 * Closing single quote: exactly one character must have been
	 * collected. Otherwise an error is reported and the last collected
	 * character is returned. An empty constant ('') has no character to
	 * return, so 0 is used and nothing is read from in front of textbuf.
	 */

<Squote>'						{
									if (textptr - textbuf == 1)
										yylval.charval = textbuf[0];
									else
									{
										uerror("bad character constant");
										yylval.charval = (textptr > textbuf) ?
															textptr[-1] : 0;
									}
									BEGIN(Args);
									return(CHARLIT);
								}

	/*
	 * Special escaped chars: one rule for every single-letter escape.
	 * The letter after the backslash selects the character to add.
	 */

<Squote,Dquote>\\[ntvbrfae\\'\"]	{
									int		ch;

									switch (yytext[1])
									{
									case 'n':	ch = '\n';	break;
									case 't':	ch = '\t';	break;
									case 'v':	ch = '\v';	break;
									case 'b':	ch = '\b';	break;
									case 'r':	ch = '\r';	break;
									case 'f':	ch = '\f';	break;
									case 'a':	ch = '\a';	break;
									case 'e':	ch = 0x1B;	break;
									default:	/* backslash and both quotes: as is */
										ch = yytext[1];
										break;
									}
									add_char(ch, TRUE);
								}

	/*
	 * A newline, escaped or not, inside a literal: report it, give the
	 * newline back so the line still ends, and skip via Ignore.
	 */

<Squote,Dquote>\\?\n			{
									uerror("unterminated char or string constant");
									unput('\n');
									BEGIN(Ignore);
								}

	/*
	 * A backslash with no more specific escape rule is dropped; the
	 * character after it is then scanned as an ordinary one.
	 */

<Squote,Dquote>\\				;

	/*
	 * Octal character: a backslash and one to three octal digits. The
	 * bound applies to single digits, so later digits belong to the
	 * following text. Three digits can still exceed a byte (e.g. \777),
	 * hence the range check.
	 */

<Squote,Dquote>\\{ODIG}{1,3}	{
									unsigned int	result = 0;

									if (sscanf(yytext + 1, "%o", &result) != 1)
										cerror("can't scan octal character");
									if (result & ~0xFFu)
										uerror("octal character out of bounds");
									add_char((int) (result & 0xFF), FALSE);
								}

	/*
	 * Hex character: "\x" and one or two hex digits; later digits belong
	 * to the following text. The range check is kept as a safeguard.
	 */

<Squote,Dquote>\\x{HDIG}{1,2}	{
									unsigned int	result = 0;

									if (sscanf(yytext + 2, "%x", &result) != 1)
										cerror("can't scan hex character");
									if (result & ~0xFFu)
										uerror("hex character out of bounds");
									add_char((int) (result & 0xFF), FALSE);
								}

	/* "\x" with no hex digit after it */

<Squote,Dquote>\\x				{
									uerror("bad hex character constant");
								}


<Squote,Dquote>.				{
									/*
									 * Use the byte value: a negative plain
									 * char is not a valid isprint() argument
									 * and would print sign-extended.
									 */
									unsigned char	ch = (unsigned char) *yytext;

									if (!isprint(ch))
										uerror("illegal character $%02X in char"
												" or string constant",
												(unsigned int) ch);
									else
										add_char(*yytext, TRUE);
								}

	/* Input ended inside a literal: report it and stop scanning */

<Squote,Dquote><<EOF>>			{
									uerror("end of file in char or string constant");
									yyterminate();
								}

	/* Comments extend to end of line: discard everything before the newline */

<Ignore>.+						;

%%

/*
 * LoadBuf()
 *
 * Input hook behind YY_INPUT. Reads at most one line (up to max - 1
 * characters) from yyin into buf and copies the text, without its
 * newline, into gLineBuf for the listing. A line longer than one call
 * can deliver is accumulated across calls; whatever does not fit in
 * gLineBuf is dropped from the listing copy only.
 *
 * Returns the number of characters stored in buf, or YY_NULL when
 * fgets() reports end of file or an error.
 */

static	int
LoadBuf(char *buf, int max)
{
	static int	posn = 0;	/* characters of the current line in gLineBuf */
	int			num, space, xfer, gotnl;

/* Read up to next newline (or max chars) */

	if (fgets(buf, max, yyin) == NULL)
	{

	/* A last line without a newline is still pending: close it out so
	   the listing text is terminated and the next read starts clean */

		if (posn > 0)
		{
			gLineBuf[posn] = 0;
			posn = 0;
		}
		return(YY_NULL);
	}
	num = (int) strlen(buf);

/* A line that starts with a NUL byte has length zero; never look at
   buf[num - 1] in that case */

	gotnl = (num > 0 && buf[num - 1] == '\n');

/* Transfer into line buffer for listing, leaving room for the terminator */

	if ((space = MAX_LISTING_LINE - posn - 1) > 0)
	{
		xfer = gotnl ? num - 1 : num;
		if (xfer > space)
			xfer = space;
		memcpy(gLineBuf + posn, buf, xfer);
		posn += xfer;
	}

/* If we got a newline, terminate and reset line buffer */

	if (gotnl)
	{
		gLineBuf[posn] = 0;
		posn = 0;
		if (gDebug)
			fprintf(stderr, "READ--->%s\n", gLineBuf);
	}

/* Return number read into buffer */

	return(num);
}

