/* Tokenizer.h - Copyright (c) Marc Krisnanto */

#ifndef _H_Tokenizer_
#define _H_Tokenizer_ 1


/** Tokenizer
	This is the Tokenizer class.

	It is the class object that #{TokenizerIs} compares against the class
	of an object.
*/
extern Class TokenizerClass;


/** token_s
	This represents a token in a Tokenizer buffer.

	Tokens are chained through v{next}. Each Tokenizer embeds an array of
	these (see C{tokbuf} in #{TTokenizer}).
*/
typedef struct token_s token_s;
struct token_s
{
	token_s    *next;      /* Next token */
	int         type;      /* One of TokenType (the T_xxx enum constants) */
	const char *cursor;    /* Position of the token in the source */
	long        len;       /* Length of token */
	long        line;      /* Line number in source */
	Any         thing;     /* Literal (String, Int, Float or 0) */
	int         nobr;      /* Is the next token on the same line? (see TokenizerNobr) */
};


/** TokenizerMAXPEEK
	The maximum look-ahead value.

	This is the upper bound of the v{n} argument of #{TokenizerPeek}.
	The token buffer of a Tokenizer holds C{TokenizerMAXPEEK + 1} tokens.
*/
#define TokenizerMAXPEEK 7


/** TTokenizer
	This represents an instance of the Tokenizer class.

	Tokenizer objects interpret source code into a series of tokens.
	You must supply an ASCII string as the source code.
*/
struct TTokenizer
{
	OBJ_HEAD;            /* object header; OBJ_HEAD is defined outside this file */
	token_s  *token;     /* current token */
	token_s  *last;      /* last token read */
	long      line;      /* current line */
	char     *cursor;    /* current cursor position */
	String    source;    /* ASCII source string */
	Set       literals;  /* where to store literals */
	int       indent;    /* current indentation level */

	/* Token buffer: one entry more than the maximum look-ahead
	   (TokenizerMAXPEEK) accepted by TokenizerPeek. */
	token_s   tokbuf [TokenizerMAXPEEK + 1];
};


/** TokenizerFlag
	Flags used by Tokenizer objects.

	Most of these flags have test/set/unset accessor macros declared in
	this header (for example #{TokenizerHasError}). C{TokenizerFlagCONFIG}
	has no accessor macro here.
*/
/* 0..9 not used but reserved by Container */
#define TokenizerFlagERROR    Flag10 /* set on error */
#define TokenizerFlagDOC      Flag11 /* include documentation strings? */
#define TokenizerFlagINDENT   Flag12 /* indentations are tokens? */
#define TokenizerFlagCONFIG   Flag13 /* for Config */
#define TokenizerFlagHTML     Flag14 /* /abc -> "</abc>" */
#define TokenizerFlagRESIDENT Flag15 /* compiling a resident module? */


/** TokenizerIncludeDoc
	Whether or not to include documentation strings.

	These macros test, set and unset the C{TokenizerFlagDOC} flag of
	v{self}.
*/
#define TokenizerIncludeDoc(self)      FlagTest  (self, TokenizerFlagDOC)
#define TokenizerSetIncludeDoc(self)   FlagSet   (self, TokenizerFlagDOC)
#define TokenizerUnsetIncludeDoc(self) FlagUnset (self, TokenizerFlagDOC)


/** TokenizerHasError
	Test, set and unset the C{TokenizerFlagERROR} flag, which indicates
	that the Tokenizer has an error.

	@warn
	Setting may only be done by C{Tokenizer.c}.
*/
#define TokenizerHasError(self)   FlagTest  (self, TokenizerFlagERROR)
#define TokenizerSetError(self)   FlagSet   (self, TokenizerFlagERROR)
#define TokenizerUnsetError(self) FlagUnset (self, TokenizerFlagERROR)


/** TokenizerUseIndent
	Test, set and unset the C{TokenizerFlagINDENT} flag, which tells
	whether indentations are tokens.

	These macros are necessary because C{Compile.c} needs to set the flag
	later, after creating the Tokenizer.
*/
#define TokenizerUseIndent(self)      FlagTest  (self, TokenizerFlagINDENT)
#define TokenizerSetUseIndent(self)   FlagSet   (self, TokenizerFlagINDENT)
#define TokenizerUnsetUseIndent(self) FlagUnset (self, TokenizerFlagINDENT)


/** TokenizerUseHtml
	Test, set and unset the C{TokenizerFlagHTML} flag (described in
	#{TokenizerFlag} as C{/abc -> "</abc>"}).

	These macros are necessary because C{Compile.c} needs to set the flag
	later, after creating the Tokenizer.
*/
#define TokenizerUseHtml(self)      FlagTest  (self, TokenizerFlagHTML)
#define TokenizerSetUseHtml(self)   FlagSet   (self, TokenizerFlagHTML)
#define TokenizerUnsetUseHtml(self) FlagUnset (self, TokenizerFlagHTML)


/** TokenizerIsResident
	Test, set and unset the C{TokenizerFlagRESIDENT} flag, which indicates
	that the Tokenizer is being used to compile a Resident module.

	If the flag is set then all literals such as String, Int, Float, etc.
	are created with GcStatic instead of GcNew, thus reducing the GC
	workload.
*/
#define TokenizerIsResident(self)    FlagTest  (self, TokenizerFlagRESIDENT)
#define TokenizerSetResident(self)   FlagSet   (self, TokenizerFlagRESIDENT)
#define TokenizerUnsetResident(self) FlagUnset (self, TokenizerFlagRESIDENT)


/** TokenizerIs
	Whether or not the object is a Tokenizer.

	The test compares the class of v{o}, as given by C{ClassOf}, with
	#{TokenizerClass} for equality.
*/
#define TokenizerIs(o) (ClassOf(o) == TokenizerClass)


/** TokenizerKeyword
	Return the token ID, or 0 if the string is not a token.

	v{s} is the string to look up.
	v{len} is presumably the length of v{s} (TODO: confirm in
	C{Tokenizer.c}).
*/
extern int TokenizerKeyword (const char *s, size_t len);


/** TokenizerName
	Return the name of a token as a ={String} object.

	v{type} is one of #{TokenType}.
*/
extern String TokenizerName (int type);


/** TokenizerBy
	These frozen ={Dict} objects hold the list of all tokens except those
	in the first group C{NAME .. BRK}.

	As the names suggest, C{TokenizerByName} is presumably keyed by token
	name and C{TokenizerById} by token ID (TODO: confirm in
	C{Tokenizer.c}).

	@warn
	You must never modify them.
*/
extern Dict TokenizerByName;
extern Dict TokenizerById;


/** TokenizerNew TokenizerRoot
	Create a new Tokenizer object.

	v{source} is the source code, which must be an ASCII ={String}.

	v{literals} is the ={Set} where literals are stored.
	Make sure that v{literals} is not frozen.

	v{op} is a combination of:
	@ul
	.TokenizerFlagDOC = include documentation strings
	.sorry, no other options are currently available
*/
/* NOTE(review): this header does not say how TokenizerRoot differs from
   TokenizerNew; see Tokenizer.c. */
extern Tokenizer TokenizerNew  (Set literals, String source, long op);
extern Tokenizer TokenizerRoot (Set literals, String source, long op);


/** TokenizerEos
	Test for the end of the source.

	Return 1 if the end of the source has been reached, 0 otherwise.
*/
extern int TokenizerEos (Tokenizer self);


/** TokenizerReset
	Use this function to reset the Tokenizer.

	v{literals}, v{source} and v{op} presumably have the same meaning as
	in #{TokenizerNew} (TODO: confirm in C{Tokenizer.c}).
*/
extern void TokenizerReset (Tokenizer self, Set literals, String source, long op);


/** TokenizerPeek
	Return the type of the v{n}-th token ahead. If the end of the source
	has been reached then 0 is returned.

	v{n} must be in the range 1 .. #{TokenizerMAXPEEK}.
*/
extern int TokenizerPeek (Tokenizer self, int n);


/** TokenizerTake
	Make the next token the current token.

	@warn
	Make sure to call #{TokenizerNext} before you call this function.
*/
extern void TokenizerTake (Tokenizer self);


/** TokenizerMatch
	Whether or not the next token is of a specified type. If the end of the
	source has been reached then 0 is returned.

	v{type} is one of #{TokenType}.
*/
extern int TokenizerMatch (Tokenizer self, int type);


/** TokenizerNext
	Return the type of the next token (one of #{TokenType}). If the end of
	the source has been reached then 0 is returned.
*/
extern int TokenizerNext (Tokenizer self);


/** TokenizerNeed
	Ensure that the next token is of a specified type. If it is then the
	token is made current and v{type} is returned. Otherwise 0 is returned.
*/
extern int TokenizerNeed (Tokenizer self, int type);


/** TokenizerNobr
	Whether or not the next token is on the same line as the current token.
	If it is then 1 is returned. If it is not, or the end of the source has
	been reached, then 0 is returned.
*/
extern int TokenizerNobr (Tokenizer self);


/** TokenType
	This is the list of all tokens.
	See the file C{Tokens.h} in the C{include} directory.

	The enum constants are named C{T_id}: while C{Tokens.h} is included,
	C{t_(id, str)} expands to C{T_id,} and v{str} is not used.
*/
/* TOKENIZER_IN_HEADER and t_ are defined only while Tokens.h is included
   below; both are undefined again right after the enum. */
#define TOKENIZER_IN_HEADER
#define t_(id, str) T_##id,
enum
{
#include "Tokens.h"
};
#undef t_
#undef TOKENIZER_IN_HEADER


/***/
/*--------------------------------------------------------------------------*/

/* These are called internally by Class.c to initialize the Tokenizer class.
   Neither function takes arguments. They are declared with (void) rather
   than an empty parameter list so that the declarations are real prototypes
   and the compiler rejects calls that pass arguments. */
extern void TokenizerSetup (void);
extern void TokenizerInit  (void);

#endif /*_H_Tokenizer_*/
