#ifndef _lt_token_h
#define _lt_token_h

#include "lt-hash.h"

/* constants etc. for tokenisation */

/* Character classes used by the tokeniser. */
#define ORD 0    /* alphabetic; combines with others */
#define BREAK 1  /* forms a single-character token */
#define SEP 2    /* separator; disappears */
#define EOL 3    /* end-of-line; separator or collapsing break, per user choice */
#define TAGO 4   /* SGML tag onset character, normally < */
#define ENTO 5   /* SGML entity onset character, normally & */
#define ENTE 6   /* SGML optional entity offset character, normally ; */
#define ENTN 7   /* SGML numeric/character entity signal, normally # */

/* Code used for a breaking EOL instead of the character's own value.
   Parenthesised so the negative constant always expands as one operand,
   whatever operators surround the macro at the point of use. */
#define EOLCODE (-2)

/* ORD is alphabetic, combines with others,
   BREAK is single char token
   SEP is separator, disappears
   EOL is end-of-line, acts as separator or collapsing break depending on
   user choice
   TAGO is SGML tag onset character, normally <
   ENTO is SGML entity onset character, normally &
   ENTN is SGML numeric/character entity signal, normally #
   ENTE is SGML optional entity offset (closing) character, normally ;
*/

/* Break characters encode themselves, as do all single-character tokens,
   except for a breaking EOL, which is coded as EOLCODE (by default,
   treelab's BRK code). */

/* Block size constant: 65536 (2^16).
   NOTE(review): presumably the size of one string-storage block; the code
   that consumes it is not visible in this header - confirm. */
#define STRBLKSIZE 65536

/* Exported tokeniser entry point.
   NOTE(review): the implementation is not visible from this header; the
   parameter notes below are inferred from names and from the comments in
   this file, and should be confirmed against the definition.
     filenames - array of nfiles file names (presumably the input files)
     nfiles    - number of entries in filenames
     breakeol  - presumably selects whether EOL acts as a collapsing break
                 rather than as a separator
     down      - presumably requests folding to lower case - TODO confirm
     sgmltg    - presumably SGML-related tag data - TODO confirm
     seprChr   - presumably the separator character - TODO confirm
   The meaning of the int result is not documented here. */
extern STD_API int EXPRT tokenise(const unsigned char **filenames,int nfiles,
	     int breakeol,int down,const unsigned char* sgmltg,
			  unsigned char seprChr);

/* Exported counterpart to tokenise, operating on a single named file.
   NOTE(review): presumably reverses tokenisation, with breakeol carrying
   the same EOL-handling choice as in tokenise; neither this nor the
   meaning of the int result is visible from this header - confirm. */
extern STD_API int EXPRT untokenise(const char* filename,int breakeol);

/* Exported decoder taking an integer code wc and a writable byte buffer.
   NOTE(review): presumably writes the text for token code wc into buf.
   The required capacity of buf and the meaning of the int result are not
   stated in this header - confirm against the definition before use. */
extern STD_API int EXPRT decodeToken(int wc,unsigned char* buf);

/* Exported dictionary initialiser taking two file roots and a HashTab.
   NOTE(review): presumably prepares hsh using an output file root and the
   file root of a previous run, returning a success flag; the roles of the
   arguments and whether either may be NULL are not visible here - confirm. */
extern STD_API boolean EXPRT InitDict(const char* out_fileroot,
		 const char* prev_fileroot,HashTab* hsh);

#endif /* _lt_token_h */
