/* blob: af66b2b8438447c3dbad90d8605799c72addd084 [file] [log] [blame] */
#ifndef TOKEN_H
#define TOKEN_H
/*
* Basic tokenization structures. NOTE! Those tokens had better
* be pretty small, since we're going to keep them all in memory
* indefinitely.
*
* Copyright (C) 2003 Transmeta Corp.
* 2003 Linus Torvalds
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
/*
* This describes the pure lexical elements (tokens), with
* no semantic meaning. In other words, an identifier doesn't
* have a type or meaning, it is only a specific string in
* the input stream.
*
* Semantic meaning is handled elsewhere.
*/
/*
 * State of the per-stream "constant file" decision; struct stream
 * keeps one of these in its 'constant' member.
 */
enum constantfile {
	CONSTANT_FILE_MAYBE  = 0,	/* undecided; not inside any #ifs in this file */
	CONSTANT_FILE_IFNDEF = 1,	/* undecided; currently inside #ifndef */
	CONSTANT_FILE_NOPE   = 2,	/* decided: no */
	CONSTANT_FILE_YES    = 3	/* decided: yes */
};
/*
 * Array of strings of unspecified length, defined outside this header.
 * NOTE(review): presumably the include search directories; the
 * 'next_path' member/parameters in this file have the matching
 * pointer-to-element type - confirm against the definition.
 */
extern const char *includepath[];
/*
 * Per-input-stream book-keeping. An array of these is reachable
 * through the global 'input_streams' pointer.
 */
struct stream {
	int fd;
	const char *name;
	const char *path;	/* input-file path - see set_stream_include_path() */
	const char **next_path;

	/* Everything below is used to check for "already parsed" */
	enum constantfile constant;
	int dirty;
	int next_stream;
	int once;
	struct ident *protect;
	struct token *ifndef;
	struct token *top_if;
};
/*
 * Global stream state, defined outside this header.
 * NOTE(review): input_stream_nr looks like the number of entries
 * reachable through input_streams - confirm against the definition.
 */
extern int input_stream_nr;
extern struct stream *input_streams;
extern unsigned int tabstop;
/* Takes a stream name and returns a pointer to an int (never a const one). */
extern int *hash_stream(const char *name);
/*
 * One identifier. The characters of the name are stored inline in the
 * trailing flexible array member, so an ident is allocated with room
 * for its name after the fixed header. 'len' is a single unsigned char.
 */
struct ident {
	struct ident *next;		/* Hash chain of identifiers */
	struct symbol *symbols;		/* Pointer to semantic meaning list */
	unsigned char len;		/* Length of identifier name */

	/* One-bit flags, packed next to each other */
	unsigned char tainted:1;
	unsigned char reserved:1;
	unsigned char keyword:1;

	char name[];			/* Actual identifier */
};
/*
 * Kind tag of a token. The tag lives in the token's position (see the
 * token_type() macro) and is what match_op() / match_ident() test
 * before they look at the token's payload.
 *
 * The enumerator values are implicit: TOKEN_EOF is 0 and each following
 * entry is one more than its predecessor, so inserting or reordering
 * entries renumbers everything after the change.
 * NOTE(review): check for users that rely on the ordering (e.g. the
 * *_EMBEDDED_0..3 runs) before touching the order.
 */
enum token_type {
TOKEN_EOF,
TOKEN_ERROR,
TOKEN_IDENT, /* payload is token->ident; see match_ident() */
TOKEN_ZERO_IDENT,
TOKEN_NUMBER,
TOKEN_CHAR,
TOKEN_CHAR_EMBEDDED_0,
TOKEN_CHAR_EMBEDDED_1,
TOKEN_CHAR_EMBEDDED_2,
TOKEN_CHAR_EMBEDDED_3,
TOKEN_WIDE_CHAR,
TOKEN_WIDE_CHAR_EMBEDDED_0,
TOKEN_WIDE_CHAR_EMBEDDED_1,
TOKEN_WIDE_CHAR_EMBEDDED_2,
TOKEN_WIDE_CHAR_EMBEDDED_3,
TOKEN_STRING,
TOKEN_WIDE_STRING,
TOKEN_SPECIAL, /* payload is token->special; see match_op() */
TOKEN_STREAMBEGIN,
TOKEN_STREAMEND,
TOKEN_MACRO_ARGUMENT,
TOKEN_STR_ARGUMENT,
TOKEN_QUOTED_ARGUMENT,
TOKEN_CONCAT,
TOKEN_GNU_KLUDGE,
TOKEN_UNTAINT,
TOKEN_ARG_COUNT,
TOKEN_IF,
TOKEN_SKIP_GROUPS,
TOKEN_ELSE,
};
/*
 * Combination tokens.
 *
 * Brace initializer listing the spelling of every multi-character
 * token. The entries appear in the same order as the enumerators of
 * enum special_token (named in the comments); the two lists have to be
 * kept in step.
 */
#define COMBINATION_STRINGS {				\
	"+=",	/* SPECIAL_ADD_ASSIGN */		\
	"++",	/* SPECIAL_INCREMENT */			\
	"-=",	/* SPECIAL_SUB_ASSIGN */		\
	"--",	/* SPECIAL_DECREMENT */			\
	"->",	/* SPECIAL_DEREFERENCE */		\
	"*=",	/* SPECIAL_MUL_ASSIGN */		\
	"/=",	/* SPECIAL_DIV_ASSIGN */		\
	"%=",	/* SPECIAL_MOD_ASSIGN */		\
	"<=",	/* SPECIAL_LTE */			\
	">=",	/* SPECIAL_GTE */			\
	"==",	/* SPECIAL_EQUAL */			\
	"!=",	/* SPECIAL_NOTEQUAL */			\
	"&&",	/* SPECIAL_LOGICAL_AND */		\
	"&=",	/* SPECIAL_AND_ASSIGN */		\
	"||",	/* SPECIAL_LOGICAL_OR */		\
	"|=",	/* SPECIAL_OR_ASSIGN */			\
	"^=",	/* SPECIAL_XOR_ASSIGN */		\
	"##",	/* SPECIAL_HASHHASH */			\
	"<<",	/* SPECIAL_LEFTSHIFT */			\
	">>",	/* SPECIAL_RIGHTSHIFT */		\
	"..",	/* SPECIAL_DOTDOT */			\
	"<<=",	/* SPECIAL_SHL_ASSIGN */		\
	">>=",	/* SPECIAL_SHR_ASSIGN */		\
	"...",	/* SPECIAL_ELLIPSIS */			\
	"",	/* SPECIAL_ARG_SEPARATOR */		\
	"<",	/* SPECIAL_UNSIGNED_LT */		\
	">",	/* SPECIAL_UNSIGNED_GT */		\
	"<=",	/* SPECIAL_UNSIGNED_LTE */		\
	">="	/* SPECIAL_UNSIGNED_GTE */		\
}
/*
 * Table of 4-byte rows, defined outside this header.
 * NOTE(review): presumably initialized from COMBINATION_STRINGS, whose
 * longest spelling is 3 characters plus the terminating NUL - confirm
 * against the definition.
 */
extern unsigned char combinations[][4];
enum special_token {
SPECIAL_BASE = 256,
SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
SPECIAL_INCREMENT,
SPECIAL_SUB_ASSIGN,
SPECIAL_DECREMENT,
SPECIAL_DEREFERENCE,
SPECIAL_MUL_ASSIGN,
SPECIAL_DIV_ASSIGN,
SPECIAL_MOD_ASSIGN,
SPECIAL_LTE,
SPECIAL_GTE,
SPECIAL_EQUAL,
SPECIAL_NOTEQUAL,
SPECIAL_LOGICAL_AND,
SPECIAL_AND_ASSIGN,
SPECIAL_LOGICAL_OR,
SPECIAL_OR_ASSIGN,
SPECIAL_XOR_ASSIGN,
SPECIAL_HASHHASH,
SPECIAL_LEFTSHIFT,
SPECIAL_RIGHTSHIFT,
SPECIAL_DOTDOT,
SPECIAL_SHL_ASSIGN,
SPECIAL_SHR_ASSIGN,
SPECIAL_ELLIPSIS,
SPECIAL_ARG_SEPARATOR,
SPECIAL_UNSIGNED_LT,
SPECIAL_UNSIGNED_GT,
SPECIAL_UNSIGNED_LTE,
SPECIAL_UNSIGNED_GTE,
};
/*
 * Length-prefixed character data: a 31-bit length and a 1-bit
 * 'immutable' flag share one unsigned int, and the bytes themselves
 * follow in the flexible array member 'data'.
 * NOTE(review): whether 'length' counts a terminating NUL is not
 * visible from this header - confirm against the users.
 */
struct string {
unsigned int length:31;
unsigned int immutable:1;
char data[];
};
/*
 * Argument-count bit-fields. 10 + 10 + 10 + 1 = 31 bits in total, so
 * the whole structure will fit into 32 bits (it is stored by value in
 * the payload union of struct token).
 */
struct argcount {
	unsigned int normal:10;
	unsigned int quoted:10;
	unsigned int str:10;
	unsigned int vararg:1;
};
/*
 * This is a very common data structure, it should be kept
 * as small as humanly possible. Big (rare) types go as
 * pointers.
 *
 * The anonymous union is the token's payload. Which member is
 * meaningful depends on the token's type (see token_type()):
 * match_ident() reads 'ident' for TOKEN_IDENT tokens and match_op()
 * reads 'special' for TOKEN_SPECIAL tokens.
 */
struct token {
struct position pos; /* holds the type tag (pos.type) among other things */
struct token *next; /* next token in the chain; the eof token points to itself */
union {
const char *number;
struct ident *ident; /* TOKEN_IDENT */
unsigned int special; /* TOKEN_SPECIAL */
struct string *string;
int argnum;
struct argcount count;
char embedded[4]; /* NOTE(review): presumably for the TOKEN_*CHAR_EMBEDDED_n types - confirm */
};
};
/* NOTE(review): presumably the longest string the tokenizer accepts - confirm against its users. */
#define MAX_STRING 8191
/*
 * Map a pointer to a token's 'next' member back to the token that
 * contains it.
 *
 * p must be the address of the 'next' field of a struct token
 * (i.e. &tok->next); for any other pointer the result is meaningless.
 *
 * Returns the enclosing struct token.
 *
 * The member offset comes from offsetof(), which keeps the computation
 * free of arithmetic on a null pointer.
 */
static inline struct token *containing_token(struct token **p)
{
	/* Go through void * so no cast is needed on the way back to a token. */
	void *addr = (char *)p - offsetof(struct token, next);

	return addr;
}
/*
 * Type tag of token x, kept in the token's position. The expansion is
 * the member itself, so the macro can be assigned to as well as read;
 * x is evaluated exactly once.
 */
#define token_type(x) ((x)->pos.type)
/*
 * Last token in the stream - points to itself.
 * This allows us to not test for NULL pointers
 * when following the token->next chain.
 *
 * eof_token(x) is true exactly when x is the address of that
 * sentinel entry.
 */
extern struct token eof_token_entry;
#define eof_token(x) ((x) == &eof_token_entry)
/*
 * Tokenizer / preprocessor entry points, defined outside this header.
 * NOTE(review): the groupings below are inferred from names and
 * signatures only - confirm against the definitions.
 */
/* Streams: the int returned/accepted is presumably a stream index. */
extern int init_stream(const char *, int fd, const char **next_path);
extern const char *stream_name(int stream);
/* Identifiers and built-in tokens. */
extern struct ident *hash_ident(struct ident *);
extern struct ident *built_in_ident(const char *);
extern struct token *built_in_token(int, const char *);
/* Each of these returns a const char * for the object it is given. */
extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
extern const char *quote_token(const struct token *);
/* Tokenizing; tokenize_buffer() takes a buffer pointer and an unsigned long (presumably its size). */
extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
extern void show_identifier_stats(void);
/* Takes a token pointer and returns a token pointer. */
extern struct token *preprocess(struct token *);
/*
 * Test whether 'token' is the special token 'op'.
 *
 * Returns 1 when the token's type is TOKEN_SPECIAL and its 'special'
 * payload equals op, and 0 otherwise. The payload is only looked at
 * once the type check has passed.
 */
static inline int match_op(struct token *token, int op)
{
	if (token->pos.type != TOKEN_SPECIAL)
		return 0;

	return token->special == op;
}
/*
 * Test whether 'token' is the identifier 'id'.
 *
 * Returns 1 when the token's type is TOKEN_IDENT and its 'ident'
 * payload is the very same struct ident pointer as id (pointer
 * comparison, no string compare), and 0 otherwise.
 */
static inline int match_ident(struct token *token, struct ident *id)
{
	if (token->pos.type != TOKEN_IDENT)
		return 0;

	return token->ident == id;
}
#endif