A list of limitations:
The tokenizer assumes that all integer values are within the range of int.
It does not support macro expansion in conditional expressions.
It implements the following grammar for the conditions, where conditional_or_expr is the root:
conditional_primary
: '(' conditional_or_expr ')'
| 'defined' ( '(' ident ')' | ident )
| integer
| ident
.
conditional_unary_expr
: '!' conditional_primary
| conditional_primary
.
conditional_compare_expr
: conditional_unary_expr
('==' conditional_unary_expr
|'!=' conditional_unary_expr ) OPT
.
conditional_and_expr : conditional_compare_expr CHAIN '&&' .
conditional_or_expr : conditional_and_expr CHAIN '||' .
The C-compiler parses the input according to the following grammar, where program is the root:
primary_expr
: identifier
| integer
| char
| string
| '(' expr ')'
.
postfix_expr
: primary_expr
| postfix_expr '[' expr ']'
| postfix_expr '(' assignment_expr LIST ')'
| postfix_expr '.' ident
| postfix_expr '->' ident
| postfix_expr '++'
| postfix_expr '--'
.
unary_expr
: '++' unary_expr
| '--' unary_expr
| '&' cast_expr
| '*' cast_expr
| '+' cast_expr
| '-' cast_expr
| '~' cast_expr
| '!' cast_expr
| 'sizeof' '(' sizeof_type ')'
| 'sizeof' unary_expr
| postfix_expr
.
sizeof_type
: 'char'
| 'int'
| 'unsigned' ('int') OPT
| 'double'
| 'void' ('*') OPT
| 'struct' ident
| ident
| sizeof_type '*'
.
cast_expr
: '(' abstract_declaration ')' cast_expr
| unary_expr
.
l_expr1
: cast_expr
| l_expr1 '*' cast_expr
| l_expr1 '/' cast_expr
| l_expr1 '%' cast_expr
.
l_expr2
: l_expr1
| l_expr2 '+' l_expr1
| l_expr2 '-' l_expr1
.
l_expr3
: l_expr2
| l_expr3 '<<' l_expr2
| l_expr3 '>>' l_expr2
.
l_expr4
: l_expr3
| l_expr4 '<=' l_expr3
| l_expr4 '>=' l_expr3
| l_expr4 '<' l_expr3
| l_expr4 '>' l_expr3
| l_expr4 '==' l_expr3
| l_expr4 '!=' l_expr3
.
l_expr5
: l_expr4
| l_expr5 '^' l_expr4
.
l_expr6
: l_expr5
| l_expr6 '&' l_expr5
.
l_expr7
: l_expr6
| l_expr7 '|' l_expr6
.
l_expr8
: l_expr7
| l_expr8 '&&' l_expr7
.
l_expr9
: l_expr8
| l_expr9 '||' l_expr8
.
conditional_expr
: l_expr9 '?' l_expr9 ':' conditional_expr
| l_expr9
.
assignment_expr
: conditional_expr
(( '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '<<=' | '>>=' | '&=' | '|=' | '^=')
assignment_expr
) SEQ OPT
.
expr : assignment_expr LIST .
array_indexes : expr ']' ('[' array_indexes) OPT .
declaration
: ('typedef' | 'extern' | 'inline' | 'static' ) SEQ OPT
type_specifier
( ';'
| func_declarator '(' declaration LIST (',' '...') OPT ')'
( ';'
| '{' statements '}'
)
| ( declarator ( '=' initializer ) OPT ) LIST ';'
)
.
func_declarator
: '*' SEQ OPT ( ident | '(' '*' ident ')')
.
declarator
: '*' SEQ OPT
( ident | '(' '*' ident ')')
( ('[' ']') OPT '[' array_indexes ) OPT
.
type_specifier
: 'const' OPT
( 'char' 'const' OPT
| 'unsigned' ('char' | 'short' | 'long' 'long' OPT | 'int') OPT
| 'short'
| 'int'
| 'long' ('double' | 'long') OPT
| 'float'
| 'double'
| 'void'
| 'struct' struct_or_union_specifier
| 'union' struct_or_union_specifier
| 'enum' enum_specifier
| ident
)
.
struct_or_union_specifier : ident OPT ('{' declaration SEQ OPT '}') OPT .
enum_specifier : ident OPT ('{' (ident ('=' constant_expr) OPT ) LIST '}') OPT .
initializer
: '{' initializer LIST OPT ',' OPT '}'
| assignment_expr
.
statement
: (ident ':') SEQ OPT
( 'if' '(' expr ')' statement ('else' statement) OPT
| 'while' '(' expr ')' statement
| 'do' statement 'while' '(' expr ')' ';'
| 'for' '(' (declaration | expr ';') expr ';' expr ')' statement
| 'break' ';'
| 'continue' ';'
| 'switch' '(' expr ')' '{'
( ( 'case' expr ':' | 'default' ':') SEQ statement SEQ ) SEQ '}'
| 'return' expr OPT ';'
| 'goto' ident ';'
| '{' statements '}'
| expr ';'
)
.
statements : (declaration | statement) SEQ OPT .
program : declaration SEQ OPT .