I mentioned in my previous post that I parse expressions in my mini language using a strange shunting yard algorithm. It mixes explicit stack management and recursive decent. Its operation makes intuitive sense to me which is why I use it, and the code to implement it happens to also be very short.
An expression parser is one of the hardest parts of a recursive descent parser to write, but with a little practice it doesn’t have to be hard at all. I’m sure there are much better ways out there to implement an expression parser so I would be interested to see them if anyone has any good links. Are they smaller then the ~100 lines (including comments) this one takes up?
The code presented below is almost identical in operation to the one from my mini parser. I added a tiny Lexer and REPL loop to it so it can be used as a very simple infix to postfix expression evaluator. I also used the C libraries setjmp() function to add very simple error handling.
It supports normal operators [+ – * /], parenthesis, the unary not operator [ ! ], and function calls. Only single character variables and literals are supported, so that the focus remains on the parser rather then the Lexer. Spaces are not supported either.
Try a simple expression such as:
> A*B-C(1+3)
> 1*(3+4)
> 2*!A
In the event that wordpress feels the need to mangle my source code here is an alternative link which will maintain the formatting: http://pastebin.com/TMkT17C6
#include <assert.h>
#include <stdint.h>
#include <vector>
#include <setjmp.h>
#include <stdio.h>
typedef char token_e;
namespace {
// return the precedence of an operator
int op_prec(token_e o) {
switch (o) {
case ('&'): return 1;
case ('+'):
case ('-'): return 2;
case ('*'):
case ('/'): return 3;
default: return 0;
}
}
// return true if token is an operator
bool is_op(token_e o) {
return op_prec(o) != 0;
}
// return true if token is a variable identifier
bool is_var(token_e o) {
return (o>='A' && o<='Z') || (o>='a' && o<='z');
}
// return true if token is a numeric literal
bool is_num(token_e o) {
return (o>='0' && o<='9');
}
} // namespace {}
struct parser_t {
std::vector<token_e> op_;
const char * s_;
jmp_buf error_buf;
void error() {
longjmp (error_buf, 1);
}
parser_t(const char * stream)
: op_()
, s_(stream)
{}
char found_op () { return (is_op (*s_)) ? *(s_++) : 0; }
char found_var() { return (is_var (*s_)) ? *(s_++) : 0; }
char found_num() { return (is_num (*s_)) ? *(s_++) : 0; }
// check for specific token in the token stream
bool found(char ch) {
if (*s_ == ch) {
s_++;
return true;
}
return false;
}
// unconditionally expect a token in the token stream
void expect(char ch) {
if (*(s_++) != ch) {
printf ("\nexpected '%c'\n", ch);
error ();
}
}
// ---- ---- ---- ---- ---- ---- ---- ---- <expression parser>
void pop_op() {
assert(op_.size() > 0);
token_e t = op_.back();
putchar (t);
op_.pop_back();
}
void push_op(uint32_t tide, token_e tok) {
while (op_.size() > tide) {
if (op_prec (op_.back ()) > op_prec (tok))
pop_op ();
else
break;
}
op_.push_back(tok);
}
void pop_all_op(uint32_t tide) {
while (op_.size() > tide)
pop_op();
}
void parse_lhs(uint32_t tide) {
// parenthesis
if (found('(')) {
parse_expr ();
expect (')');
}
// identifier
else if (char var = found_var()) {
// identifier name
putchar (var);
// index an object
while (found('.')) {
if (char ix = found_var()) {
putchar (ix);
putchar ('@');
}
else {
printf ("\nexpecting identifier, got '%c'\n", *s_);
error ();
}
}
// function call
if (found('(')) {
//
uint32_t num_args = 0;
// argument list
if (!found(')')) {
do {
++num_args;
parse_expr();
} while (found(','));
expect (')');
}
// call instruction
printf("<%d>",-num_args);
}
}
// numeric literal
else if (char num = found_num()) {
putchar (num);
}
// unexpected token
else {
printf ("\nunexpected token '%c'\n", *s_);
error ();
}
}
// parse an expression with extended control
void parse_expr_ex(uint32_t tide) {
bool unary_not = found ('!');
parse_lhs(tide);
if (unary_not) {
// logical not instruction
putchar('!');
}
if (char op = found_op()) {
push_op (tide, op);
parse_expr_ex(tide);
}
}
// parse a full expression
void parse_expr() {
uint32_t tide = op_.size();
parse_expr_ex(tide);
pop_all_op(tide);
}
// ---- ---- ---- ---- ---- ---- ---- ---- </expression parser>
// parse root
void parse() {
if (!setjmp(error_buf)) {
parse_expr ();
} else {
printf ("unable to parse\n");
}
}
};
// supported operators:
// & and
// + - add sub
// * / mul div
// ! unary not
// example input:
// A+B*C*(0-3)
// A&!B
// 2+A(B,C+1)
int main() {
char buffer[1024];
// little REPL loop
for (;;) {
printf ("expr: ");
fgets(buffer, sizeof(buffer), stdin);
if (buffer[0] == '\0')
break;
parser_t parser(buffer);
printf ("post: ");
parser.parse();
printf ("\n\n");
}
return 0;
}
Pingback: A simpler tinier expression parser | The 8Bit Pimps Pixel Mine