I mentioned in my previous post that I parse expressions in my mini language using a strange shunting yard algorithm. It mixes explicit stack management and recursive decent. Its operation makes intuitive sense to me which is why I use it, and the code to implement it happens to also be very short.
An expression parser is one of the hardest parts of a recursive descent parser to write, but with a little practice it doesn’t have to be hard at all. I’m sure there are much better ways out there to implement an expression parser so I would be interested to see them if anyone has any good links. Are they smaller then the ~100 lines (including comments) this one takes up?
The code presented below is almost identical in operation to the one from my mini parser. I added a tiny Lexer and REPL loop to it so it can be used as a very simple infix to postfix expression evaluator. I also used the C libraries setjmp() function to add very simple error handling.
It supports normal operators [+ – * /], parenthesis, the unary not operator [ ! ], and function calls. Only single character variables and literals are supported, so that the focus remains on the parser rather then the Lexer. Spaces are not supported either.
Try a simple expression such as:
> A*B-C(1+3)
> 1*(3+4)
> 2*!A
In the event that wordpress feels the need to mangle my source code here is an alternative link which will maintain the formatting: http://pastebin.com/TMkT17C6
#include <assert.h> #include <stdint.h> #include <vector> #include <setjmp.h> #include <stdio.h> typedef char token_e; namespace { // return the precedence of an operator int op_prec(token_e o) { switch (o) { case ('&'): return 1; case ('+'): case ('-'): return 2; case ('*'): case ('/'): return 3; default: return 0; } } // return true if token is an operator bool is_op(token_e o) { return op_prec(o) != 0; } // return true if token is a variable identifier bool is_var(token_e o) { return (o>='A' && o<='Z') || (o>='a' && o<='z'); } // return true if token is a numeric literal bool is_num(token_e o) { return (o>='0' && o<='9'); } } // namespace {} struct parser_t { std::vector<token_e> op_; const char * s_; jmp_buf error_buf; void error() { longjmp (error_buf, 1); } parser_t(const char * stream) : op_() , s_(stream) {} char found_op () { return (is_op (*s_)) ? *(s_++) : 0; } char found_var() { return (is_var (*s_)) ? *(s_++) : 0; } char found_num() { return (is_num (*s_)) ? *(s_++) : 0; } // check for specific token in the token stream bool found(char ch) { if (*s_ == ch) { s_++; return true; } return false; } // unconditionally expect a token in the token stream void expect(char ch) { if (*(s_++) != ch) { printf ("\nexpected '%c'\n", ch); error (); } } // ---- ---- ---- ---- ---- ---- ---- ---- <expression parser> void pop_op() { assert(op_.size() > 0); token_e t = op_.back(); putchar (t); op_.pop_back(); } void push_op(uint32_t tide, token_e tok) { while (op_.size() > tide) { if (op_prec (op_.back ()) > op_prec (tok)) pop_op (); else break; } op_.push_back(tok); } void pop_all_op(uint32_t tide) { while (op_.size() > tide) pop_op(); } void parse_lhs(uint32_t tide) { // parenthesis if (found('(')) { parse_expr (); expect (')'); } // identifier else if (char var = found_var()) { // identifier name putchar (var); // index an object while (found('.')) { if (char ix = found_var()) { putchar (ix); putchar ('@'); } else { printf ("\nexpecting identifier, got '%c'\n", *s_); error (); } } // function call if (found('(')) { // uint32_t num_args = 0; // argument list if (!found(')')) { do { ++num_args; parse_expr(); } while (found(',')); expect (')'); } // call instruction printf("<%d>",-num_args); } } // numeric literal else if (char num = found_num()) { putchar (num); } // unexpected token else { printf ("\nunexpected token '%c'\n", *s_); error (); } } // parse an expression with extended control void parse_expr_ex(uint32_t tide) { bool unary_not = found ('!'); parse_lhs(tide); if (unary_not) { // logical not instruction putchar('!'); } if (char op = found_op()) { push_op (tide, op); parse_expr_ex(tide); } } // parse a full expression void parse_expr() { uint32_t tide = op_.size(); parse_expr_ex(tide); pop_all_op(tide); } // ---- ---- ---- ---- ---- ---- ---- ---- </expression parser> // parse root void parse() { if (!setjmp(error_buf)) { parse_expr (); } else { printf ("unable to parse\n"); } } }; // supported operators: // & and // + - add sub // * / mul div // ! unary not // example input: // A+B*C*(0-3) // A&!B // 2+A(B,C+1) int main() { char buffer[1024]; // little REPL loop for (;;) { printf ("expr: "); fgets(buffer, sizeof(buffer), stdin); if (buffer[0] == '\0') break; parser_t parser(buffer); printf ("post: "); parser.parse(); printf ("\n\n"); } return 0; }
Pingback: A simpler tinier expression parser | The 8Bit Pimps Pixel Mine