Updated 2008-07-18 09:20:01 by lars_h

For parsing expression grammars and related topics, see grammar::peg.

SS 23Jan2005 - In order to add the expr command to Jim (A small-footprint Tcl implementation I'm working on) I had to write a compiler able to turn mathematical expressions into bytecode for a stack-based machine. Before to write it in C, I wrote a prototype in Tcl that can be useful. AM is also working on something like this, similar in design to the Tcl's expr parser itself. This one is a bit different. It generates a stack program from the expr representation, and optionally can turn the stack program into a Tcl program (i.e. a parse tree). For Jim the last part is not useful, but I added it for completeness. This code does not check at all if the input expression is correct. It's just a prototype, I'm going to write the real version in C.

SS 24Jan2005 - new version able to handle unary operators, including unary/binary - and + usage detection.

Example output:
 Exp: 1+2*3
 Rpn: 1 2 3 * +
 Tcl: [+ 1 [* 2 3]]

 Exp: 1*2+3
 Rpn: 1 2 * 3 +
 Tcl: [+ [* 1 2] 3]

 Exp: ((1*(2+3)*4)+5)*2
 Rpn: 1 2 3 + * 4 * 5 + 2 *
 Tcl: [* [+ [* [* 1 [+ 2 3]] 4] 5] 2]

 Exp: -1+5
 Rpn: 1 unary_minus 5 +
 Tcl: [+ [unary_minus 1] 5]

 Exp: 4-+5
 Rpn: 4 5 unary_plus -
 Tcl: [- 4 [unary_plus 5]]

 Exp: 2*0-1+5
 Rpn: 2 0 * 1 - 5 +
 Tcl: [+ [- [* 2 0] 1] 5]

 Exp: 1+2*3+4*5+6
 Rpn: 1 2 3 * + 4 5 * + 6 +
 Tcl: [+ [+ [+ 1 [* 2 3]] [* 4 5]] 6]

 Exp: (1+2 || 3+4) && 10
 Rpn: 1 2 + 3 4 + || 10 &&
 Tcl: [&& [|| [+ 1 2] [+ 3 4]] 10]

 Exp: !!!3+4
 Rpn: 3 ! ! ! 4 +
 Tcl: [+ [! [! [! 3]]] 4]

Exp is the input expression, Rpn is the generated RPN program, Tcl is the RPN program translated into a Tcl program.

And that's the code:
 # Expression parser in Tcl.
 # Copyright (C) 2005 Salvatore Sanfilippo

 # This list represents the operators.
 # is composed of groups of three elements:
 # The operator name, precedente, arity.

 set ExprOperators {
     "!" 300 1
     "~" 300 1
     "unary_minus" 300 1
     "unary_plus" 300 1

     "*" 200 2
     "/" 200 2

     "-" 100 2
     "+" 100 2

     "&&" 10 2
     "||" 10 2
 }

 proc ExprOperatorPrecedence op {
     foreach {name prec arity} $::ExprOperators {
	if {$name eq $op} {return $prec}
     }
     return -1
 }

 proc ExprOperatorArity op {
     foreach {name prec arity} $::ExprOperators {
	if {$name eq $op} {return $arity}
     }
     return -1
 }

 proc ExprIsOperator op {
     expr {[ExprOperatorPrecedence $op] != -1}
 }

 proc ExprGetToken exprVar {
     upvar 1 $exprVar expression
     set expression [string trim $expression]
     if {[regexp {(^[0-9]+)(.*)} $expression -> tok exprRest]} {
	set res [list operand $tok]
	set expression $exprRest
     } elseif {[ExprIsOperator [string range $expression 0 1]]} {
	set res [list operator [string range $expression 0 1]]
	set expression [string range $expression 2 end]
     } elseif {[ExprIsOperator [string index $expression 0]]} {
	set res [list operator [string index $expression 0]]
	set expression [string range $expression 1 end]
     } elseif {[string index $expression 0] eq "("} {
	set res [list substart {}]
	set expression [string range $expression 1 end]
     } elseif {[string index $expression 0] eq ")"} {
	set res [list subend {}]
	set expression [string range $expression 1 end]
     } else {
	return -code error \
	    "default reached in ExprGetToken. String: '$expression'"
     }
     return $res
 }

 proc ExprTokenize expression {
     set tokens {}
     while {[string length [string trim $expression]]} {
	lappend tokens [ExprGetToken expression]
     }
     # Post-processing stage. Turns "-" into "unary_minus"
     # when - is used as unary minus. The same with unary +.
     for {set i 0} {$i < [llength $tokens]} {incr i} {
	if {[lindex $tokens $i 0] eq {operator} && \
	    ([lindex $tokens $i 1] eq {-} || \
	     [lindex $tokens $i 1] eq {+}) && \
	    ([lindex $tokens [expr $i-1] 0] eq {operator} || $i == 0)} \
	{
	    switch -- [lindex $tokens $i 1] {
		- {lset tokens $i 1 "unary_minus"}
		+ {lset tokens $i 1 "unary_plus"}
	    }
	}
     }
     return $tokens
 }

 proc ExprPop listVar {
     upvar 1 $listVar list
     set ele [lindex $list end]
     set list [lindex [list [lrange $list 0 end-1] [set list {}]] 0]
     return $ele
 }

 proc ExprPush {listVar element} {
     upvar 1 $listVar list
     lappend list $element
 }

 proc ExprPeek listVar {
     upvar 1 $listVar list
     lindex $list end
 }

 proc ExprTokensToRPN tokens {
     set rpn {}
     set stack {}
     foreach t $tokens {
	foreach {type token} $t {}
	if {$type eq {operand}} {
	    ExprPush rpn $token
	} elseif {$type eq {operator}} {
	    while {[llength $stack] && \
		    [ExprOperatorArity $token] != 1 &&
		    [ExprOperatorPrecedence [ExprPeek stack]] >= \
		    [ExprOperatorPrecedence $token]} \
	    {
		ExprPush rpn [ExprPop stack]
	    }
	    ExprPush stack $token
	} elseif {$type eq {substart}} {
	    ExprPush stack "("
	} elseif {$type eq {subend}} {
	    while 1 {
		set op [ExprPop stack]
		if {$op eq "("} break
		ExprPush rpn $op
	    }
	}
     }
     while {[llength $stack]} {
	ExprPush rpn [ExprPop stack]
     }
     return $rpn
 }

 proc ExprToRpn expression {
     set tokens [ExprTokenize $expression]
     ExprTokensToRPN $tokens
 }

 proc ExprRpnToTcl rpn {
     set stack {}
     foreach item $rpn {
	if {[ExprIsOperator $item]} {
	    set arity [ExprOperatorArity $item]
	    set operators [lrange $stack end-[expr {$arity-1}] end]
	    set stack [lrange $stack 0 end-$arity]
	    while {$arity} {ExprPop rpn; incr arity -1}
	    set item "$item "
	    foreach operator $operators {
		append item "$operator "
	    }
	    set item [string range $item 0 end-1]
	    ExprPush stack "\[$item\]"
	} else {
	    ExprPush stack $item
	}
     }
     return [lindex $stack 0]
 }

 proc ExprTest {} {
     set expressions {
	{1+2*3}
	{1*2+3}
	{((1*(2+3)*4)+5)*2}
	{-1+5}
	{4-+5}
	{2*0-1+5}
	{1+2*3+4*5+6}
	{(1+2 || 3+4) && 10}
	{!!!3+4}
     }
     foreach e $expressions {
	set rpn [ExprToRpn $e]
	set tcl [ExprRpnToTcl $rpn]
	puts "Exp: $e"
	puts "Rpn: $rpn"
	puts "Tcl: $tcl"
	puts {}
     }
 }

 proc ExprInteractiveTest {} {
     while 1 {
	puts -nonewline "expr> "
	flush stdout
	gets stdin e
	if {$e eq {exit}} exit
	if {[string trim $e] eq {}} continue
	set tokens [ExprTokenize $e]
	set rpn [ExprToRpn $e]
	set tcl [ExprRpnToTcl $rpn]
	puts $tokens
	puts $rpn
	puts $tcl
     }
 }

 #ExprInteractiveTest
 ExprTest

TP While starting some work on a YAUTP (yet another unfinished Tcl project), I found another expr parser written in Tcl. It's from the NSync project (not the boy band :-). The only place I've found it is at: http://www.openmash.org/lxr/source/tcl/nsync/

The files NSParser.tcl and NSLexicalAnalyzer.tcl form a LL(1) predictive parser, driven by production tables. It's not quite a full parser for Tcl expr command, but close enough to provide a good start.

A paper on NSync can be found at: http://www.usenix.org/publications/library/proceedings/tcl97/full_papers/bailey/bailey.ps or an HTML version at: http://www.usenix.org/publications/library/proceedings/tcl97/full_papers/bailey/bailey_html/TclTk97_Nsync.html

Also see Expression parsing

AM I finally fulfilled a promise I made to myself and others on this subject: Creating your own expr command.