20040711 [CMcC]: Here's a little [HTML]/[XML]/[SGML] attribute parser. It's iterative, but it uses regexps extensively.
# Regular expressions used by parseAttr, keyed by the quoting style of the
# attribute value:
#   quote  - name="value"   (double-quoted)
#   squote - name='value'   (single-quoted)
#   uquote - name=value     (unquoted; value runs up to whitespace or a quote)
# Each pattern is anchored at the start of the string and captures, in order,
# the attribute name, the attribute value, and the unparsed remainder.
# Quoted values may be empty (alt=""), and names may contain ':' and '.' so
# that XML names such as xml:lang are accepted.
# NOTE: comments must stay outside the braces below - inside the list a '#'
# would be treated as data, not as a comment.
array set match {
    quote {^([a-zA-Z0-9_:.-]+)[ \t]*=[ \t]*["]([^"]*)["][ \t]*(.*)$}
    squote {^([a-zA-Z0-9_:.-]+)[ \t]*=[ \t]*[']([^']*)['][ \t]*(.*)$}
    uquote {^([a-zA-Z0-9_:.-]+)[ \t]*=[ \t]*([^ \t'"]+)[ \t]*(.*)$}
}
# parseAttr --
#
#   Parse an HTML/XML/SGML style attribute string such as
#       href="x.html" class='big' width=10
#   into a flat name/value list.
#
# Arguments:
#   astring   The attribute text (the part of a tag after the element name).
#
# Results:
#   A list of alternating attribute names and values, suitable for
#   [array set]. The order of the pairs is unspecified. If a name occurs
#   more than once, the last value wins. An empty or all-whitespace input
#   yields an empty list.
#
# Errors:
#   Raises an error if the remaining text does not start with an attribute
#   matching any of the patterns in the global array "match".
proc parseAttr {astring} {
    global match
    array set attr {}
    set astring [string trim $astring]

    # Consume one attribute per pass from the front of the string.
    while {$astring ne ""} {
        set matched 0
        foreach m {quote squote uquote} {
            if {[regexp $match($m) $astring all var val suffix]} {
                set attr($var) $val
                set astring [string trimleft $suffix]
                set matched 1
                # Restart with the full pattern list on the remainder.
                break
            }
        }
        # Progress is judged per pass, not per pattern: a pattern that fails
        # after an earlier one succeeded must not be reported as an error.
        if {!$matched} {
            error "parseAttr: can't parse $astring - not a properly formed attribute string"
        }
    }
    return [array get attr]
}
----
Since you are considering the dark side of markup parsing, you might also enjoy [XML Shallow Parsing with Regular Expressions].
----
[[
[Category HTML] |
[Category XML]
]]