xk2600: I swear I had seen this documented somewhere on the wiki; however, I can't find the reference, so I added this. Feel free to update it, add additional options, etc.
The problem: you have a string that needs to be split on a delimiter. What is the fastest way to accomplish this, especially when the string is exceptionally long or variable in length?
#
# USAGE: testStringSplittingOptions delims tokens iterations ?detail?
#        testStringSplittingOptions tokens iterations
#
#   delims     - list of delimiters to test with. Each delimiter is
#                interpolated verbatim into a regexp bracket expression and
#                into a scan charset, so it should be a single character
#                that has no special meaning in either of those.
#   tokens     - quantity of random tokens to join with each delimiter
#   iterations - largest repeat count handed to the time command within
#                this proc; measurements are taken at 1, 10, 100, ... for
#                as long as the count is <= iterations
#   detail     - boolean; provide detailed reporting (the raw result of
#                every time command on its own line) instead of tabular.
#                Defaults to false.
#
# The two-argument form is accepted for callers of the earlier signature and
# benchmarks the delimiters "&", " " and newline.
#
# Every figure reported is the "microseconds per iteration" value returned
# by the time command. The report is written to stdout; the proc itself
# returns an empty string.
#
proc testStringSplittingOptions {args} {
    set usage "testStringSplittingOptions delims tokens iterations ?detail?"
    set detail 0
    switch -- [llength $args] {
        2 {
            # Earlier signature: fixed delimiter set.
            lassign $args tokens iterations
            set delims [list & { } \n]
        }
        3 {
            lassign $args delims tokens iterations
        }
        4 {
            lassign $args delims tokens iterations detail
        }
        default {
            return -code error "wrong # args: should be \"$usage\""
        }
    }
    if {![string is double -strict $tokens]} {
        return -code error "expected a number for tokens but got \"$tokens\""
    }
    if {![string is double -strict $iterations]} {
        return -code error "expected a number for iterations but got \"$iterations\""
    }
    if {![string is boolean -strict $detail]} {
        return -code error "expected a boolean for detail but got \"$detail\""
    }

    # Empty two-argument proc; timing a call to it gives a reference figure
    # to read the other measurements against.
    proc baseline {var1 var2} { return }

    # Repeat counts to measure at: 1, 10, 100, ... up to $iterations.
    set repeatCounts [list]
    for {set i 1} {$i <= $iterations} {set i [expr {$i * 10}]} {
        lappend repeatCounts $i
    }

    # prep string: one fixed token followed by $tokens random name=value pairs
    set testlist [list var0=0]
    for {set i 0} {$i < $tokens} {incr i} {
        lappend testlist var[expr {int(rand()*1000)}]=[expr {int(rand()*100000)}]
    }

    # Prints one measurement as soon as it is available (the scan loop is
    # slow, so output is flushed after every column). %f keeps the fractional
    # microseconds that the time command reports.
    set report {{label timing detail} {
        if {$detail} {
            puts [format {    %-10s %s} ${label}: $timing]
        } else {
            puts -nonewline [format {%s: %8f } $label [scan $timing %f]]
        }
        flush stdout
    }}

    foreach repeat $repeatCounts {
        foreach separator $delims {
            # join teststring with separator
            set teststring [join $testlist $separator]
            # store string length for output
            set strlen [string length $teststring]

            puts -nonewline [format {iteration: %6d separator: %2s strlen: %6d } \
                    $repeat [string map {\n LF} $separator] $strlen]
            if {$detail} {
                puts ""
            }
            flush stdout

            # baseline proc call ######################################
            apply $report proc [time {
                baseline test test2
            } $repeat] $detail

            # split ####################################################
            apply $report split [time {
                split $teststring $separator
            } $repeat] $detail

            # inline-re ################################################
            # NOTE: the pattern requires a trailing separator, so the last
            # token of the string is not matched, and -inline returns the
            # full match as well as the captured token for every hit.
            apply $report inline-re [time {
                regexp -inline -all -- "(\[^$separator\]*)$separator" $teststring
            } $repeat] $detail

            # scan #####################################################
            # Walks the string one token at a time: %n yields the number of
            # characters consumed, which advances the cursor past the token
            # and its separator. The remainder of the string is re-extracted
            # with string range on every step.
            apply $report scan [time {
                set end [string length $teststring]
                set token {}
                set cursor 0
                set c 0
                while {$cursor < $end} {
                    lassign [scan [string range $teststring $cursor $end] "%\[^$separator\]%n$separator"] token c
                    incr cursor [expr {$c + 1}]
                }
            } $repeat] $detail

            if {!$detail} {
                puts ""
            }
            puts {#################################################################}
            puts ""
        }
    }
    return
}

# Sample session, as originally recorded. The script version that produced
# it labelled the last column "inline-re" as well; that last column is the
# scan-loop timing.
#
# % testStringSplittingOptions [list & { } \n] 1000 1000
# iteration: 1 separator: & strlen: 12802 proc: 36.000000 split: 172.000000 inline-re: 3143.000000 inline-re: 38266.000000
# iteration: 1 separator: strlen: 12802 proc: 2.000000 split: 59.000000 inline-re: 2059.000000 inline-re: 34889.000000
# iteration: 1 separator: LF strlen: 12802 proc: 1.000000 split: 77.000000 inline-re: 1933.000000 inline-re: 34756.000000
# iteration: 10 separator: & strlen: 12802 proc: 0.900000 split: 73.000000 inline-re: 1985.800000 inline-re: 35495.700000
# iteration: 10 separator: strlen: 12802 proc: 0.800000 split: 77.800000 inline-re: 2006.200000 inline-re: 34469.900000
# iteration: 10 separator: LF strlen: 12802 proc: 0.700000 split: 84.300000 inline-re: 2011.000000 inline-re: 34777.700000
# iteration: 100 separator: & strlen: 12802 proc: 0.570000 split: 67.090000 inline-re: 1980.850000 inline-re: 34749.980000
# iteration: 100 separator: strlen: 12802 proc: 0.790000 split: 66.790000 inline-re: 1970.210000 inline-re: 34732.560000
# iteration: 100 separator: LF strlen: 12802 proc: 0.720000 split: 66.380000 inline-re: 1981.890000 inline-re: 35311.660000
# iteration: 1000 separator: & strlen: 12802 proc: 0.381000 split: 66.803000 inline-re: 2005.331000 inline-re: 34960.505000
# iteration: 1000 separator: strlen: 12802 proc: 0.446000 split: 68.072000 inline-re: 1998.458000 inline-re: 34863.826000
# iteration: 1000 separator: LF strlen: 12802 proc: 0.453000 split: 66.062000 inline-re: 2004.961000 inline-re: 34810.200000