Updated 2012-12-12 01:36:36 by AMG

milarepa: I just wanted to have some fun creating a program that produces new words from a text file.

Here is how it works:

Basically, I pick a word list in any language. First, shuffle the list and pick the first word. Choose the "link length", let's say 3, so I take the first three letters. Then find those three letters in one of the following words. When they are found, take the next letter, so the word being created now has four letters. Now take the last three letters and try to find another match. The program continues until it finds a carriage return (i.e. the end of a word). The program has an option to choose a different link length; the most useful values are probably 2, 3 and 4. I added a debug option that shows how the word was built. The sort option is only for debugging; to be useful, the order should be random. The print option is the number of words to create. Startline is another option for debugging. Some options have default values.

I use tepam and Steve Cohen's shuffle number 4.

Does anyone have other ideas for how to create words?

Steve Cohen's shuffle4
# shuffle4 --
#
#   Return a new list holding the elements of the given list in random
#   order (Steve Cohen's "shuffle4").
#
# Arguments:
#   list    The list to shuffle. The caller's variable is not modified;
#           only the local copy is rearranged.
#
# Results:
#   A list with the same elements as the input, randomly ordered.
#   An empty input yields an empty list.
proc shuffle4 { list } {
    # Initialise the result so an empty input returns {} instead of
    # failing on an unset variable.
    set slist {}
    set n [llength $list]
    while {$n > 0} {
        # Pick a random element among the first n (not yet chosen) ones.
        set j [expr {int(rand()*$n)}]
        lappend slist [lindex $list $j]
        incr n -1
        # Move the last unchosen element into the freed slot so that the
        # first n elements are again exactly the ones still available.
        lset list $j [lindex $list $n]
    }
    return $slist
}

# Load tepam and import its commands into the current namespace; the
# "procedure" command used below is presumably one of them.
package require tepam
namespace import -force tepam::*
# NOTE(review): presumably tells tepam that named arguments do not have to
# come before the unnamed ones -- confirm against the tepam documentation.
set tepam::named_arguments_first 0
# Load the shuffle helper from a separate file (presumably the shuffle4
# proc shown earlier on this page -- verify the file contents).
source shuffle4.tcl
# createWord --
#
#   Build made-up words by chaining letter groups ("links") found in the
#   words of a text file (one word per line) and print each new word.
#
#   Starting from one word of the list, the first -linklength letters form
#   the initial chain. The chain is searched for in the words that follow;
#   when found, the letter right after the match is appended to the new
#   word and the chain is shifted by one letter. This repeats until a match
#   is at the end of a word (no letter follows) or no match is found.
#   A created word that already exists in the file is discarded and a new
#   attempt is made from the next start line.
#
# Arguments (declared through tepam):
#   -from        File containing the word list, one word per line.
#   -linklength  Number of letters in the link.
#   -startline   1-based line of the word to start from (default 1).
#   -debug       Flag; print how each word is built.
#   -print       Number of words to create (default 1).
#   -sort        az, za, r (random) or 0 (keep file order, default).
#
# Side effects:
#   Sets the global variables "new" and "line" (kept for interactive
#   debugging) and writes the created words to stdout.
procedure {createWord} {
    -short_description "Open file and create a new word"
    -args {
        {-from -type file -description "open file to make random words"}
        {-linklength -type integer -description "length of link"}
        {-startline -type integer -default 1 -optional -description "start at line number"}
        {-debug -type none -optional -description "how the word was built"}
        {-print -type integer -default 1 -optional -description "quantity of words to print"}
        {-sort -choices {az za r 0} -optional -default 0 -description "sort list az, za or random"}
    }
} {
    # if debugging on tkcon put global vars to be able to access them
    global new line

    # Read the whole word list and release the file handle right away;
    # nothing below needs the channel.
    set file [open $from r]
    set line [split [read -nonewline $file] \n]
    close $file

    # loop: words quantity
    for {set j 1} {$j <= $print} {incr j} {
        switch -- $sort {
            r  {set line [shuffle4 $line]}
            az {set line [lsort -dictionary $line]}
            za {set line [lsort -dictionary -decreasing $line]}
        }

        # The retry path below increments startline; stop once it no longer
        # names a line of the list instead of looping on an empty pick.
        if {$startline < 1 || $startline > [llength $line]} {
            puts stderr "createWord: start line $startline is outside the word list"
            break
        }

        # pick the starting word; startline is 1-based, list indices 0-based
        set pick [lindex $line $startline-1]
        if {$debug} {puts " pick: $pick"}
        set pos -1
        set new ""

        # loop: word length (capped at 100 letters)
        for {set i 1} {$i <= 100} {incr i} {
            # take the [linklength] letters following position pos of the
            # current word; on the first pass these are its first letters
            set chain [string range $pick $pos+1 $pos+$linklength]
            if {$debug} {puts "chain: $chain"}
            # idx is the line of the current word; -exact so that the word
            # itself is never interpreted as a glob pattern
            set idx [lsearch -exact $line $pick]
            # find a later word that contains the chain anywhere
            set pick [lsearch -glob -start $idx+1 -inline $line *$chain*]
            if {$debug} {puts " pick: $pick"}
            # save the position of the matched letters
            set pos [string first $chain $pick]
            # letter following the match; empty at end of word or no match
            set add [string index $pick $pos+$linklength]
            if {$i == 1} {set new $chain}
            append new $add
            if {$debug} {puts "  new: $new"}
            if {$add eq ""} {break}
        }

        # if the created word is in the textfile don't print it and make
        # another one, starting from the next line
        if {[lsearch -exact $line $new] != -1} {
            incr j -1
            incr startline
            continue
        }
        puts $new
    }
}
# Example run: request 15 words with a link length of 3 from
# englishWords.txt, using the random sort mode.
createWord -from englishWords.txt -linklength 3 -print 15 -sort r