Updated 2014-07-27 22:01:11 by samoc

samoc: How to do binary-safe "exec":

This is a work-around for exec being broken for binary input/output. -- See http://tip.tcl.tk/259.html
proc bexec {command input} {
    # Execute shell "command", send "input" to stdin, return stdout.
    # Ignores stderr (but "2>@1" can be part of "command").
    # Supports binary input and output. e.g.:
    #     set flac_data [bexec {flac -} $wav_data]
    #
    # Arguments:
    #     command  Command pipeline, in the form accepted by "open |...".
    #     input    Binary string written to the command's stdin.
    # Returns:
    #     Everything the command wrote to stdout, as a binary string.
    # Errors:
    #     Any error raised while talking to the pipeline, or by the final
    #     blocking "close", propagates to the caller. On every path the
    #     per-channel globals are removed and the channel is closed.
    #
    # Requires Tcl 8.6 (half-close via "close $f write", and "try").

    # Run "command" in background...
    set f [open |$command {RDWR BINARY}]

    # Per-channel global state shared with the bexec_read callback.
    set doneVar ::bexec_done.$f
    set outputVar ::bexec_output.$f

    try {
        # Non-blocking, so that writing a large input cannot stall while
        # the command's output is waiting to be read.
        fconfigure $f -blocking 0

        # Connect read function to collect "command" output...
        set $doneVar 0
        set $outputVar {}
        fileevent $f readable [list bexec_read $f]

        # Send "input" to command, then half-close so it sees EOF on stdin.
        puts -nonewline $f $input
        unset input
        close $f write

        # Wait for read function to signal "done"...
        vwait $doneVar

        # Retrieve output...
        set result [set $outputVar]
    } on error {message options} {
        # Do not leak the pipeline; keep the original error, not close's.
        catch {close $f}
        return -options $options $message
    } finally {
        # Always drop the per-channel globals, on success or failure.
        unset -nocomplain -- $doneVar $outputVar
    }

    # Switch back to blocking mode before the final close, as the original
    # does; errors raised by this close propagate to the caller.
    fconfigure $f -blocking 1
    close $f

    return $result
}


proc bexec_read {f} {
    # Readable-event callback for channel "f".
    # Appends whatever can currently be read from "f" to the global
    # ::bexec_output.$f. Once end-of-file is seen, removes the readable
    # handler and sets the global ::bexec_done.$f to 1.

    upvar #0 ::bexec_output.$f collected ::bexec_done.$f finished

    append collected [chan read $f]

    # More data may still arrive; keep the handler installed.
    if {![chan eof $f]} {
        return
    }

    chan event $f readable {}
    set finished 1
}

The following fails with "channel "rc0" does not support OS handles" (AMG mentions this on the exec page).
# Attempt to feed exec's stdin from an in-memory channel instead of a pipe.
# NOTE(review): tcl::chan::variable is presumably Tcllib's variable-backed
# virtual channel over the variable named "input" -- confirm.
set in [tcl::chan::variable input]
chan configure $in -translation binary -blocking 0
# Presumably this is the call that raises the "does not support OS handles"
# error quoted above.
exec $command <@$in

The patch below enables:
# Make the system encoding iso8859-1 so the << input string is handed to
# the command as binary data.
encoding system iso8859-1
# -binarystdout is the option added by the patch below; it makes exec read
# the command's output with "-translation binary".
set flac [exec -binarystdout flac --totally-silent - << $wav]

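Setting the system encoding to iso8859-1 (effectively "binary", because it maps each byte value 0-255 to the code point with the same number) causes the $wav string to be treated as binary. (The encoding system documentation says: "The system encoding is used whenever Tcl passes strings to system calls".)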

The -binarystdout option asks exec to configure its internal output channel as "-translation binary".

A downside here is that << causes exec to call mkstemps() and write the $wav string into a temporary file. exec really should be fixed so that a large string in RAM can be sent to the stdin pipe without intermediate buffering or temporary files. (Note: I think the puts call in proc bexec does something like this: copy $input to a buffer, set up a "writable" event handler, then write from the buffer to the pipe inside vwait.)
diff -u -r tcl8.6.1/generic/tclIOCmd.c tcl8.6.1.patched/generic/tclIOCmd.c
--- tcl8.6.1/generic/tclIOCmd.c        2013-09-20 05:04:14.000000000 +1000
+++ tcl8.6.1.patched/generic/tclIOCmd.c        2014-05-21 00:00:32.000000000 +1000
@@ -877,12 +877,12 @@
     const char *string;
     Tcl_Channel chan;
     int argc, background, i, index, keepNewline, result, skip, length;
-    int ignoreStderr;
+    int ignoreStderr, binaryStdout;
     static const char *const options[] = {
-        "-ignorestderr", "-keepnewline", "--", NULL
+        "-ignorestderr", "-keepnewline", "-binarystdout", "--", NULL
     };
     enum options {
-        EXEC_IGNORESTDERR, EXEC_KEEPNEWLINE, EXEC_LAST
+        EXEC_IGNORESTDERR, EXEC_KEEPNEWLINE, EXEC_BINARYSTDOUT, EXEC_LAST
     };

     /*
@@ -891,6 +891,7 @@

     keepNewline = 0;
     ignoreStderr = 0;
+    binaryStdout = 0;
     for (skip = 1; skip < objc; skip++) {
         string = TclGetString(objv[skip]);
         if (string[0] != '-') {
@@ -904,6 +905,8 @@
             keepNewline = 1;
         } else if (index == EXEC_IGNORESTDERR) {
             ignoreStderr = 1;
+        } else if (index == EXEC_BINARYSTDOUT) {
+            binaryStdout = 1;
         } else {
             skip++;
             break;
@@ -955,6 +958,10 @@
         return TCL_ERROR;
     }

+    if (binaryStdout) {
+        Tcl_SetChannelOption(interp, chan, "-translation", "binary");
+    }
+
     if (background) {
         /*
          * Store the list of PIDs from the pipeline in interp's result and
@@ -1002,7 +1009,7 @@
      * newline character.
      */

-    if (keepNewline == 0) {
+    if (keepNewline == 0 && binaryStdout == 0) {
         string = TclGetStringFromObj(resultPtr, &length);
         if ((length > 0) && (string[length - 1] == '\n')) {
             Tcl_SetObjLength(resultPtr, length - 1);

Below is an alternate patch that dispenses with the -binarystdout option. Instead it looks at what the system encoding is set to. If it is set to iso8859-1 (i.e. "binary", as described above), then exec output is treated as binary and the trailing newline is not stripped.
diff -u -r tcl8.6.1/generic/tclIOCmd.c tcl8.6.1.patched/generic/tclIOCmd.c
--- tcl8.6.1/generic/tclIOCmd.c        2013-09-20 05:04:14.000000000 +1000
+++ tcl8.6.1.patched/generic/tclIOCmd.c        2014-05-21 00:16:29.000000000 +1000
@@ -877,7 +877,7 @@
     const char *string;
     Tcl_Channel chan;
     int argc, background, i, index, keepNewline, result, skip, length;
-    int ignoreStderr;
+    int ignoreStderr, binaryStdout;
     static const char *const options[] = {
         "-ignorestderr", "-keepnewline", "--", NULL
     };
@@ -955,6 +955,11 @@
         return TCL_ERROR;
     }

+    binaryStdout = (strcmp(Tcl_GetEncodingName(NULL), "iso8859-1") == 0);
+    if (binaryStdout) {
+        Tcl_SetChannelOption(interp, chan, "-translation", "binary");
+    }
+
     if (background) {
         /*
          * Store the list of PIDs from the pipeline in interp's result and
@@ -1002,7 +1007,7 @@
      * newline character.
      */

-    if (keepNewline == 0) {
+    if (keepNewline == 0 && binaryStdout == 0) {
         string = TclGetStringFromObj(resultPtr, &length);
         if ((length > 0) && (string[length - 1] == '\n')) {
             Tcl_SetObjLength(resultPtr, length - 1);