vcard

A simple Tcl parser for vCard 2.1, a file format standard for electronic business cards [L1 ].

# Declare this script as the Tcl package "vCard", version 2.1.
# NOTE(review): the version number presumably mirrors the vCard 2.1
# specification whose grammar is reproduced below -- confirm.
package provide vCard 2.1

#   CR                = <ASCII CR, carriage return>  ;(15, 13.)
#   LF                = <ASCII LF, linefeed>         ;(12, 10.)
#   CRLF        = CR LF
#   SPACE        = <ASCII SP, space>            ;(40, 32.)
#   HTAB        = <ASCII HT, horizontal-tab>   ;(11, 9.)

# All literal property names are valid as upper, lower, or mixed case.

# ws                = 1*(SPACE | HTAB) ; "whitespace," one or more spaces or tabs
# wsls                = 1*(SPACE | HTAB | CRLF) ; whitespace with line separators
# word                = <any printable 7bit us-ascii except []=:., >
# groups        = groups "." word | word

# these may be "folded":
# value                = 7bit | quoted-printable | base64
# 7bit                = <7bit us-ascii printable chars, excluding CR LF>
# 8bit                = <MIME RFC 1521 8-bit text>
# quoted-printable = <MIME RFC 1521 quoted-printable text>
# base64        = <MIME RFC 1521 base64 text> ; the end of the text is marked with two CRLF
#                 sequences; this results in one blank line before the start of the next property

# these may be "folded":
# name                = "LOGO" | "PHOTO" | "LABEL" | "FN" | "TITLE" | "SOUND" | "VERSION" | "TEL"
#                 | "EMAIL" | "TZ" | "GEO" | "NOTE" | "URL" | "BDAY" | "ROLE" | "REV" | "UID"
#                 | "KEY" | "MAILER" | "X-" word

# knowntype        = "DOM" | "INTL" | "POSTAL" | "PARCEL" | "HOME" | "WORK" | "PREF" | "VOICE" | "FAX"
#                 | "MSG" | "CELL" | "PAGER" | "BBS" | "MODEM" | "CAR" | "ISDN" | "VIDEO" | "AOL"
#                 | "APPLELINK" | "ATTMAIL" | "CIS" | "EWORLD" | "INTERNET" | "IBMMAIL" | "MCIMAIL"
#                 | "POWERSHARE" | "PRODIGY" | "TLX" | "X400" | "GIF" | "CGM" | "WMF" | "BMP" | "MET"
#                 | "PMB" | "DIB" | "PICT" | "TIFF" | "PDF" | "PS" | "JPEG" | "QTIME" | "MPEG"
#                 | "MPEG2" | "AVI" | "WAVE" | "AIFF" | "PCM" | "X509" | "PGP"

# ptypeval        = knowntype | "X-" word
# pvalueval        = "INLINE" | "URL" | "CONTENT-ID" | "CID" | "X-" word
# pencodingval         = "7BIT" | "8BIT" | "QUOTED-PRINTABLE" | "BASE64" | "X-" word
# charsetval        = <a character set string as defined in Section 7.1 of RFC 1521>
# langval        = <a language string as defined in RFC 1766>

# param                = "TYPE" [ws] "=" [ws] ptypeval | "VALUE" [ws] "=" [ws] pvalueval
#                 | "ENCODING" [ws] "=" [ws] pencodingval | "CHARSET" [ws] "=" [ws] charsetval
#                 | "LANGUAGE" [ws] "=" [ws] langval | "X-" word [ws] "=" [ws] word | knowntype

# paramlist        = paramlist [ws] ";" [ws] param | param

# params        = ";" [ws] paramlist

# nonsemi        = <any non-control ASCII except ";">

# strnosemi        = *(*nonsemi ("\;" | "\" CRLF)) *nonsemi ; To include a semicolon in this string,
#                 it must be escaped with a "\" character.

# addressparts        = 0*6(strnosemi ";") strnosemi        ; PO Box, Extended Addr, Street, Locality, Region, Postal Code, Country Name

# orgparts        = *(strnosemi ";") strnosemi        ; First is Organization Name, remainder are Organization Units.

# nameparts        = 0*4(strnosemi ";") strnosemi        ; Family, Given, Middle, Prefix, Suffix.

#; these may be "folded"
# item                = [groups "."] name [params] ":" value CRLF | [groups "."] "ADR" [params] ":" addressparts CRLF
#                 | [groups "."] "ORG" [params] ":" orgparts CRLF | [groups "."] "N" [params] ":" nameparts CRLF
#                 | [groups "."] "AGENT" [params] ":" vcard CRLF

# items                = items *CRLF item | item

# vcard                = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"

# vcard_file        = [wsls] vcard [wsls]

namespace eval vCard {

    # Vocabulary of the vCard 2.1 specification. These lists are reference
    # data; the parser below does not currently validate input against them.
    variable encodings {BASE64 QUOTED-PRINTABLE 8BIT}
    variable params {ENCODING CHARSET LANGUAGE VALUE TYPE}
    variable values {INLINE URL CONTENT-ID}
    variable properties {FN}

    # FN - Formatted Name - specifies the formatted name string for the vCard object.
    # N - Name - This property specifies a structured representation of the name of the person,
    #   place or thing - consists of the components of the name specified as positional fields
    #   separated by the Field Delimiter character (ASCII decimal 59). The property value is a
    #   concatenation of the Family Name (first field), Given Name (second field), Additional
    #   Names (third field), Name Prefix (fourth field), and Name Suffix (fifth field) strings.
    # PHOTO - Photograph - This property specifies an image or photograph of an individual
    variable ptype {GIF CGM WMF BMP MET PMB DIB PICT TIFF PS PDF JPEG MPEG MPEG2 AVI QTIME}
    # BDAY - Birthdate - date of birth of the individual associated with the vCard. The value
    #   for this property is a calendar date in a complete representation consistent with ISO 8601.
    # ADR - Delivery Address - components that are based on the X.500 Post Office Box attribute,
    #   the X.520 Street Address geographical attribute, the X.520 Locality Name geographical
    #   attribute, the X.520 State or Province Name geographical attribute, the X.520 Postal Code
    #   attribute, and the X.520 Country Name geographical attribute.
    variable adrParam {TYPE {DOM INTL POSTAL PARCEL HOME WORK}}

    # LABEL - Delivery Label - This property is based on the semantics of the X.520 Postal Address
    #   attribute. This specification has added semantics to those defined by the X.500 Series
    #   standard for differentiating Home, Work, Parcel, Postal, Domestic, and International
    #   delivery label types. OPT

    # TEL - Telephone Number -
    variable teltype {PREF WORK HOME VOICE FAX MSG CELL PAGER BBS MODEM CAR ISDN VIDEO}
    # EMAIL -
    variable emailType {INTERNET}
    # TZ - Time Zone - ISO 8601
    # AGENT - Agent - This property is equivalent to nesting another vCard with the specified vCard.
    # ORG - Organization Name and Organizational Unit - a concatenation of the Organization Name
    #   (first field), Organizational Unit (second field) strings. Additional positional fields,
    #   if specified, contain additional Organizational Units.
    # NOTE - Comment -
    # REV - Last Revision - calendar date and time of day of the last update to the vCard object ISO 8601
    # SOUND - sound -
    variable soundType {WAVE PCM AIFF}

    # URL - -
    # UID - Unique Identifier - persistent, globally unique identifier associated with the object
    # VERSION - - vcard spec supported 2.1
    # KEY - Public Key -
    variable keyType {X509 PGP}

    # vCard parse text --
    #
    #   Parse the text of one or more vCard 2.1 objects.
    #
    # Arguments:
    #   text    Complete vCard text; LF or CRLF line endings are accepted and
    #           folded (continued) lines are joined with a single space.
    #
    # Results:
    #   A list with one dict per BEGIN ... END pair, in input order. Each dict
    #   may contain:
    #     address   dict with keys box ext street locality region postcode
    #               country (from ADR)
    #     org       dict with keys name and units, units being a list (from ORG)
    #     name      dict with keys family given middle prefix suffix (from N)
    #     <NAME>    raw value of every other known or X- property, keyed by the
    #               property name exactly as written in the input
    #   Missing positional fields are stored as empty strings. Group prefixes
    #   and property parameters are recognised but discarded. A property that
    #   occurs several times keeps only its last value. Unknown properties are
    #   ignored.
    #
    # Errors:
    #   Raises "Can't handle agents" when an AGENT property is encountered.
    proc parse {text} {
        # Unfold continuation lines. The optional CR matters: with CRLF input
        # a pattern anchored on LF alone leaves a stray CR inside the value.
        regsub -all {\r?\n[ \t]+} $text { } text

        set result {}
        # entry always exists, so an END without a BEGIN cannot trip over an
        # unset variable; open records whether a BEGIN has been seen.
        set entry [dict create]
        set open 0

        foreach line [split $text \n] {
            set line [string trim $line]
            if {$line eq ""} continue

            # Everything up to the first colon describes the property, the
            # remainder (which may itself contain colons) is the value.
            set value [join [lassign [split $line :] begin] :]

            # Strip the parameters first, then the optional group prefix. The
            # grammar is groups "." name, so the name is the LAST dot-separated
            # token. Trimming allows the "BEGIN : VCARD" spelling.
            set name [string trim [lindex [split $begin {;}] 0]]
            set name [lindex [split $name .] end]

            # Positional fields of structured values. A backslash-escaped
            # semicolon is literal data, not a delimiter, so it is hidden
            # behind a NUL sentinel while splitting and restored afterwards.
            set fields {}
            foreach field [split [string map [list "\\;" \x00] $value] {;}] {
                lappend fields [string map [list \x00 {;}] $field]
            }

            switch -glob -- [string toupper $name] {
                BEGIN {
                    set entry [dict create]
                    set open 1
                }
                END {
                    # Tolerate a stray END: only emit something if a card was
                    # opened or properties were actually collected.
                    if {$open || [dict size $entry] > 0} {
                        lappend result $entry
                    }
                    set entry [dict create]
                    set open 0
                }

                ADR {
                    lassign $fields box ext street locality region postcode country
                    foreach v {box ext street locality region postcode country} {
                        dict set entry address $v [set $v]
                    }
                }

                ORG {
                    set units [lassign $fields orgname]
                    dict set entry org name $orgname
                    dict set entry org units $units
                }

                N {
                    lassign $fields family given middle prefix suffix
                    foreach v {family given middle prefix suffix} {
                        dict set entry name $v [set $v]
                    }
                }

                AGENT {
                    error "Can't handle agents"
                }

                LOGO - PHOTO - LABEL - FN - TITLE - SOUND -
                VERSION - TEL - EMAIL - TZ - GEO - NOTE -
                URL - BDAY - ROLE - REV - UID - KEY -
                MAILER - X-* {
                    dict set entry $name $value
                }

                default {
                }
            }
        }
        return $result
    }

    namespace export -clear *
    namespace ensemble create -subcommands {}
}

UKo 2009-11-15: Small change so that collections of vCards can be parsed, too. The parse subcommand now always returns a list of entries, even if the input contains only one vCard.