Move urlencode/urldecode functions to core lib

2024-11-21 21:30:09 +00:00 · 2015-08-09 16:28:47 -04:00 · 2015-08-09 16:28:47 -04:00 · 14b4ba83c3
commit 14b4ba83c3
parent 88f42b6132
2 changed files with 135 additions and 32 deletions
--- a/lib/functions.zsh
+++ b/lib/functions.zsh
@ -73,3 +73,137 @@ function env_default() {
    env | grep -q "^$1=" && return 0 
    export "$1=$2"       && return 3
 }
 # Required for $langinfo
 zmodload zsh/langinfo
 # URL-encode a string
 #
 # Encodes a string using RFC 2396 URL-encoding (%-escaped).
 # See: https://www.ietf.org/rfc/rfc2396.txt
 #
 # By default, reserved characters and unreserved "mark" characters are
 # not escaped by this function. This allows the common usage of passing
 # an entire URL in, and encoding just special characters in it, with 
 # the expectation that reserved and mark characters are used appropriately.
 # The -r and -m options turn on escaping of the reserved and mark characters,
 # respectively, which allows arbitrary strings to be fully escaped for
 # embedding inside URLs, where reserved characters might be misinterpreted.
 #
 # Prints the encoded string on stdout.
 # Returns nonzero if encoding failed.
 #
 # Usage:
 #  omz_urlencode [-r] [-m] [-P] <string>
 #  
 #    -r causes reserved characters (;/?:@&=+$,) to be escaped
 #
 #    -m causes "mark" characters (_.!~*''()-) to be escaped
 #
 #    -P causes spaces to be encoded as '%20' instead of '+'
 function omz_urlencode() {
  # Percent-encode a string for use in a URL.
  #
  # Options (consumed by zparseopts before the positional argument is read):
  #   -r  also escape reserved characters (;/?:@&=+$,)
  #   -m  also escape "mark" characters (_.!~*'()-)
  #   -P  encode spaces as %20 instead of +
  # Arguments: $1 - the string to encode
  # Outputs:   the encoded string on stdout
  # Returns:   1 if converting the input to UTF-8 failed
  emulate -L zsh
  # Declare the option array locally so it does not leak into the caller
  local -a opts
  zparseopts -D -E -a opts r m P
  local in_str=$1
  local spaces_as_plus
  if [[ -z $opts[(r)-P] ]]; then spaces_as_plus=1; fi
  local str="$in_str"
  # URLs must use UTF-8 encoding; convert str to UTF-8 if required
  local encoding=$langinfo[CODESET]
  local safe_encodings
  safe_encodings=(UTF-8 utf8 US-ASCII)
  if [[ -z ${safe_encodings[(r)$encoding]} ]]; then
    str=$(echo -E "$str" | iconv -f "$encoding" -t UTF-8)
    if [[ $? != 0 ]]; then
      echo "Error converting string from $encoding to UTF-8" >&2
      return 1
    fi
  fi
  # Use LC_ALL=C to process text byte-by-byte
  local i byte ord LC_ALL=C
  export LC_ALL
  local reserved=';/?:@&=+$,'
  # Double-quoted so the apostrophe is really part of the set; writing it
  # as '' inside single quotes just concatenates two strings and drops it.
  local mark="_.!~*'()-"
  local dont_escape="[A-Za-z0-9"
  if [[ -z $opts[(r)-r] ]]; then
    dont_escape+=$reserved
  fi
  # $mark must be last because of the "-"
  if [[ -z $opts[(r)-m] ]]; then
    dont_escape+=$mark
  fi
  dont_escape+="]"
  # Implemented with shell arithmetic only, avoiding subshells in the loop,
  # for performance (primarily on Windows).
  local url_str=""
  for (( i = 1; i <= ${#str}; ++i )); do
    byte="$str[i]"
    if [[ "$byte" =~ "$dont_escape" ]]; then
      url_str+="$byte"
    else
      if [[ "$byte" == " " && -n $spaces_as_plus ]]; then
        url_str+="+"
      else
        ord=$(( [##16] #byte ))
        # A percent-escape is always two hex digits; left-pad bytes < 0x10
        url_str+="%${(l:2::0:)ord}"
      fi
    fi
  done
  echo -E "$url_str"
 }
 # URL-decode a string
 #
 # Decodes a RFC 2396 URL-encoded (%-escaped) string.
 # This decodes the '+' and '%' escapes in the input string, and leaves 
 # other characters unchanged. Does not enforce that the input is a 
 # valid URL-encoded string. This is a convenience to allow callers to
 # pass in a full URL or similar strings and decode them for human
 # presentation.
 #
 # Outputs the decoded string on stdout.
 # Returns nonzero if decoding failed.
 #
 # Usage:
 #   omz_urldecode <urlstring>  - prints decoded string followed by a newline
 function omz_urldecode {
  emulate -L zsh
  local encoded_url=$1
  echo -e input $1
  # Work bytewise, since URLs escape UTF-8 octets
  local caller_encoding=$langinfo[CODESET]
  local LC_ALL=C
  export LC_ALL
  # Change + back to ' '
  local tmp=${encoded_url:gs/+/ /}
  # Protect other escapes to pass through the printf unchanged
  tmp=${tmp:gs/\\/\\\\/}
  # Handle %-escapes by turning them into `\xXX` printf escapes
  tmp=${tmp:gs/%/\\x/}
  echo -E "before decode $tmp"
  local decoded
  eval "decoded=\$'$tmp'"
  # Now we have a UTF-8 encoded string in the variable. We need to re-encode
  # it if caller is in a non-UTF-8 locale.
  local safe_encodings
  safe_encodings=(UTF-8 utf8 US-ASCII)
  if [[ -z ${safe_encodings[(r)$caller_encoding]} ]]; then
    decoded=$(echo -E "$decoded" | iconv -f UTF-8 -t $caller_encoding)
    if [[ $? != 0 ]]; then
      echo "Error converting string from UTF-8 to $caller_encoding" >&2
      return 1
    fi
  fi
  echo -E "$decoded"
 }
--- a/lib/termsupport.zsh
+++ b/lib/termsupport.zsh
@ -59,44 +59,13 @@ preexec_functions+=(omz_termsupport_preexec)
 if [[ "$TERM_PROGRAM" == "Apple_Terminal" ]] && [[ -z "$INSIDE_EMACS" ]]; then
  # URL-encodes a string
  # Outputs the encoded string on stdout
  # Returns nonzero if encoding failed
  function _omz_urlencode() {
    local str=$1
    local url_str=""
    # URLs must use UTF-8 encoding; convert if required
    local encoding=${LC_CTYPE/*./}
    if [[ -n $encoding && $encoding != UTF-8 && $encoding != utf8 ]]; then
      str=$(echo $str | iconv -f $encoding -t UTF-8)
      if [[ $? != 0 ]]; then
        echo "Error converting string from $encoding to UTF-8" >&2
        return 1
      fi
    fi
    # Use LC_CTYPE=C to process text byte-by-byte
    local i ch hexch LC_CTYPE=C
    for ((i = 1; i <= ${#str}; ++i)); do
      ch="$str[i]"
      if [[ "$ch" =~ [/._~A-Za-z0-9-] ]]; then
        url_str+="$ch"
      else
        hexch=$(printf "%02X" "'$ch")
        url_str+="%$hexch"
      fi
    done
    echo $url_str
  }
  # Emits the control sequence to notify Terminal.app of the cwd
  function update_terminalapp_cwd() {
    # Identify the directory using a "file:" scheme URL, including
    # the host name to disambiguate local vs. remote paths.
    # Percent-encode the pathname.
-    local URL_PATH=$(_omz_urlencode $PWD)
+    local URL_PATH=$(omz_urlencode -P $PWD)
    [[ $? != 0 ]] && return 1
    local PWD_URL="file://$HOST$URL_PATH"
    # Undocumented Terminal.app-specific control sequence