Skip to content

Commit

Permalink
Merge pull request #14 from ffuf/unicode
Browse files Browse the repository at this point in the history
Unicode encoder and decoder
  • Loading branch information
joohoi authored Jun 26, 2020
2 parents 032a6ad + 71035dc commit 5216163
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 7 deletions.
16 changes: 9 additions & 7 deletions pkg/pencode/encoders.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ import (
)

// availableEncoders associates each encoder name with the Encoder value
// registered under it. Each name is listed exactly once: a map literal with
// repeated constant keys does not compile.
var availableEncoders = map[string]Encoder{
	"b64encode":        Base64Encoder{},
	"b64decode":        Base64Decoder{},
	"hexencode":        HexEncoder{},
	"hexdecode":        HexDecoder{},
	"unicodedecode":    UnicodeDecode{},
	"unicodeencodeall": UnicodeEncodeAll{},
	"urlencode":        URLEncoder{},
	"urldecode":        URLDecoder{},
	"urlencodeall":     URLEncoderAll{},
}

type Chain struct {
Expand Down
122 changes: 122 additions & 0 deletions pkg/pencode/unicodedecode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package pencode

import (
"unicode"
"unicode/utf16"
"unicode/utf8"
)

// UnicodeDecode is a stateless encoder whose Encode method decodes backslash
// escape sequences by delegating to unquoteBytes.
type UnicodeDecode struct{}

// Encode passes input through unquoteBytes and returns the decoded bytes.
// The returned error is always nil.
func (u UnicodeDecode) Encode(input []byte) ([]byte, error) {
	decoded := unquoteBytes(input)
	return decoded, nil
}

// HelpText returns the one-line description of this encoder.
func (u UnicodeDecode) HelpText() string {
	const help = "Unicode escape string decode"
	return help
}

// unquoteBytes decodes the backslash escape sequences found in s and returns
// the result in a newly allocated slice; s itself is not modified.
//
// Recognized escapes are \", \\, \/, \', \b, \f, \n, \r, \t and \uXXXX with
// exactly four hexadecimal digits. A \uXXXX surrogate that is immediately
// followed by a matching \uXXXX low surrogate is combined into a single code
// point; a surrogate without a valid partner is replaced with U+FFFD. Bytes
// that are not valid UTF-8 are likewise replaced with U+FFFD.
//
// A backslash that does not begin a recognized escape (unknown escape
// character, malformed \u sequence, or a backslash at the very end of s) is
// copied to the output unchanged and decoding continues, so malformed input
// never causes the remainder of the data to be dropped.
//
// The escape handling is adapted from encoding/json/decode.go.
func unquoteBytes(s []byte) []byte {
	out := make([]byte, 0, len(s))
	var enc [utf8.UTFMax]byte // scratch space for UTF-8 encoding one rune

	r := 0
	for r < len(s) {
		c := s[r]
		switch {
		case c == '\\':
			if r+1 >= len(s) {
				// Trailing backslash: there is nothing to decode, keep it.
				out = append(out, c)
				r++
				continue
			}
			switch esc := s[r+1]; esc {
			case '"', '\\', '/', '\'':
				out = append(out, esc)
				r += 2
			case 'b':
				out = append(out, '\b')
				r += 2
			case 'f':
				out = append(out, '\f')
				r += 2
			case 'n':
				out = append(out, '\n')
				r += 2
			case 'r':
				out = append(out, '\r')
				r += 2
			case 't':
				out = append(out, '\t')
				r += 2
			case 'u':
				rr := getu4(s[r:])
				if rr < 0 {
					// Malformed \u sequence: keep the backslash and let the
					// following bytes be copied as ordinary input.
					out = append(out, c)
					r++
					continue
				}
				r += 6
				if utf16.IsSurrogate(rr) {
					// getu4 yields -1 when no second escape follows, which
					// DecodeRune reports as the replacement character.
					if dec := utf16.DecodeRune(rr, getu4(s[r:])); dec != unicode.ReplacementChar {
						// A valid pair; consume the second escape too.
						r += 6
						rr = dec
					} else {
						// Invalid surrogate; fall back to replacement rune.
						rr = unicode.ReplacementChar
					}
				}
				n := utf8.EncodeRune(enc[:], rr)
				out = append(out, enc[:n]...)
			default:
				// Unknown escape: keep the backslash verbatim. The character
				// after it is handled by the next loop iteration.
				out = append(out, c)
				r++
			}
		case c < utf8.RuneSelf:
			// ASCII
			out = append(out, c)
			r++
		default:
			// Coerce to well-formed UTF-8.
			rr, size := utf8.DecodeRune(s[r:])
			r += size
			n := utf8.EncodeRune(enc[:], rr)
			out = append(out, enc[:n]...)
		}
	}
	return out
}

// getu4 parses a \uXXXX escape located at the very start of s and returns the
// value of its four hexadecimal digits (upper or lower case). It returns -1
// when s is shorter than six bytes, does not begin with `\u`, or contains a
// non-hexadecimal character among the four digits. Bytes after the sixth are
// ignored.
func getu4(s []byte) rune {
	const invalid rune = -1

	if len(s) < 6 {
		return invalid
	}
	if s[0] != '\\' || s[1] != 'u' {
		return invalid
	}

	var value rune
	for i := 2; i < 6; i++ {
		var nibble byte
		switch ch := s[i]; {
		case ch >= '0' && ch <= '9':
			nibble = ch - '0'
		case ch >= 'a' && ch <= 'f':
			nibble = ch - 'a' + 10
		case ch >= 'A' && ch <= 'F':
			nibble = ch - 'A' + 10
		default:
			return invalid
		}
		// Shift in the next hex digit, most significant first.
		value = value<<4 | rune(nibble)
	}
	return value
}
22 changes: 22 additions & 0 deletions pkg/pencode/unicodeencodeall.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package pencode

import (
"bytes"
"fmt"
)

// UnicodeEncodeAll is a stateless encoder that rewrites every character of
// its input as a \uXXXX escape sequence.
type UnicodeEncodeAll struct{}

// Encode returns input with every code point replaced by a \uXXXX escape
// using four lowercase hexadecimal digits. Code points above U+FFFF do not
// fit in four digits, so they are written as a UTF-16 surrogate pair (two
// consecutive \uXXXX escapes). Bytes that are not valid UTF-8 are encoded as
// \ufffd, the Unicode replacement character. The returned error is always
// nil.
func (u UnicodeEncodeAll) Encode(input []byte) ([]byte, error) {
	var b bytes.Buffer
	// Every input byte expands to at most one six-byte escape.
	b.Grow(6 * len(input))
	for _, r := range string(input) {
		if r > 0xffff {
			// Split the supplementary-plane code point into its high and
			// low surrogate halves.
			r -= 0x10000
			fmt.Fprintf(&b, "\\u%04x\\u%04x", 0xd800+(r>>10), 0xdc00+(r&0x3ff))
			continue
		}
		fmt.Fprintf(&b, "\\u%04x", r)
	}
	return b.Bytes(), nil
}

// HelpText returns the one-line description of this encoder.
func (u UnicodeEncodeAll) HelpText() string {
	return "Unicode escape string encode (all characters)"
}

0 comments on commit 5216163

Please sign in to comment.