Skip to content

Commit 8f23f91

Browse files
benburwell authored and emersion committed
Handle the invalid "utf8" encoding
While this encoding name is non-standard, it does appear in the wild. As its meaning is unambiguous, we should recognize and handle it (but not emit it ourselves).
1 parent 5c83a65 commit 8f23f91

File tree

2 files changed

+7
-2
lines changed

2 files changed

+7
-2
lines changed

charset/charset.go

+2 -2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ func init() {
4949
// Reader returns an io.Reader that converts the provided charset to UTF-8.
5050
func Reader(charset string, input io.Reader) (io.Reader, error) {
5151
charset = strings.ToLower(charset)
52-
// "ascii" is not in the spec but is common
53-
if charset == "utf-8" || charset == "us-ascii" || charset == "ascii" {
52+
// "ascii" and "utf8" are not in the spec but are common
53+
if charset == "utf-8" || charset == "utf8" || charset == "us-ascii" || charset == "ascii" {
5454
return input, nil
5555
}
5656
if enc, ok := charsets[charset]; ok {

charset/charset_test.go

+5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ var testCharsets = []struct {
2121
encoded: []byte("café"),
2222
decoded: "café",
2323
},
24+
{
25+
charset: "utf8",
26+
encoded: []byte("café"),
27+
decoded: "café",
28+
},
2429
{
2530
charset: "windows-1250",
2631
encoded: []byte{0x8c, 0x8d, 0x8f, 0x9c, 0x9d, 0x9f, 0xbc, 0xbe},

0 commit comments

Comments
 (0)