Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

internal/encoding: don't interpret file as UTF8 with binary encoding #3740

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cmd/cue/cmd/script_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,18 @@ func TestScript(t *testing.T) {
ts.Check(err)
}
},
// strconv-unquote treats each argument as a Go quoted string literal,
// unquotes it with strconv.Unquote, and prints the results to stdout,
// one per line. A negated invocation (neg) is rejected with a usage
// message, and any unquoting error is passed to ts.Check.
"strconv-unquote": func(ts *testscript.TestScript, neg bool, args []string) {
if neg {
ts.Fatalf("usage: strconv-unquote args...")
}
for _, quoted := range args {
s, err := strconv.Unquote(quoted)
ts.Check(err)
// Fprintln appends a newline after every unquoted value.
fmt.Fprintln(ts.Stdout(), s)
}
},
},
Setup: func(e *testscript.Env) error {
// If a testscript loads CUE packages but forgot to set up a cue.mod,
Expand Down
38 changes: 38 additions & 0 deletions cmd/cue/cmd/testdata/script/embed.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ env CUE_EXPERIMENT=embed=0
cmp stderr out/noembed
env CUE_EXPERIMENT=

# Create test files for type=binary. A txtar archive can only hold valid UTF-8, but we need to test arbitrary bytes as input, so the file contents are stored as quoted strings and unquoted with strconv-unquote.
# Text file, encoded in UTF-8 with a leading BOM
strconv-unquote '"\xef\xbb\xbfHello, 世界"'
cp stdout test-utf8.binary
# A byte sequence that is not valid UTF-8
strconv-unquote "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
cp stdout test-nonutf8.binary

exec cue eval
cmp stdout out/eval

Expand Down Expand Up @@ -39,6 +47,12 @@ f: _ @embed(file="openapi.json", type=openapi)

g: _ @embed(file="openapi.json") // test no auto mode!

h: _ @embed(file="test-utf8.binary", type=binary)

i: _ @embed(file="test-utf8.binary", type=text)

j: _ @embed(file="test-nonutf8.binary", type=binary)

special: {
// These are all valid.
underscoreFile: _ @embed(file="y/_test.json")
Expand Down Expand Up @@ -213,6 +227,18 @@ g: {
}
}
}
h: '''
\ufeffHello, 世界

'''
i: """
Hello, 世界

"""
j: '''
\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7

'''
special: {
underscoreFile: {
z: 45
Expand Down Expand Up @@ -273,6 +299,18 @@ g: {
}
}
}
h: '''
\ufeffHello, 世界

'''
i: """
Hello, 世界

"""
j: '''
\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7

'''
special: {
// These are all valid.
underscoreFile: z: 45
Expand Down
4 changes: 3 additions & 1 deletion internal/encoding/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,9 @@ func NewDecoder(ctx *cue.Context, f *build.File, cfg *Config) *Decoder {
i.err = err
i.expr = ast.NewString(string(b))
case build.Binary:
b, err := io.ReadAll(r)
// Binary files should not generally be treated as UTF-8. Don't use the
// [transform.Reader] created above but read directly from the file instead.
b, err := io.ReadAll(srcr)
i.err = err
s := literal.Bytes.WithTabIndent(1).Quote(string(b))
i.expr = ast.NewLit(token.STRING, s)
Expand Down
Loading