Skip to content

Commit 3fba858

Browse files
committed
Fix handling of single-character FS
Per the POSIX spec (and the various implementations), a 1-char FS should be handled specially as a straight string split and not as a regex. However, I had overlooked this, and was only doing this in the case of "\\". This fixes things like FS="|" as reported by @shah in #29 (thanks!). Bumped up the minor version, as this seems like a fairly significant change rather than just a patch fix. Fixes #29.
1 parent 7d9c00c commit 3fba858

3 files changed

Lines changed: 22 additions & 21 deletions

File tree

goawk.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ import (
4343
)
4444

4545
const (
46-
version = "v1.5.1"
46+
version = "v1.6.0"
4747
copyright = "GoAWK " + version + " - Copyright (c) 2019 Ben Hoyt"
4848
shortUsage = "usage: goawk [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]"
4949
longUsage = `Standard AWK arguments:

interp/interp.go

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"runtime"
2222
"strconv"
2323
"strings"
24+
"unicode/utf8"
2425

2526
. "github.com/benhoyt/goawk/internal/ast"
2627
. "github.com/benhoyt/goawk/lexer"
@@ -1015,15 +1016,10 @@ func (p *interp) setVar(scope VarScope, index int, v value) error {
10151016
p.filename = p.toString(v)
10161017
case V_FS:
10171018
p.fieldSep = p.toString(v)
1018-
if p.fieldSep != " " {
1019-
fieldSep := p.fieldSep
1020-
if fieldSep == `\` {
1021-
// Other AWKs treat just `\` as regex `\\`
1022-
fieldSep = `\\`
1023-
}
1024-
re, err := regexp.Compile(fieldSep)
1019+
if utf8.RuneCountInString(p.fieldSep) > 1 {
1020+
re, err := regexp.Compile(p.fieldSep)
10251021
if err != nil {
1026-
return newError("invalid regex %q: %s", fieldSep, err)
1022+
return newError("invalid regex %q: %s", p.fieldSep, err)
10271023
}
10281024
p.fieldSepRegex = re
10291025
}

interp/io.go

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"os/exec"
1313
"strconv"
1414
"strings"
15+
"unicode/utf8"
1516

1617
. "github.com/benhoyt/goawk/internal/ast"
1718
. "github.com/benhoyt/goawk/lexer"
@@ -290,27 +291,31 @@ func (p *interp) ensureFields() {
290291
if p.fieldSep == " " {
291292
// FS space (default) means split fields on any whitespace
292293
p.fields = strings.Fields(p.line)
294+
} else if utf8.RuneCountInString(p.fieldSep) <= 1 {
295+
// 1-char FS is handled as plain split (not regex)
296+
p.fields = strings.Split(p.line, p.fieldSep)
293297
} else if p.line == "" {
294298
p.fields = nil
295299
} else {
296300
// Split on FS as a regex
297301
p.fields = p.fieldSepRegex.Split(p.line, -1)
302+
}
298303

299-
// Special case for when RS=="" and FS is single character,
300-
// split on newline in addition to FS. See more here:
301-
// https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html
302-
if p.recordSep == "" && len(p.fieldSep) == 1 {
303-
fields := make([]string, 0, len(p.fields))
304-
for _, field := range p.fields {
305-
lines := strings.Split(field, "\n")
306-
for _, line := range lines {
307-
trimmed := strings.TrimSuffix(line, "\r")
308-
fields = append(fields, trimmed)
309-
}
304+
// Special case for when RS=="" and FS is single character,
305+
// split on newline in addition to FS. See more here:
306+
// https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html
307+
if p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 {
308+
fields := make([]string, 0, len(p.fields))
309+
for _, field := range p.fields {
310+
lines := strings.Split(field, "\n")
311+
for _, line := range lines {
312+
trimmed := strings.TrimSuffix(line, "\r")
313+
fields = append(fields, trimmed)
310314
}
311-
p.fields = fields
312315
}
316+
p.fields = fields
313317
}
318+
314319
p.numFields = len(p.fields)
315320
}
316321

0 commit comments

Comments
 (0)