diff --git a/interp/functions.go b/interp/functions.go index 8752179a..09e26240 100644 --- a/interp/functions.go +++ b/interp/functions.go @@ -53,13 +53,13 @@ func (p *interp) callNative(index int, args []value) (value, error) { return null(), nil case 1: // Single return value - return fromNative(outs[0]), nil + return fromNative(outs[0], p.chars), nil case 2: // Two-valued return of (scalar, error) if !outs[1].IsNil() { return null(), outs[1].Interface().(error) } - return fromNative(outs[0]), nil + return fromNative(outs[0], p.chars), nil default: // Should never happen (checked at parse time) panic(fmt.Sprintf("unexpected number of return values: %d", len(outs))) @@ -110,7 +110,7 @@ func (p *interp) toNative(v value, typ reflect.Type) reflect.Value { } // Convert from a native Go value to an AWK value -func fromNative(v reflect.Value) value { +func fromNative(v reflect.Value, chars bool) value { switch v.Kind() { case reflect.Bool: return boolean(v.Bool()) @@ -121,10 +121,10 @@ func fromNative(v reflect.Value) value { case reflect.Float32, reflect.Float64: return num(v.Float()) case reflect.String: - return str(v.String()) + return str(v.String(), chars) case reflect.Slice: if b, ok := v.Interface().([]byte); ok { - return str(string(b)) + return str(string(b), chars) } // Shouldn't happen: prevented by checkNativeFunc panic(fmt.Sprintf("unexpected return slice: %s", v.Type().Elem().Kind())) @@ -279,7 +279,7 @@ func (p *interp) split(s string, scope resolver.Scope, index int, fs string, mod } array := make(map[string]value, len(parts)) for i, part := range parts { - array[strconv.Itoa(i+1)] = numStr(part) + array[strconv.Itoa(i+1)] = numStr(part, p.chars) } p.arrays[p.arrayIndex(scope, index)] = array return len(array), nil @@ -449,51 +449,3 @@ func (p *interp) sprintf(format string, args []value) (string, error) { } return fmt.Sprintf(format, converted...), nil } - -func substrChars(s string, pos int) string { - // Count characters till we get to pos. - chars := 1 - start := 0 - for start = range s { - chars++ - if chars > pos { - break - } - } - if pos >= chars { - start = len(s) - } - return s[start:] -} - -func substrLengthChars(s string, pos, length int) string { - // Count characters till we get to pos. - chars := 1 - start := 0 - for start = range s { - chars++ - if chars > pos { - break - } - } - if pos >= chars { - start = len(s) - } - - // Count characters from start till we reach length. - chars = 0 - end := 0 - for end = range s[start:] { - chars++ - if chars > length { - break - } - } - if length >= chars { - end = len(s) - } else { - end += start - } - - return s[start:end] -} diff --git a/interp/interp.go b/interp/interp.go index 5524eb0f..ce28026c 100644 --- a/interp/interp.go +++ b/interp/interp.go @@ -475,10 +475,10 @@ func (p *interp) setExecuteConfig(config *Config) error { // Set up ARGV and other variables from config argvIndex := p.arrayIndexes["ARGV"] - p.setArrayValue(resolver.Global, argvIndex, "0", str(config.Argv0)) + p.setArrayValue(resolver.Global, argvIndex, "0", str(config.Argv0, p.chars)) p.argc = len(config.Args) + 1 for i, arg := range config.Args { - p.setArrayValue(resolver.Global, argvIndex, strconv.Itoa(i+1), numStr(arg)) + p.setArrayValue(resolver.Global, argvIndex, strconv.Itoa(i+1), numStr(arg, p.chars)) } p.noArgVars = config.NoArgVars p.filenameIndex = 1 @@ -505,13 +505,13 @@ func (p *interp) setExecuteConfig(config *Config) error { environIndex := p.arrayIndexes["ENVIRON"] if config.Environ != nil { for i := 0; i < len(config.Environ); i += 2 { - p.setArrayValue(resolver.Global, environIndex, config.Environ[i], numStr(config.Environ[i+1])) + p.setArrayValue(resolver.Global, environIndex, config.Environ[i], numStr(config.Environ[i+1], p.chars)) } } else { for _, kv := range os.Environ() { key, val, ok := strings.Cut(kv, "=") if ok { - p.setArrayValue(resolver.Global, environIndex, key, numStr(val)) + p.setArrayValue(resolver.Global, environIndex, key, numStr(val, p.chars)) } } } @@ -748,27 +748,27 @@ func (p *interp) getSpecial(index int) value { case ast.V_ARGC: return num(float64(p.argc)) case ast.V_CONVFMT: - return str(p.convertFormat) + return str(p.convertFormat, p.chars) case ast.V_FILENAME: return p.filename case ast.V_FS: - return str(p.fieldSep) + return str(p.fieldSep, p.chars) case ast.V_OFMT: - return str(p.outputFormat) + return str(p.outputFormat, p.chars) case ast.V_OFS: - return str(p.outputFieldSep) + return str(p.outputFieldSep, p.chars) case ast.V_ORS: - return str(p.outputRecordSep) + return str(p.outputRecordSep, p.chars) case ast.V_RS: - return str(p.recordSep) + return str(p.recordSep, p.chars) case ast.V_RT: - return str(p.recordTerminator) + return str(p.recordTerminator, p.chars) case ast.V_SUBSEP: - return str(p.subscriptSep) + return str(p.subscriptSep, p.chars) case ast.V_INPUTMODE: - return str(inputModeString(p.inputMode, p.csvInputConfig)) + return str(inputModeString(p.inputMode, p.csvInputConfig), p.chars) case ast.V_OUTPUTMODE: - return str(outputModeString(p.outputMode, p.csvOutputConfig)) + return str(outputModeString(p.outputMode, p.csvOutputConfig), p.chars) default: panic(fmt.Sprintf("unexpected special variable index: %d", index)) } @@ -778,11 +778,11 @@ func (p *interp) getSpecial(index int) value { func (p *interp) setVarByName(name, value string) error { index := ast.SpecialVarIndex(name) if index > 0 { - return p.setSpecial(index, numStr(value)) + return p.setSpecial(index, numStr(value, p.chars)) } index, ok := p.scalarIndexes[name] if ok { - p.globals[index] = numStr(value) + p.globals[index] = numStr(value, p.chars) return nil } // Ignore variables that aren't defined in program @@ -925,25 +925,25 @@ func (p *interp) setArrayValue(scope resolver.Scope, arrayIndex int, index strin func (p *interp) getField(index int) value { if index == 0 { if p.lineIsTrueStr { - return str(p.line) + return str(p.line, p.chars) } else { - return numStr(p.line) + return numStr(p.line, p.chars) } } p.ensureFields() if index < 1 { index = len(p.fields) + 1 + index if index < 1 { - return str("") + return str("", p.chars) } } if index > len(p.fields) { - return str("") + return str("", p.chars) } if p.fieldsIsTrueStr[index-1] { - return str(p.fields[index-1]) + return str(p.fields[index-1], p.chars) } else { - return numStr(p.fields[index-1]) + return numStr(p.fields[index-1], p.chars) } } @@ -961,7 +961,7 @@ func (p *interp) getFieldByName(name string) (value, error) { } index := p.fieldIndexes[name] if index == 0 { - return str(""), nil + return str("", p.chars), nil } return p.getField(index), nil } diff --git a/interp/io.go b/interp/io.go index a48d4f35..89441295 100644 --- a/interp/io.go +++ b/interp/io.go @@ -285,7 +285,7 @@ func (p *interp) setFieldNames(names []string) { delete(fieldsArray, k) } for i, name := range names { - fieldsArray[strconv.Itoa(i+1)] = str(name) + fieldsArray[strconv.Itoa(i+1)] = str(name, p.chars) } } @@ -620,7 +620,7 @@ func nextRune(b []byte) rune { // Setup for a new input file with given name (empty string if stdin) func (p *interp) setFile(filename string) { - p.filename = numStr(filename) + p.filename = numStr(filename, p.chars) p.fileLineNum = 0 p.hadFiles = true } diff --git a/interp/value.go b/interp/value.go index 30795686..78f072e4 100644 --- a/interp/value.go +++ b/interp/value.go @@ -22,7 +22,8 @@ const ( type value struct { typ valueType // Type of value s string // String value (for typeStr and typeNumStr) - n float64 // Numeric value (for typeNum) + r *[]rune + n float64 // Numeric value (for typeNum) } // Create a new null value @@ -36,13 +37,25 @@ func num(n float64) value { } // Create a new string value -func str(s string) value { - return value{typ: typeStr, s: s} +func str(s string, chars bool) value { + var runes *[]rune + if chars { + runes = new([]rune) + } + return value{typ: typeStr, s: s, r: runes} +} + +func strFromRunes(runes []rune) value { + return value{typ: typeStr, s: string(runes), r: &runes} } // Create a new value to represent a "numeric string" from an input field -func numStr(s string) value { - return value{typ: typeNumStr, s: s} +func numStr(s string, chars bool) value { + var runes *[]rune + if chars { + runes = new([]rune) + } + return value{typ: typeNumStr, s: s, r: runes} } // Create a numeric value from a Go bool @@ -53,6 +66,18 @@ func boolean(b bool) value { return num(0) } +func (v value) runes(floatFormat string) []rune { + switch v.typ { + case typeStr, typeNumStr: + if *v.r == nil { + *v.r = []rune(v.s) + } + return *v.r + default: // typeNum, typeNull + return []rune(v.str(floatFormat)) + } +} + // String returns a string representation of v for debugging. func (v value) String() string { switch v.typ { diff --git a/interp/vm.go b/interp/vm.go index ffaba655..1c8218ad 100644 --- a/interp/vm.go +++ b/interp/vm.go @@ -48,7 +48,7 @@ func (p *interp) execute(code []compiler.Opcode) error { case compiler.Str: index := code[ip] ip++ - p.push(str(p.strs[index])) + p.push(str(p.strs[index], p.chars)) case compiler.Dupe: v := p.peekTop() @@ -334,7 +334,7 @@ func (p *interp) execute(code []compiler.Opcode) error { for _, v := range values { indices = append(indices, p.toString(v)) } - p.push(str(strings.Join(indices, p.subscriptSep))) + p.push(str(strings.Join(indices, p.subscriptSep), p.chars)) case compiler.Add: l, r := p.peekPop() @@ -430,7 +430,7 @@ func (p *interp) execute(code []compiler.Opcode) error { case compiler.Concat: l, r := p.peekPop() - p.replaceTop(str(p.toString(l) + p.toString(r))) + p.replaceTop(str(p.toString(l)+p.toString(r), p.chars)) case compiler.ConcatMulti: numValues := int(code[ip]) @@ -441,7 +441,7 @@ func (p *interp) execute(code []compiler.Opcode) error { for _, v := range values { sb.WriteString(p.toString(v)) } - p.push(str(sb.String())) + p.push(str(sb.String(), p.chars)) case compiler.Match: l, r := p.peekPop() @@ -615,11 +615,11 @@ func (p *interp) execute(code []compiler.Opcode) error { for index := range array { switch resolver.Scope(varScope) { case resolver.Global: - p.globals[varIndex] = str(index) + p.globals[varIndex] = str(index, p.chars) case resolver.Local: - p.frame[varIndex] = str(index) + p.frame[varIndex] = str(index, p.chars) default: // resolver.Special - err := p.setSpecial(int(varIndex), str(index)) + err := p.setSpecial(int(varIndex), str(index, p.chars)) if err != nil { return err } @@ -683,7 +683,7 @@ func (p *interp) execute(code []compiler.Opcode) error { if err != nil { return err } - p.push(str(s)) + p.push(str(s, p.chars)) case compiler.CallUser: funcIndex := code[ip] @@ -852,7 +852,7 @@ func (p *interp) execute(code []compiler.Opcode) error { return err } if ret == 1 { - p.globals[index] = numStr(line) + p.globals[index] = numStr(line, p.chars) } p.push(num(ret)) @@ -866,7 +866,7 @@ func (p *interp) execute(code []compiler.Opcode) error { return err } if ret == 1 { - p.frame[index] = numStr(line) + p.frame[index] = numStr(line, p.chars) } p.push(num(ret)) @@ -880,7 +880,7 @@ func (p *interp) execute(code []compiler.Opcode) error { return err } if ret == 1 { - err := p.setSpecial(int(index), numStr(line)) + err := p.setSpecial(int(index), numStr(line, p.chars)) if err != nil { return err } @@ -900,7 +900,7 @@ func (p *interp) execute(code []compiler.Opcode) error { index := p.toString(p.peekTop()) if ret == 1 { array := p.array(resolver.Scope(arrayScope), int(arrayIndex)) - array[index] = numStr(line) + array[index] = numStr(line, p.chars) } p.replaceTop(num(ret)) } @@ -972,7 +972,7 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { if err != nil { return err } - p.replaceTwo(num(float64(n)), str(out)) + p.replaceTwo(num(float64(n)), str(out, p.chars)) case compiler.BuiltinIndex: sValue, substr := p.peekPop() @@ -1001,12 +1001,12 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { p.push(num(float64(length))) case compiler.BuiltinLengthArg: - s := p.toString(p.peekTop()) + s := p.peekTop() var length int if p.chars { - length = utf8.RuneCountInString(s) + length = len(s.runes(p.convertFormat)) } else { - length = len(s) + length = len(p.toString(s)) } p.replaceTop(num(float64(length))) @@ -1060,15 +1060,24 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { if err != nil { return err } - p.replaceTwo(num(float64(n)), str(out)) + p.replaceTwo(num(float64(n)), str(out, p.chars)) case compiler.BuiltinSubstr: sValue, posValue := p.peekPop() pos := int(posValue.num()) s := p.toString(sValue) - var substr string + var substr value if p.chars { - substr = substrChars(s, pos) + runes := sValue.runes(p.convertFormat) + if pos > len(runes) { + pos = len(runes) + 1 + } + if pos < 1 { + pos = 1 + } + length := len(runes) - pos + 1 + runes = runes[pos-1 : pos-1+length] + substr = strFromRunes(runes) } else { if pos > len(s) { pos = len(s) + 1 @@ -1077,19 +1086,34 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { pos = 1 } length := len(s) - pos + 1 - substr = s[pos-1 : pos-1+length] + substr = str(s[pos-1:pos-1+length], p.chars) } - p.replaceTop(str(substr)) + p.replaceTop(substr) case compiler.BuiltinSubstrLength: posValue, lengthValue := p.popTwo() length := int(lengthValue.num()) pos := int(posValue.num()) - s := p.toString(p.peekTop()) - var substr string + sValue := p.peekTop() + var substr value if p.chars { - substr = substrLengthChars(s, pos, length) + runes := sValue.runes(p.convertFormat) + if pos > len(runes) { + pos = len(runes) + 1 + } + if pos < 1 { + pos = 1 + } + maxLength := len(runes) - pos + 1 + if length < 0 { + length = 0 + } + if length > maxLength { + length = maxLength + } + substr = strFromRunes(runes[pos-1 : pos-1+length]) } else { + s := p.toString(sValue) if pos > len(s) { pos = len(s) + 1 } @@ -1103,9 +1127,9 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { if length > maxLength { length = maxLength } - substr = s[pos-1 : pos-1+length] + substr = str(s[pos-1:pos-1+length], p.chars) } - p.replaceTop(str(substr)) + p.replaceTop(substr) case compiler.BuiltinSystem: if p.noExec { @@ -1134,10 +1158,10 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { p.replaceTop(num(float64(exitCode))) case compiler.BuiltinTolower: - p.replaceTop(str(strings.ToLower(p.toString(p.peekTop())))) + p.replaceTop(str(strings.ToLower(p.toString(p.peekTop())), p.chars)) case compiler.BuiltinToupper: - p.replaceTop(str(strings.ToUpper(p.toString(p.peekTop())))) + p.replaceTop(str(strings.ToUpper(p.toString(p.peekTop())), p.chars)) } return nil