From f601d412ceae1338999b203c50168af34285c634 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 29 Jan 2019 23:06:06 -0500 Subject: [PATCH] fmt: scan new number syntax This CL updates fmt's scanner to accept the new number syntaxes: - Hexadecimal floating-point values. - Digit-separating underscores. - Leading 0b and 0o prefixes. See golang.org/design/19308-number-literals for background. For #12711. For #19308. For #28493. For #29008. Change-Id: I5582af5c94059c781e6cf4e862441d3df3006adf Reviewed-on: https://go-review.googlesource.com/c/160247 Reviewed-by: Robert Griesemer --- src/fmt/doc.go | 15 ++++++------- src/fmt/scan.go | 50 ++++++++++++++++++++++++++++++++------------ src/fmt/scan_test.go | 38 +++++++++++++++++++++++++++++++-- 3 files changed, 81 insertions(+), 22 deletions(-) diff --git a/src/fmt/doc.go b/src/fmt/doc.go index e0edff456c..2cb409b617 100644 --- a/src/fmt/doc.go +++ b/src/fmt/doc.go @@ -286,10 +286,10 @@ For example, %x will scan an integer as a hexadecimal number, and %v will scan the default representation format for the value. The Printf verbs %p and %T and the flags # and + are not implemented. - The verbs %e %E %f %F %g and %G are all equivalent and scan any - floating-point or complex value. For float and complex literals in - scientific notation, both the decimal (e) and binary (p) exponent - formats are supported (for example: "2.3e+7" and "4.5p-8"). + For floating-point and complex values, all valid formatting verbs + (%b %e %E %f %F %g %G %x %X and %v) are equivalent and accept + both decimal and hexadecimal notation (for example: "2.3e+7", "0x4.5p-8") + and digit-separating underscores (for example: "3.14159_26535_89793"). Input processed by verbs is implicitly space-delimited: the implementation of every verb except %c starts by discarding @@ -297,9 +297,10 @@ (and %v reading into a string) stops consuming input at the first space or newline character. - The familiar base-setting prefixes 0 (octal) and 0x - (hexadecimal) are accepted when scanning integers without - a format or with the %v verb. + The familiar base-setting prefixes 0b (binary), 0o and 0 (octal), + and 0x (hexadecimal) are accepted when scanning integers + without a format or with the %v verb, as are digit-separating + underscores. Width is interpreted in the input text but there is no syntax for scanning with a precision (no %5.2f, just %5f). diff --git a/src/fmt/scan.go b/src/fmt/scan.go index ae79e39dee..d42703cb71 100644 --- a/src/fmt/scan.go +++ b/src/fmt/scan.go @@ -562,7 +562,7 @@ const ( hexadecimalDigits = "0123456789aAbBcCdDeEfF" sign = "+-" period = "." - exponent = "eEp" + exponent = "eEpP" ) // getBase returns the numeric base represented by the verb and its digit string. @@ -609,20 +609,26 @@ func (s *ss) scanRune(bitSize int) int64 { return r } -// scanBasePrefix reports whether the integer begins with a 0 or 0x, +// scanBasePrefix reports whether the integer begins with a bas prefix // and returns the base, digit string, and whether a zero was found. // It is called only if the verb is %v. func (s *ss) scanBasePrefix() (base int, digits string, found bool) { if !s.peek("0") { - return 10, decimalDigits, false + return 0, decimalDigits + "_", false } s.accept("0") found = true // We've put a digit into the token buffer. - // Special cases for '0' && '0x' - base, digits = 8, octalDigits - if s.peek("xX") { - s.consume("xX", false) - base, digits = 16, hexadecimalDigits + // Special cases for 0, 0b, 0o, 0x. + base, digits = 0, octalDigits+"_" + if s.peek("bB") { + s.consume("bB", true) + base, digits = 0, binaryDigits+"_" + } else if s.peek("oO") { + s.consume("oO", true) + base, digits = 0, octalDigits+"_" + } else if s.peek("xX") { + s.consume("xX", true) + base, digits = 0, hexadecimalDigits+"_" } return } @@ -705,21 +711,27 @@ func (s *ss) floatToken() string { if s.accept("iI") && s.accept("nN") && s.accept("fF") { return string(s.buf) } + digits := decimalDigits + "_" + exp := exponent + if s.accept("0") && s.accept("xX") { + digits = hexadecimalDigits + "_" + exp = "pP" + } // digits? - for s.accept(decimalDigits) { + for s.accept(digits) { } // decimal point? if s.accept(period) { // fraction? - for s.accept(decimalDigits) { + for s.accept(digits) { } } // exponent? - if s.accept(exponent) { + if s.accept(exp) { // leading sign? s.accept(sign) // digits? - for s.accept(decimalDigits) { + for s.accept(decimalDigits + "_") { } } return string(s.buf) @@ -749,9 +761,21 @@ func (s *ss) complexTokens() (real, imag string) { return real, imagSign + imag } +func hasX(s string) bool { + for i := 0; i < len(s); i++ { + if s[i] == 'x' || s[i] == 'X' { + return true + } + } + return false +} + // convertFloat converts the string to a float64value. func (s *ss) convertFloat(str string, n int) float64 { - if p := indexRune(str, 'p'); p >= 0 { + // strconv.ParseFloat will handle "+0x1.fp+2", + // but we have to implement our non-standard + // decimal+binary exponent mix (1.2p4) ourselves. + if p := indexRune(str, 'p'); p >= 0 && !hasX(str) { // Atof doesn't handle power-of-2 exponents, // but they're easy to evaluate. f, err := strconv.ParseFloat(str[:p], n) diff --git a/src/fmt/scan_test.go b/src/fmt/scan_test.go index d7019d9439..b14a6f5deb 100644 --- a/src/fmt/scan_test.go +++ b/src/fmt/scan_test.go @@ -124,12 +124,18 @@ var scanTests = []ScanTest{ {"T\n", &boolVal, true}, // boolean test vals toggle to be sure they are written {"F\n", &boolVal, false}, // restored to zero value {"21\n", &intVal, 21}, + {"2_1\n", &intVal, 21}, {"0\n", &intVal, 0}, {"000\n", &intVal, 0}, {"0x10\n", &intVal, 0x10}, + {"0x_1_0\n", &intVal, 0x10}, {"-0x10\n", &intVal, -0x10}, {"0377\n", &intVal, 0377}, + {"0_3_7_7\n", &intVal, 0377}, + {"0o377\n", &intVal, 0377}, + {"0o_3_7_7\n", &intVal, 0377}, {"-0377\n", &intVal, -0377}, + {"-0o377\n", &intVal, -0377}, {"0\n", &uintVal, uint(0)}, {"000\n", &uintVal, uint(0)}, {"0x10\n", &uintVal, uint(0x10)}, @@ -163,13 +169,20 @@ var scanTests = []ScanTest{ {"2.3e2\n", &float64Val, 2.3e2}, {"2.3p2\n", &float64Val, 2.3 * 4}, {"2.3p+2\n", &float64Val, 2.3 * 4}, - {"2.3p+66\n", &float64Val, 2.3 * (1 << 32) * (1 << 32) * 4}, - {"2.3p-66\n", &float64Val, 2.3 / ((1 << 32) * (1 << 32) * 4)}, + {"2.3p+66\n", &float64Val, 2.3 * (1 << 66)}, + {"2.3p-66\n", &float64Val, 2.3 / (1 << 66)}, + {"0x2.3p-66\n", &float64Val, float64(0x23) / (1 << 70)}, + {"2_3.4_5\n", &float64Val, 23.45}, {"2.35\n", &stringVal, "2.35"}, {"2345678\n", &bytesVal, []byte("2345678")}, {"(3.4e1-2i)\n", &complex128Val, 3.4e1 - 2i}, {"-3.45e1-3i\n", &complex64Val, complex64(-3.45e1 - 3i)}, {"-.45e1-1e2i\n", &complex128Val, complex128(-.45e1 - 100i)}, + {"-.4_5e1-1E2i\n", &complex128Val, complex128(-.45e1 - 100i)}, + {"0x1.0p1+0x1.0P2i\n", &complex128Val, complex128(2 + 4i)}, + {"-0x1p1-0x1p2i\n", &complex128Val, complex128(-2 - 4i)}, + {"-0x1ep-1-0x1p2i\n", &complex128Val, complex128(-15 - 4i)}, + {"-0x1_Ep-1-0x1p0_2i\n", &complex128Val, complex128(-15 - 4i)}, {"hello\n", &stringVal, "hello"}, // Carriage-return followed by newline. (We treat \r\n as \n always.) @@ -207,8 +220,15 @@ var scanfTests = []ScanfTest{ {"%v", "TRUE\n", &boolVal, true}, {"%t", "false\n", &boolVal, false}, {"%v", "-71\n", &intVal, -71}, + {"%v", "-7_1\n", &intVal, -71}, + {"%v", "0b111\n", &intVal, 7}, + {"%v", "0b_1_1_1\n", &intVal, 7}, {"%v", "0377\n", &intVal, 0377}, + {"%v", "0_3_7_7\n", &intVal, 0377}, + {"%v", "0o377\n", &intVal, 0377}, + {"%v", "0o_3_7_7\n", &intVal, 0377}, {"%v", "0x44\n", &intVal, 0x44}, + {"%v", "0x_4_4\n", &intVal, 0x44}, {"%d", "72\n", &intVal, 72}, {"%c", "a\n", &runeVal, 'a'}, {"%c", "\u5072\n", &runeVal, '\u5072'}, @@ -222,17 +242,31 @@ var scanfTests = []ScanfTest{ {"%x", "a75\n", &intVal, 0xa75}, {"%v", "71\n", &uintVal, uint(71)}, {"%d", "72\n", &uintVal, uint(72)}, + {"%d", "7_2\n", &uintVal, uint(7)}, // only %v takes underscores {"%d", "73\n", &uint8Val, uint8(73)}, {"%d", "74\n", &uint16Val, uint16(74)}, {"%d", "75\n", &uint32Val, uint32(75)}, {"%d", "76\n", &uint64Val, uint64(76)}, {"%b", "1001001\n", &uintVal, uint(73)}, + {"%b", "100_1001\n", &uintVal, uint(4)}, {"%o", "075\n", &uintVal, uint(075)}, + {"%o", "07_5\n", &uintVal, uint(07)}, // only %v takes underscores {"%x", "a75\n", &uintVal, uint(0xa75)}, {"%x", "A75\n", &uintVal, uint(0xa75)}, + {"%x", "A7_5\n", &uintVal, uint(0xa7)}, // only %v takes underscores {"%U", "U+1234\n", &intVal, int(0x1234)}, {"%U", "U+4567\n", &uintVal, uint(0x4567)}, + {"%e", "2.3\n", &float64Val, 2.3}, + {"%E", "2.3e1\n", &float32Val, float32(2.3e1)}, + {"%f", "2.3e2\n", &float64Val, 2.3e2}, + {"%g", "2.3p2\n", &float64Val, 2.3 * 4}, + {"%G", "2.3p+2\n", &float64Val, 2.3 * 4}, + {"%v", "2.3p+66\n", &float64Val, 2.3 * (1 << 66)}, + {"%f", "2.3p-66\n", &float64Val, 2.3 / (1 << 66)}, + {"%G", "0x2.3p-66\n", &float64Val, float64(0x23) / (1 << 70)}, + {"%E", "2_3.4_5\n", &float64Val, 23.45}, + // Strings {"%s", "using-%s\n", &stringVal, "using-%s"}, {"%x", "7573696e672d2578\n", &stringVal, "using-%x"}, -- 2.48.1