fmt: add %U format for standard Unicode representation of integer values.

author Rob Pike <r@golang.org>

Mon, 6 Dec 2010 19:23:37 +0000 (14:23 -0500)

committer Rob Pike <r@golang.org>

Mon, 6 Dec 2010 19:23:37 +0000 (14:23 -0500)
author Rob Pike <r@golang.org>
Mon, 6 Dec 2010 19:23:37 +0000 (14:23 -0500)
committer Rob Pike <r@golang.org>
Mon, 6 Dec 2010 19:23:37 +0000 (14:23 -0500)
diff --git a/src/pkg/fmt/doc.go b/src/pkg/fmt/doc.go

index 15aae50e3d6ae0a87a8b520be3b90259720449b3..f3067eac9f1d1e2afb91ae39f102adff8dd56d99 100644 (file)
--- a/src/pkg/fmt/doc.go
+++ b/src/pkg/fmt/doc.go
@@ -26,6 +26,7 @@
                 %o      base 8
                 %x      base 16, with lower-case letters for a-f
                 %X      base 16, with upper-case letters for A-F
+               %U      unicode format: U+1234; same as "U+%x" with 4 digits default
         Floating-point and complex constituents:
                 %e      scientific notation, e.g. -1234.456e+78
                 %E      scientific notation, e.g. -1234.456E+78
diff --git a/src/pkg/fmt/fmt_test.go b/src/pkg/fmt/fmt_test.go

index fbc2536ee15a7a65791e2be3904c27a957f979d3..d87b93a7956b1910bd68f68aed25aa988be81930 100644 (file)
--- a/src/pkg/fmt/fmt_test.go
+++ b/src/pkg/fmt/fmt_test.go
@@ -161,6 +161,14 @@ var fmttests = []fmtTest{
         {"% d", 0, " 0"},
         {"% d", 12345, " 12345"},
  
+       // unicode format
+       {"%U", 0x1, "U+0001"},
+       {"%.8U", 0x2, "U+00000002"},
+       {"%U", 0x1234, "U+1234"},
+       {"%U", 0x12345, "U+12345"},
+       {"%10.6U", 0xABC, "  U+000ABC"},
+       {"%-10.6U", 0xABC, "U+000ABC  "},
+
         // floats
         {"%+.3e", 0.0, "+0.000e+00"},
         {"%+.3e", 1.0, "+1.000e+00"},
diff --git a/src/pkg/fmt/format.go b/src/pkg/fmt/format.go

index 010280bf851b8a133632aa1b0d4230282399c231..0121dda31dc3317c3aab1b81f6aa36a2bf97617e 100644 (file)
--- a/src/pkg/fmt/format.go
+++ b/src/pkg/fmt/format.go
@@ -49,6 +49,7 @@ type fmt struct {
         plus        bool
         sharp       bool
         space       bool
+       unicode     bool
         zero        bool
  }
  
@@ -61,6 +62,7 @@ func (f *fmt) clearflags() {
         f.plus = false
         f.sharp = false
         f.space = false
+       f.unicode = false
         f.zero = false
  }
  
@@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
                         buf[i] = '0'
                 }
         }
+       if f.unicode {
+               i--
+               buf[i] = '+'
+               i--
+               buf[i] = 'U'
+       }
  
         if negative {
                 i--
diff --git a/src/pkg/fmt/print.go b/src/pkg/fmt/print.go

index 3bb14eeb1466ecbf29843036f5e8c2f4225d08f0..7ac6648c70c309fba44a7c44a5ff3e347980ba7a 100644 (file)
--- a/src/pkg/fmt/print.go
+++ b/src/pkg/fmt/print.go
@@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
                 p.fmt.integer(v, 8, signed, ldigits)
         case 'x':
                 p.fmt.integer(v, 16, signed, ldigits)
+       case 'U':
+               p.fmtUnicode(v)
         case 'X':
                 p.fmt.integer(v, 16, signed, udigits)
         default:
@@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
         }
  }
  
-// fmt_sharpHex64 formats a uint64 in hexadecimal and prefixes it with 0x by
+// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by
  // temporarily turning on the sharp flag.
  func (p *pp) fmt0x64(v uint64) {
         sharp := p.fmt.sharp
@@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) {
         p.fmt.sharp = sharp
  }
  
+// fmtUnicode formats a uint64 in U+1234 form by
+// temporarily turning on the unicode flag and tweaking the precision.
+func (p *pp) fmtUnicode(v int64) {
+       precPresent := p.fmt.precPresent
+       prec := p.fmt.prec
+       if !precPresent {
+               // If prec is already set, leave it alone; otherwise 4 is minimum.
+               p.fmt.prec = 4
+               p.fmt.precPresent = true
+       }
+       p.fmt.unicode = true // turn on U+
+       p.fmt.integer(int64(v), 16, unsigned, udigits)
+       p.fmt.unicode = false
+       p.fmt.prec = prec
+       p.fmt.precPresent = precPresent
+}
+
  func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) {
         switch verb {
         case 'b':
diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go

index 9b414cb9a7efecbbb8aaca9def25ab8e7b04e15d..dcc42bc92daf379413b40a682284a88b12e773c2 100644 (file)
--- a/src/pkg/fmt/scan.go
+++ b/src/pkg/fmt/scan.go
@@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) {
  var complexError = os.ErrorString("syntax error scanning complex number")
  var boolError = os.ErrorString("syntax error scanning boolean")
  
-// accepts checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
-// buffer and returns true. Otherwise it return false.
-func (s *ss) accept(ok string) bool {
+// consume reads the next rune in the input and reports whether it is in the ok string.
+// If accept is true, it puts the character into the input token.
+func (s *ss) consume(ok string, accept bool) bool {
         if s.wid >= s.maxWid {
                 return false
         }
@@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool {
         }
         for i := 0; i < len(ok); i++ {
                 if int(ok[i]) == rune {
-                       s.buf.WriteRune(rune)
-                       s.wid++
+                       if accept {
+                               s.buf.WriteRune(rune)
+                               s.wid++
+                       }
                         return true
                 }
         }
-       if rune != EOF {
+       if rune != EOF && accept {
                 s.UngetRune()
         }
         return false
  }
  
+// accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
+// buffer and returns true. Otherwise it return false.
+func (s *ss) accept(ok string) bool {
+       return s.consume(ok, true)
+}
+
  // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
  func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
         for _, v := range okVerbs {
@@ -460,7 +468,7 @@ const (
  
  // getBase returns the numeric base represented by the verb and its digit string.
  func (s *ss) getBase(verb int) (base int, digits string) {
-       s.okVerb(verb, "bdoxXv", "integer") // sets s.err
+       s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
         base = 10
         digits = decimalDigits
         switch verb {
@@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) {
         case 'o':
                 base = 8
                 digits = octalDigits
-       case 'x', 'X':
+       case 'x', 'X', 'U':
                 base = 16
                 digits = hexadecimalDigits
         }
@@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 {
         }
         base, digits := s.getBase(verb)
         s.skipSpace(false)
-       s.accept(sign) // If there's a sign, it will be left in the token buffer.
+       if verb == 'U' {
+               if !s.consume("U", false) || !s.consume("+", false) {
+                       s.errorString("bad unicode format ")
+               }
+       } else {
+               s.accept(sign) // If there's a sign, it will be left in the token buffer.
+       }
         tok := s.scanNumber(digits)
         i, err := strconv.Btoi64(tok, base)
         if err != nil {
@@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 {
         }
         base, digits := s.getBase(verb)
         s.skipSpace(false)
+       if verb == 'U' {
+               if !s.consume("U", false) || !s.consume("+", false) {
+                       s.errorString("bad unicode format ")
+               }
+       }
         tok := s.scanNumber(digits)
         i, err := strconv.Btoui64(tok, base)
         if err != nil {
diff --git a/src/pkg/fmt/scan_test.go b/src/pkg/fmt/scan_test.go

index cf8a3a766f10c719447da6906a7d7dff489db39d..7a0baae2455cf55fcdaf77d56cddadcfa5addff1 100644 (file)
--- a/src/pkg/fmt/scan_test.go
+++ b/src/pkg/fmt/scan_test.go
@@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{
         {"%o", "075\n", &uintVal, uint(075)},
         {"%x", "a75\n", &uintVal, uint(0xa75)},
         {"%x", "A75\n", &uintVal, uint(0xa75)},
+       {"%U", "U+1234\n", &intVal, int(0x1234)},
+       {"%U", "U+4567\n", &uintVal, uint(0x4567)},
  
         // Strings
         {"%s", "using-%s\n", &stringVal, "using-%s"},
author	Rob Pike <r@golang.org>
	Mon, 6 Dec 2010 19:23:37 +0000 (14:23 -0500)
committer	Rob Pike <r@golang.org>
	Mon, 6 Dec 2010 19:23:37 +0000 (14:23 -0500)
src/pkg/fmt/doc.go		patch \| blob \| history
src/pkg/fmt/fmt_test.go		patch \| blob \| history
src/pkg/fmt/format.go		patch \| blob \| history
src/pkg/fmt/print.go		patch \| blob \| history
src/pkg/fmt/scan.go		patch \| blob \| history
src/pkg/fmt/scan_test.go		patch \| blob \| history