From e9d5a641d7c6c5d446c158f90249167133d5ccee Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 6 Mar 2012 00:36:12 -0500 Subject: [PATCH] strconv: add table-based isPrint Not used yet for simpler merge. R=golang-dev, r CC=golang-dev https://golang.org/cl/5756048 --- src/pkg/strconv/isprint.go | 521 +++++++++++++++++++++++++++++++++ src/pkg/strconv/makeisprint.go | 157 ++++++++++ src/pkg/strconv/quote.go | 56 ++++ 3 files changed, 734 insertions(+) create mode 100644 src/pkg/strconv/isprint.go create mode 100644 src/pkg/strconv/makeisprint.go diff --git a/src/pkg/strconv/isprint.go b/src/pkg/strconv/isprint.go new file mode 100644 index 0000000000..34fa4d8de7 --- /dev/null +++ b/src/pkg/strconv/isprint.go @@ -0,0 +1,521 @@ +// DO NOT EDIT. GENERATED BY +// go run makeisprint.go >x && mv x isprint.go + +package strconv + +// (474+134)*2 + (180+42)*4 = 2104 bytes + +var isPrint16 = []uint16{ + 0x0020, 0x007e, + 0x00a1, 0x0377, + 0x037a, 0x037e, + 0x0384, 0x0527, + 0x0531, 0x0556, + 0x0559, 0x058a, + 0x0591, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0606, 0x061b, + 0x061e, 0x070d, + 0x0710, 0x074a, + 0x074d, 0x07b1, + 0x07c0, 0x07fa, + 0x0800, 0x082d, + 0x0830, 0x085b, + 0x085e, 0x085e, + 0x0900, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09e3, + 0x09e6, 0x09fb, + 0x0a01, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a39, + 0x0a3c, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a51, 0x0a51, + 0x0a59, 0x0a5e, + 0x0a66, 0x0a75, + 0x0a81, 0x0ab9, + 0x0abc, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0af1, + 0x0b01, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b39, + 0x0b3c, 0x0b44, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b63, + 0x0b66, 0x0b77, + 0x0b82, 0x0b8a, + 0x0b8e, 0x0b95, + 0x0b99, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bcd, + 0x0bd0, 0x0bd0, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c39, + 0x0c3d, 0x0c4d, + 0x0c55, 0x0c59, + 0x0c60, 0x0c63, + 0x0c66, 0x0c6f, + 0x0c78, 0x0c7f, + 0x0c82, 0x0cb9, + 0x0cbc, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0ce3, + 0x0ce6, 0x0cf2, + 0x0d02, 0x0d3a, + 0x0d3d, 0x0d4e, + 0x0d57, 0x0d57, + 0x0d60, 0x0d63, + 0x0d66, 0x0d75, + 0x0d79, 0x0d7f, + 0x0d82, 0x0d96, + 0x0d9a, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e84, + 0x0e87, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0ea7, + 0x0eaa, 0x0ebd, + 0x0ec0, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f6c, + 0x0f71, 0x0fda, + 0x1000, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x124d, + 0x1250, 0x125d, + 0x1260, 0x128d, + 0x1290, 0x12b5, + 0x12b8, 0x12c5, + 0x12c8, 0x1315, + 0x1318, 0x135a, + 0x135d, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1400, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad, + 0x1b00, 0x1b4b, + 0x1b50, 0x1b7c, + 0x1b80, 0x1baa, + 0x1bae, 0x1bb9, + 0x1bc0, 0x1bf3, + 0x1bfc, 0x1c37, + 0x1c3b, 0x1c49, + 0x1c4d, 0x1c7f, + 0x1cd0, 0x1cf2, + 0x1d00, 0x1de6, + 0x1dfc, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f7d, + 0x1f80, 0x1fd3, + 0x1fd6, 0x1fef, + 0x1ff2, 0x1ffe, + 0x2010, 0x2027, + 0x2030, 0x205e, + 0x2070, 0x2071, + 0x2074, 0x209c, + 0x20a0, 0x20b9, + 0x20d0, 0x20f0, + 0x2100, 0x2189, + 0x2190, 0x23f3, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2b4c, + 0x2b50, 0x2b59, + 0x2c00, 0x2cf1, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d70, + 0x2d7f, 0x2d96, + 0x2da0, 0x2e31, + 0x2e80, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x31ba, + 0x31c0, 0x31e3, + 0x31f0, 0x4db5, + 0x4dc0, 0x9fcb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa4d0, 0xa62b, + 0xa640, 0xa673, + 0xa67c, 0xa697, + 0xa6a0, 0xa6f7, + 0xa700, 0xa791, + 0xa7a0, 0xa7a9, + 0xa7fa, 0xa82b, + 0xa830, 0xa839, + 0xa840, 0xa877, + 0xa880, 0xa8c4, + 0xa8ce, 0xa8d9, + 0xa8e0, 0xa8fb, + 0xa900, 0xa953, + 0xa95f, 0xa97c, + 0xa980, 0xa9d9, + 0xa9de, 0xa9df, + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa7b, + 0xaa80, 0xaac2, + 0xaadb, 0xaadf, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab2e, + 0xabc0, 0xabed, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfbc1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe26, + 0xfe30, 0xfe6b, + 0xfe70, 0xfefc, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffee, + 0xfffc, 0xfffd, +} + +var isNotPrint16 = []uint16{ + 0x00ad, + 0x038b, + 0x038d, + 0x03a2, + 0x0560, + 0x0588, + 0x06dd, + 0x083f, + 0x0978, + 0x0980, + 0x0984, + 0x09a9, + 0x09b1, + 0x09de, + 0x0a04, + 0x0a29, + 0x0a31, + 0x0a34, + 0x0a37, + 0x0a3d, + 0x0a5d, + 0x0a84, + 0x0a8e, + 0x0a92, + 0x0aa9, + 0x0ab1, + 0x0ab4, + 0x0ac6, + 0x0aca, + 0x0af0, + 0x0b04, + 0x0b29, + 0x0b31, + 0x0b34, + 0x0b5e, + 0x0b84, + 0x0b91, + 0x0b9b, + 0x0b9d, + 0x0bc9, + 0x0c04, + 0x0c0d, + 0x0c11, + 0x0c29, + 0x0c34, + 0x0c45, + 0x0c49, + 0x0c57, + 0x0c84, + 0x0c8d, + 0x0c91, + 0x0ca9, + 0x0cb4, + 0x0cc5, + 0x0cc9, + 0x0cdf, + 0x0cf0, + 0x0d04, + 0x0d0d, + 0x0d11, + 0x0d45, + 0x0d49, + 0x0d84, + 0x0db2, + 0x0dbc, + 0x0dd5, + 0x0dd7, + 0x0e83, + 0x0e89, + 0x0e98, + 0x0ea0, + 0x0ea4, + 0x0ea6, + 0x0eac, + 0x0eba, + 0x0ec5, + 0x0ec7, + 0x0f48, + 0x0f98, + 0x0fbd, + 0x0fcd, + 0x1249, + 0x1257, + 0x1259, + 0x1289, + 0x12b1, + 0x12bf, + 0x12c1, + 0x12d7, + 0x1311, + 0x1680, + 0x170d, + 0x176d, + 0x1771, + 0x1a5f, + 0x1f58, + 0x1f5a, + 0x1f5c, + 0x1f5e, + 0x1fb5, + 0x1fc5, + 0x1fdc, + 0x1ff5, + 0x208f, + 0x2700, + 0x27cb, + 0x27cd, + 0x2c2f, + 0x2c5f, + 0x2da7, + 0x2daf, + 0x2db7, + 0x2dbf, + 0x2dc7, + 0x2dcf, + 0x2dd7, + 0x2ddf, + 0x2e9a, + 0x3040, + 0x318f, + 0x321f, + 0x32ff, + 0xa78f, + 0xa9ce, + 0xab27, + 0xfb37, + 0xfb3d, + 0xfb3f, + 0xfb42, + 0xfb45, + 0xfe53, + 0xfe67, + 0xfe75, + 0xffe7, +} + +var isPrint32 = []uint32{ + 0x000020, 0x00007e, + 0x0000a1, 0x000377, + 0x00037a, 0x00037e, + 0x000384, 0x000527, + 0x000531, 0x000556, + 0x000559, 0x00058a, + 0x000591, 0x0005c7, + 0x0005d0, 0x0005ea, + 0x0005f0, 0x0005f4, + 0x000606, 0x00061b, + 0x00061e, 0x00070d, + 0x000710, 0x00074a, + 0x00074d, 0x0007b1, + 0x0007c0, 0x0007fa, + 0x000800, 0x00082d, + 0x000830, 0x00085b, + 0x00085e, 0x00085e, + 0x000900, 0x00098c, + 0x00098f, 0x000990, + 0x000993, 0x0009b2, + 0x0009b6, 0x0009b9, + 0x0009bc, 0x0009c4, + 0x0009c7, 0x0009c8, + 0x0009cb, 0x0009ce, + 0x0009d7, 0x0009d7, + 0x0009dc, 0x0009e3, + 0x0009e6, 0x0009fb, + 0x000a01, 0x000a0a, + 0x000a0f, 0x000a10, + 0x000a13, 0x000a39, + 0x000a3c, 0x000a42, + 0x000a47, 0x000a48, + 0x000a4b, 0x000a4d, + 0x000a51, 0x000a51, + 0x000a59, 0x000a5e, + 0x000a66, 0x000a75, + 0x000a81, 0x000ab9, + 0x000abc, 0x000acd, + 0x000ad0, 0x000ad0, + 0x000ae0, 0x000ae3, + 0x000ae6, 0x000af1, + 0x000b01, 0x000b0c, + 0x000b0f, 0x000b10, + 0x000b13, 0x000b39, + 0x000b3c, 0x000b44, + 0x000b47, 0x000b48, + 0x000b4b, 0x000b4d, + 0x000b56, 0x000b57, + 0x000b5c, 0x000b63, + 0x000b66, 0x000b77, + 0x000b82, 0x000b8a, + 0x000b8e, 0x000b95, + 0x000b99, 0x000b9f, + 0x000ba3, 0x000ba4, + 0x000ba8, 0x000baa, + 0x000bae, 0x000bb9, + 0x000bbe, 0x000bc2, + 0x000bc6, 0x000bcd, + 0x000bd0, 0x000bd0, + 0x000bd7, 0x000bd7, + 0x000be6, 0x000bfa, + 0x000c01, 0x000c39, + 0x000c3d, 0x000c4d, + 0x000c55, 0x000c59, + 0x000c60, 0x000c63, + 0x000c66, 0x000c6f, + 0x000c78, 0x000c7f, + 0x000c82, 0x000cb9, + 0x000cbc, 0x000ccd, + 0x000cd5, 0x000cd6, + 0x000cde, 0x000ce3, + 0x000ce6, 0x000cf2, + 0x000d02, 0x000d3a, + 0x000d3d, 0x000d4e, + 0x000d57, 0x000d57, + 0x000d60, 0x000d63, + 0x000d66, 0x000d75, + 0x000d79, 0x000d7f, + 0x000d82, 0x000d96, + 0x000d9a, 0x000dbd, + 0x000dc0, 0x000dc6, + 0x000dca, 0x000dca, + 0x000dcf, 0x000ddf, + 0x000df2, 0x000df4, + 0x000e01, 0x000e3a, + 0x000e3f, 0x000e5b, + 0x000e81, 0x000e84, + 0x000e87, 0x000e8a, + 0x000e8d, 0x000e8d, + 0x000e94, 0x000ea7, +} + +var isNotPrint32 = []uint32{ + 0x1000c, + 0x10027, + 0x1003b, + 0x1003e, + 0x1031f, + 0x1039e, + 0x10809, + 0x10836, + 0x10856, + 0x10a04, + 0x10a14, + 0x10a18, + 0x110bd, + 0x1d455, + 0x1d49d, + 0x1d4ad, + 0x1d4ba, + 0x1d4bc, + 0x1d4c4, + 0x1d506, + 0x1d515, + 0x1d51d, + 0x1d53a, + 0x1d53f, + 0x1d545, + 0x1d551, + 0x1f0d0, + 0x1f12f, + 0x1f336, + 0x1f3c5, + 0x1f43f, + 0x1f441, + 0x1f4f8, + 0x1f600, + 0x1f611, + 0x1f615, + 0x1f617, + 0x1f619, + 0x1f61b, + 0x1f61f, + 0x1f62c, + 0x1f634, +} diff --git a/src/pkg/strconv/makeisprint.go b/src/pkg/strconv/makeisprint.go new file mode 100644 index 0000000000..4ff2294f40 --- /dev/null +++ b/src/pkg/strconv/makeisprint.go @@ -0,0 +1,157 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// makeisprint generates the tables for strconv's compact isPrint. +package main + +import ( + "fmt" + "unicode" +) + +var ( + range16 []uint16 + except16 []uint16 + range32 []uint32 + except32 []uint32 +) + +// bsearch16 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch16 returns len(a). +func bsearch16(a []uint16, x uint16) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +// bsearch32 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch32 returns len(a). +func bsearch32(a []uint32, x uint32) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +func isPrint(r rune) bool { + // Same algorithm, either on uint16 or uint32 value. + // First, find first i such that rang[i] >= x. + // This is the index of either the start or end of a pair that might span x. + // The start is even (rang[i&^1]) and the end is odd (rang[i|1]). + // If we find x in a range, make sure x is not in exception list. + + if 0 <= r && r < 1<<16 { + rr, rang, except := uint16(r), range16, except16 + i := bsearch16(rang, rr) + if i >= len(rang) || rr < rang[i&^1] || rang[i|1] < rr { + return false + } + j := bsearch16(except, rr) + return j >= len(except) || except[j] != rr + } + + rr, rang, except := uint32(r), range32, except32 + i := bsearch32(rang, rr) + if i >= len(rang) || rr < rang[i&^1] || rang[i|1] < rr { + return false + } + j := bsearch32(except, rr) + return j >= len(except) || except[j] != rr +} + +func scan(min, max rune) (rang, except []uint32) { + lo := rune(-1) + for i := min; ; i++ { + if (i > max || !unicode.IsPrint(i)) && lo >= 0 { + // End range, but avoid flip flop. + if i+1 <= max && unicode.IsPrint(i+1) { + except = append(except, uint32(i)) + continue + } + rang = append(rang, uint32(lo), uint32(i-1)) + lo = -1 + } + if i > max { + break + } + if lo < 0 && unicode.IsPrint(i) { + lo = i + } + } + return +} + +func to16(x []uint32) []uint16 { + var y []uint16 + for _, v := range x { + if uint32(uint16(v)) != v { + panic("bad 32->16 conversion") + } + y = append(y, uint16(v)) + } + return y +} + +func main() { + rang, except := scan(0, 0xFFFF) + range16 = to16(rang) + except16 = to16(except) + range32, except32 = scan(0x10000, unicode.MaxRune) + + for i := rune(0); i <= unicode.MaxRune; i++ { + if isPrint(i) != unicode.IsPrint(i) { + fmt.Printf("%U: isPrint=%v, want %v\n", i, isPrint(i), unicode.IsPrint(i)) + break + } + } + + fmt.Printf("// DO NOT EDIT. GENERATED BY\n") + fmt.Printf("// go run makeisprint.go >x && mv x isprint.go\n\n") + fmt.Printf("package strconv\n\n") + + fmt.Printf("// (%d+%d)*2 + (%d+%d)*4 = %d bytes\n\n", + len(range16), len(except16), + len(range32), len(except32), + (len(range16)+len(except16))*2+ + (len(range32)+len(except32))*4) + + fmt.Printf("var isPrint16 = []uint16{\n") + for i := 0; i < len(range16); i += 2 { + fmt.Printf("\t%#04x, %#04x,\n", range16[i], range16[i+1]) + } + fmt.Printf("}\n\n") + + fmt.Printf("var isNotPrint16 = []uint16{\n") + for _, r := range except16 { + fmt.Printf("\t%#04x,\n", r) + } + fmt.Printf("}\n\n") + + fmt.Printf("var isPrint32 = []uint32{\n") + for i := 0; i < len(range32); i += 2 { + fmt.Printf("\t%#06x, %#06x,\n", range16[i], range16[i+1]) + } + fmt.Printf("}\n\n") + + fmt.Printf("var isNotPrint32 = []uint32{\n") + for _, r := range except32 { + fmt.Printf("\t%#04x,\n", r) + } + fmt.Printf("}\n") +} diff --git a/src/pkg/strconv/quote.go b/src/pkg/strconv/quote.go index 57cdae1738..c07063c030 100644 --- a/src/pkg/strconv/quote.go +++ b/src/pkg/strconv/quote.go @@ -351,3 +351,59 @@ func Unquote(s string) (t string, err error) { } return string(buf), nil } + +// bsearch16 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch16 returns len(a). +func bsearch16(a []uint16, x uint16) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +// bsearch32 returns the smallest i such that a[i] >= x. +// If there is no such i, bsearch32 returns len(a). +func bsearch32(a []uint32, x uint32) int { + i, j := 0, len(a) + for i < j { + h := i + (j-i)/2 + if a[h] < x { + i = h + 1 + } else { + j = h + } + } + return i +} + +func isPrint(r rune) bool { + // Same algorithm, either on uint16 or uint32 value. + // First, find first i such that isPrint[i] >= x. + // This is the index of either the start or end of a pair that might span x. + // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). + // If we find x in a range, make sure x is not in isNotPrint list. + + if 0 <= r && r < 1<<16 { + rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 + i := bsearch16(isPrint, rr) + if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { + return false + } + j := bsearch16(isNotPrint, rr) + return j >= len(isNotPrint) || isNotPrint[j] != rr + } + + rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 + i := bsearch32(isPrint, rr) + if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { + return false + } + j := bsearch32(isNotPrint, rr) + return j >= len(isNotPrint) || isNotPrint[j] != rr +} -- 2.50.0