]> Cypherpunks repositories - gostls13.git/commitdiff
- handle UTF-8 text in tabwriter
authorRobert Griesemer <gri@golang.org>
Fri, 5 Dec 2008 17:22:13 +0000 (09:22 -0800)
committerRobert Griesemer <gri@golang.org>
Fri, 5 Dec 2008 17:22:13 +0000 (09:22 -0800)
R=r
DELTA=84  (27 added, 3 deleted, 54 changed)
OCL=20539
CL=20584

src/lib/tabwriter/tabwriter.go
src/lib/tabwriter/tabwriter_test.go

index ec6fadad1f1ad116ed4e105494c87c2cb9777b09..53a7961e5e81f8d610c5c716031f2408731abd9e 100644 (file)
@@ -8,12 +8,12 @@ import (
        "os";
        "io";
        "array";
+       "utf8";
 )
 
 
 // ----------------------------------------------------------------------------
 // ByteArray
-// TODO should use a ByteArray library eventually
 
 type ByteArray struct {
        a *[]byte;
@@ -62,11 +62,13 @@ func (b *ByteArray) Append(s *[]byte) {
 
 // ----------------------------------------------------------------------------
 // Writer is a filter implementing the io.Write interface. It assumes
-// that the incoming bytes represent ASCII encoded text consisting of
+// that the incoming bytes represent UTF-8 encoded text consisting of
 // lines of tab-terminated "cells". Cells in adjacent lines constitute
 // a column. Writer rewrites the incoming text such that all cells in
 // a column have the same width; thus it effectively aligns cells. It
-// does this by adding padding where necessary.
+// does this by adding padding where necessary. All characters (ASCII
+// or not) are assumed to be of the same width - this may not be true
+// for arbitrary UTF-8 characters visualized on the screen.
 //
 // Note that any text at the end of a line that is not tab-terminated
 // is not a cell and does not enforce alignment of cells in adjacent
@@ -84,8 +86,6 @@ func (b *ByteArray) Append(s *[]byte) {
 //            (for correct-looking results, cellwidth must correspond
 //            to the tabwidth in the editor used to look at the result)
 
-// TODO Should support UTF-8 (requires more complicated width bookkeeping)
-
 
 export type Writer struct {
        // TODO should not export any of the fields
@@ -97,15 +97,18 @@ export type Writer struct {
        align_left bool;
 
        // current state
-       buf ByteArray;  // the collected text w/o tabs and newlines
-       width int;  // width of last incomplete cell
-       lines array.Array;  // list of lines; each line is a list of cell widths
-       widths array.IntArray;  // list of column widths - re-used during formatting
+       buf ByteArray;  // collected text w/o tabs and newlines
+       size int;  // size of last incomplete cell in bytes
+       width int;  // width of last incomplete cell in runes
+       lines_size array.Array;  // list of lines; each line is a list of cell sizes in bytes
+       lines_width array.Array;  // list of lines; each line is a list of cell widths in runes
+       widths array.IntArray;  // list of column widths in runes - re-used during formatting
 }
 
 
 func (b *Writer) AddLine() {
-       b.lines.Push(array.NewIntArray(0));
+       b.lines_size.Push(array.NewIntArray(0));
+       b.lines_width.Push(array.NewIntArray(0));
 }
 
 
@@ -125,7 +128,8 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali
        b.align_left = align_left || padchar == '\t';  // tab enforces left-alignment
        
        b.buf.Init(1024);
-       b.lines.Init(0);
+       b.lines_size.Init(0);
+       b.lines_width.Init(0);
        b.widths.Init(0);
        b.AddLine();  // the very first line
        
@@ -133,21 +137,23 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali
 }
 
 
-func (b *Writer) Line(i int) *array.IntArray {
-       return b.lines.At(i).(*array.IntArray);
+func (b *Writer) Line(i int) (*array.IntArray, *array.IntArray) {
+       return
+               b.lines_size.At(i).(*array.IntArray),
+               b.lines_width.At(i).(*array.IntArray);
 }
 
 
 // debugging support
 func (b *Writer) Dump() {
        pos := 0;
-       for i := 0; i < b.lines.Len(); i++ {
-               line := b.Line(i);
+       for i := 0; i < b.lines_size.Len(); i++ {
+               line_size, line_width := b.Line(i);
                print("(", i, ") ");
-               for j := 0; j < line.Len(); j++ {
-                       w := line.At(j);
-                       print("[", string(b.buf.Slice(pos, pos + w)), "]");
-                       pos += w;
+               for j := 0; j < line_size.Len(); j++ {
+                       s := line_size.At(j);
+                       print("[", string(b.buf.Slice(pos, pos + s)), "]");
+                       pos += s;
                }
                print("\n");
        }
@@ -198,16 +204,16 @@ exit:
 func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) {
        pos = pos0;
        for i := line0; i < line1; i++ {
-               line := b.Line(i);
-               for j := 0; j < line.Len(); j++ {
-                       w := line.At(j);
+               line_size, line_width := b.Line(i);
+               for j := 0; j < line_size.Len(); j++ {
+                       s, w := line_size.At(j), line_width.At(j);
 
                        if b.align_left {
-                               err = b.Write0(b.buf.a[pos : pos + w]);
+                               err = b.Write0(b.buf.a[pos : pos + s]);
                                if err != nil {
                                        goto exit;
                                }
-                               pos += w;
+                               pos += s;
                                if j < b.widths.Len() {
                                        err = b.WritePadding(w, b.widths.At(j));
                                        if err != nil {
@@ -223,20 +229,20 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error)
                                                goto exit;
                                        }
                                }
-                               err = b.Write0(b.buf.a[pos : pos + w]);
+                               err = b.Write0(b.buf.a[pos : pos + s]);
                                if err != nil {
                                        goto exit;
                                }
-                               pos += w;
+                               pos += s;
                        }
                }
                
-               if i+1 == b.lines.Len() {
+               if i+1 == b.lines_size.Len() {
                        // last buffered line - we don't have a newline, so just write
                        // any outstanding buffered data
-                       err = b.Write0(b.buf.a[pos : pos + b.width]);
-                       pos += b.width;
-                       b.width = 0;
+                       err = b.Write0(b.buf.a[pos : pos + b.size]);
+                       pos += b.size;
+                       b.size, b.width = 0, 0;
                } else {
                        // not the last line - write newline
                        err = b.Write0(Newline);
@@ -256,9 +262,9 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) {
        column := b.widths.Len();       
        last := line0;
        for this := line0; this < line1; this++ {
-               line := b.Line(this);
+               line_size, line_width := b.Line(this);
                
-               if column < line.Len() - 1 {
+               if column < line_size.Len() - 1 {
                        // cell exists in this column
                        // (note that the last cell per line is ignored)
                        
@@ -272,10 +278,10 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) {
                        // column block begin
                        width := b.cellwidth;  // minimal width
                        for ; this < line1; this++ {
-                               line = b.Line(this);
-                               if column < line.Len() - 1 {
+                               line_size, line_width = b.Line(this);
+                               if column < line_size.Len() - 1 {
                                        // cell exists in this column => update width
-                                       w := line.At(column) + b.padding;
+                                       w := line_width.At(column) + b.padding;
                                        if w > width {
                                                width = w;
                                        }
@@ -302,18 +308,35 @@ exit:
 }
 
 
+func UnicodeLen(buf *[]byte) int {
+       l := 0;
+       for i := 0; i < len(buf); {
+               if buf[i] < utf8.RuneSelf {
+                       i++;
+               } else {
+                       rune, size := utf8.DecodeRune(buf[i : len(buf)]);
+                       i += size;
+               }
+               l++;
+       }
+       return l;
+}
+
 func (b *Writer) Append(buf *[]byte) {
        b.buf.Append(buf);
-       b.width += len(buf);
+       b.size += len(buf);
+       b.width += UnicodeLen(buf);
 }
 
 
 /* export */ func (b *Writer) Flush() *os.Error {
-       dummy, err := b.Format(0, 0, b.lines.Len());
+       dummy, err := b.Format(0, 0, b.lines_size.Len());
        // reset (even in the presence of errors)
        b.buf.Clear();
-       b.width = 0;
-       b.lines.Init(0);
+       b.size, b.width = 0, 0;
+       b.lines_size.Init(0);
+       b.lines_width.Init(0);
        b.AddLine();
        return err;
 }
@@ -329,13 +352,14 @@ func (b *Writer) Append(buf *[]byte) {
                        i0 = i + 1;  // exclude ch from (next) cell
 
                        // terminate cell
-                       last := b.Line(b.lines.Len() - 1);
-                       last.Push(b.width);
-                       b.width = 0;
+                       last_size, last_width := b.Line(b.lines_size.Len() - 1);
+                       last_size.Push(b.size);
+                       last_width.Push(b.width);
+                       b.size, b.width = 0, 0;
 
                        if ch == '\n' {
                                b.AddLine();
-                               if last.Len() == 1 {
+                               if last_size.Len() == 1 {
                                        // The previous line has only one cell which does not have
                                        // an impact on the formatting of the following lines (the
                                        // last cell per line is ignored by Format), thus we can
index 03b0409c90e0ba160149ebbb1994695491563c9d..097a894823a515670d5fb5c70b750f2836fb8e3f 100644 (file)
@@ -189,24 +189,24 @@ export func Test(t *testing.T) {
 
        Check(
                t, 8, 1, ' ', true,
-               "a\tb\tc\n"
-               "aa\tbbb\tcccc\tddddd\n"
+               "\tb\tc\n"
+               "aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
                "aaa\tbbbb\n",
 
-               "a       b       c\n"
-               "aa      bbb     cccc    ddddd\n"
+               "       b       c\n"
+               "aa      本本本     cccc    ddddd\n"
                "aaa     bbbb\n"
        );
 
        Check(
                t, 8, 1, ' ', false,
-               "a\tb\tc\t\n"
-               "aa\tbbb\tcccc\tddddd\t\n"
-               "aaa\tbbbb\t\n",
+               "a\tè\tc\t\n"
+               "aa\tèèè\tcccc\tddddd\t\n"
+               "aaa\tèèèè\t\n",
 
-               "       a       b       c\n"
-               "      aa     bbb    cccc   ddddd\n"
-               "     aaa    bbbb\n"
+               "       a       è       c\n"
+               "      aa     èèè    cccc   ddddd\n"
+               "     aaa    èèèè\n"
        );
 
        Check(
@@ -233,7 +233,7 @@ export func Test(t *testing.T) {
 
        Check(
                t, 4, 1, '-', true,
-               "4444\t333\t22\t1\t333\n"
+               "4444\t日本語\t22\t1\t333\n"
                "999999999\t22\n"
                "7\t22\n"
                "\t\t\t88888888\n"
@@ -241,7 +241,7 @@ export func Test(t *testing.T) {
                "666666\t666666\t666666\t4444\n"
                "1\t1\t999999999\t0000000000\n",
 
-               "4444------333-22--1---333\n"
+               "4444------日本語-22--1---333\n"
                "999999999-22\n"
                "7---------22\n"
                "------------------88888888\n"