Cypherpunks repositories - gostls13.git/commitdiff
improved sentence extraction:
author Robert Griesemer <gri@golang.org>
Mon, 9 Nov 2009 05:48:51 +0000 (21:48 -0800)
committer Robert Griesemer <gri@golang.org>
Mon, 9 Nov 2009 05:48:51 +0000 (21:48 -0800)
- don't forget first periods
- look at capitalization of last char before periods

R=rsc
http://go/go-review/1024027

src/cmd/godoc/godoc.go

index 2acaa7cfe3f31e0734d3621c3aac1461106f9d63..eb97253508c5ca37a4bcf78bf59e3535fa790318 100644 (file)
@@ -23,6 +23,7 @@ import (
        "sync";
        "template";
        "time";
+       "unicode";
        "utf8";
 )
 
@@ -137,21 +138,38 @@ func htmlEscape(s string) string {
 
 
 func firstSentence(s string) string {
-       // find first period followed by whitespace, or just the first period
-       i := -1;
-       for j, ch := range s {
+       i := -1;        // index+1 of first period
+       j := -1;        // index+1 of first period that is followed by white space
+       prev := 'A';
+       for k, ch := range s {
+               k1 := k+1;
                if ch == '.' {
-                       i = j+1;        // include period
-                       if i < len(s) && s[i] <= ' ' {
-                               break;
+                       if i < 0 {
+                               i = k1; // first period
+                       }
+                       if k1 < len(s) && s[k1] <= ' ' {
+                               if j < 0 {
+                                       j = k1; // first period followed by white space
+                               }
+                               if !unicode.IsUpper(prev) {
+                                       j = k1;
+                                       break;
+                               }
                        }
                }
+               prev = ch;
        }
-       if i < 0 {
-               // no period found, use the enire string
-               i = len(s);
+
+       if j < 0 {
+               // use the next best period
+               j = i;
+               if j < 0 {
+                       // no period at all, use the entire string
+                       j = len(s);
+               }
        }
-       return s[0:i];
+
+       return s[0:j];
 }