hash/crc32: cleanup code and improve tests

author Radu Berinde <radu@cockroachlabs.com>

Sun, 28 Aug 2016 18:36:06 +0000 (14:36 -0400)

committer Brad Fitzpatrick <bradfitz@golang.org>

Wed, 31 Aug 2016 15:17:57 +0000 (15:17 +0000)
author Radu Berinde <radu@cockroachlabs.com>
Sun, 28 Aug 2016 18:36:06 +0000 (14:36 -0400)
committer Brad Fitzpatrick <bradfitz@golang.org>
Wed, 31 Aug 2016 15:17:57 +0000 (15:17 +0000)
diff --git a/src/hash/crc32/crc32.go b/src/hash/crc32/crc32.go

index 6eed8ff30098c6cd272a2fca63e17e8bfd48f2e7..8aa91b17e90d56714baa02fe3793461cedeb10d2 100644 (file)
--- a/src/hash/crc32/crc32.go
+++ b/src/hash/crc32/crc32.go
@@ -20,9 +20,6 @@ import (
  // The size of a CRC-32 checksum in bytes.
  const Size = 4
  
-// Use "slice by 8" when payload >= this value.
-const sliceBy8Cutoff = 16
-
  // Predefined polynomials.
  const (
         // IEEE is by far and away the most common CRC-32 polynomial.
@@ -43,80 +40,96 @@ const (
  // Table is a 256-word table representing the polynomial for efficient processing.
  type Table [256]uint32
  
+// This file makes use of functions implemented in architecture-specific files.
+// The interface that they implement is as follows:
+//
+//    // archAvailableIEEE reports whether an architecture-specific CRC32-IEEE
+//    // algorithm is available.
+//    archAvailableIEEE() bool
+//
+//    // archInitIEEE initializes the architecture-specific CRC3-IEEE algorithm.
+//    // It can only be called if archAvailableIEEE() returns true.
+//    archInitIEEE()
+//
+//    // archUpdateIEEE updates the given CRC32-IEEE. It can only be called if
+//    // archInitIEEE() was previously called.
+//    archUpdateIEEE(crc uint32, p []byte) uint32
+//
+//    // archAvailableCastagnoli reports whether an architecture-specific
+//    // CRC32-C algorithm is available.
+//    archAvailableCastagnoli() bool
+//
+//    // archInitCastagnoli initializes the architecture-specific CRC32-C
+//    // algorithm. It can only be called if archAvailableCastagnoli() returns
+//    // true.
+//    archInitCastagnoli()
+//
+//    // archUpdateCastagnoli updates the given CRC32-C. It can only be called
+//    // if archInitCastagnoli() was previously called.
+//    archUpdateCastagnoli(crc uint32, p []byte) uint32
+
  // castagnoliTable points to a lazily initialized Table for the Castagnoli
  // polynomial. MakeTable will always return this value when asked to make a
  // Castagnoli table so we can compare against it to find when the caller is
  // using this polynomial.
  var castagnoliTable *Table
  var castagnoliTable8 *slicing8Table
+var castagnoliArchImpl bool
+var updateCastagnoli func(crc uint32, p []byte) uint32
  var castagnoliOnce sync.Once
  
  func castagnoliInit() {
-       // Call the arch-specific init function and let it decide if we will need
-       // the tables for the generic implementation.
-       needGenericTables := castagnoliInitArch()
-
-       if needGenericTables {
-               castagnoliTable8 = makeTable8(Castagnoli)
+       castagnoliTable = simpleMakeTable(Castagnoli)
+       castagnoliArchImpl = archAvailableCastagnoli()
+
+       if castagnoliArchImpl {
+               archInitCastagnoli()
+               updateCastagnoli = archUpdateCastagnoli
+       } else {
+               // Initialize the slicing-by-8 table.
+               castagnoliTable8 = slicingMakeTable(Castagnoli)
+               updateCastagnoli = func(crc uint32, p []byte) uint32 {
+                       return slicingUpdate(crc, castagnoliTable8, p)
+               }
         }
-
-       // Even if we don't need the contents of this table, we use it as a handle
-       // returned by MakeTable. We should find a way to clean this up (see #16909).
-       castagnoliTable = makeTable(Castagnoli)
  }
  
  // IEEETable is the table for the IEEE polynomial.
-var IEEETable = makeTable(IEEE)
-
-// slicing8Table is array of 8 Tables
-type slicing8Table [8]Table
+var IEEETable = simpleMakeTable(IEEE)
  
  // ieeeTable8 is the slicing8Table for IEEE
  var ieeeTable8 *slicing8Table
-var ieeeTable8Once sync.Once
+var ieeeArchImpl bool
+var updateIEEE func(crc uint32, p []byte) uint32
+var ieeeOnce sync.Once
+
+func ieeeInit() {
+       ieeeArchImpl = archAvailableIEEE()
+
+       if ieeeArchImpl {
+               archInitIEEE()
+               updateIEEE = archUpdateIEEE
+       } else {
+               // Initialize the slicing-by-8 table.
+               ieeeTable8 = slicingMakeTable(IEEE)
+               updateIEEE = func(crc uint32, p []byte) uint32 {
+                       return slicingUpdate(crc, ieeeTable8, p)
+               }
+       }
+}
  
  // MakeTable returns a Table constructed from the specified polynomial.
  // The contents of this Table must not be modified.
  func MakeTable(poly uint32) *Table {
         switch poly {
         case IEEE:
+               ieeeOnce.Do(ieeeInit)
                 return IEEETable
         case Castagnoli:
                 castagnoliOnce.Do(castagnoliInit)
                 return castagnoliTable
         }
-       return makeTable(poly)
-}
-
-// makeTable returns the Table constructed from the specified polynomial.
-func makeTable(poly uint32) *Table {
-       t := new(Table)
-       for i := 0; i < 256; i++ {
-               crc := uint32(i)
-               for j := 0; j < 8; j++ {
-                       if crc&1 == 1 {
-                               crc = (crc >> 1) ^ poly
-                       } else {
-                               crc >>= 1
-                       }
-               }
-               t[i] = crc
-       }
-       return t
-}
-
-// makeTable8 returns slicing8Table constructed from the specified polynomial.
-func makeTable8(poly uint32) *slicing8Table {
-       t := new(slicing8Table)
-       t[0] = *makeTable(poly)
-       for i := 0; i < 256; i++ {
-               crc := t[0][i]
-               for j := 1; j < 8; j++ {
-                       crc = t[0][crc&0xFF] ^ (crc >> 8)
-                       t[j][i] = crc
-               }
-       }
-       return t
+       return simpleMakeTable(poly)
  }
  
  // digest represents the partial evaluation of a checksum.
@@ -128,7 +141,12 @@ type digest struct {
  // New creates a new hash.Hash32 computing the CRC-32 checksum
  // using the polynomial represented by the Table.
  // Its Sum method will lay the value out in big-endian byte order.
-func New(tab *Table) hash.Hash32 { return &digest{0, tab} }
+func New(tab *Table) hash.Hash32 {
+       if tab == IEEETable {
+               ieeeOnce.Do(ieeeInit)
+       }
+       return &digest{0, tab}
+}
  
  // NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum
  // using the IEEE polynomial.
@@ -141,44 +159,32 @@ func (d *digest) BlockSize() int { return 1 }
  
  func (d *digest) Reset() { d.crc = 0 }
  
-func update(crc uint32, tab *Table, p []byte) uint32 {
-       crc = ^crc
-       for _, v := range p {
-               crc = tab[byte(crc)^v] ^ (crc >> 8)
-       }
-       return ^crc
-}
-
-// updateSlicingBy8 updates CRC using Slicing-by-8
-func updateSlicingBy8(crc uint32, tab *slicing8Table, p []byte) uint32 {
-       crc = ^crc
-       for len(p) > 8 {
-               crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
-               crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
-                       tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
-                       tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
-               p = p[8:]
-       }
-       crc = ^crc
-       if len(p) == 0 {
-               return crc
-       }
-       return update(crc, &tab[0], p)
-}
-
  // Update returns the result of adding the bytes in p to the crc.
  func Update(crc uint32, tab *Table, p []byte) uint32 {
         switch tab {
         case castagnoliTable:
                 return updateCastagnoli(crc, p)
         case IEEETable:
+               // Unfortunately, because IEEETable is exported, IEEE may be used without a
+               // call to MakeTable. We have to make sure it gets initialized in that case.
+               ieeeOnce.Do(ieeeInit)
                 return updateIEEE(crc, p)
+       default:
+               return simpleUpdate(crc, tab, p)
         }
-       return update(crc, tab, p)
  }
  
  func (d *digest) Write(p []byte) (n int, err error) {
-       d.crc = Update(d.crc, d.tab, p)
+       switch d.tab {
+       case castagnoliTable:
+               d.crc = updateCastagnoli(d.crc, p)
+       case IEEETable:
+               // We only create digest objects through New() which takes care of
+               // initialization in this case.
+               d.crc = updateIEEE(d.crc, p)
+       default:
+               d.crc = simpleUpdate(d.crc, d.tab, p)
+       }
         return len(p), nil
  }
  
@@ -195,4 +201,7 @@ func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) }
  
  // ChecksumIEEE returns the CRC-32 checksum of data
  // using the IEEE polynomial.
-func ChecksumIEEE(data []byte) uint32 { return updateIEEE(0, data) }
+func ChecksumIEEE(data []byte) uint32 {
+       ieeeOnce.Do(ieeeInit)
+       return updateIEEE(0, data)
+}
diff --git a/src/hash/crc32/crc32_amd64.go b/src/hash/crc32/crc32_amd64.go

index a071cbcb8826cb97809b7e1ea3adc75a531a4342..369a436be9b0092053d93ca5d0188c3d42a49078 100644 (file)
--- a/src/hash/crc32/crc32_amd64.go
+++ b/src/hash/crc32/crc32_amd64.go
@@ -2,6 +2,10 @@
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
+// AMD64-specific hardware-assisted CRC32 algorithms. See crc32.go for a
+// description of the interface that each architecture-specific file
+// implements.
+
  package crc32
  
  import "unsafe"
@@ -45,9 +49,13 @@ type sse42Table [4]Table
  var castagnoliSSE42TableK1 *sse42Table
  var castagnoliSSE42TableK2 *sse42Table
  
-func castagnoliInitArch() (needGenericTables bool) {
+func archAvailableCastagnoli() bool {
+       return sse42
+}
+
+func archInitCastagnoli() {
         if !sse42 {
-               return true
+               panic("arch-specific Castagnoli not available")
         }
         castagnoliSSE42TableK1 = new(sse42Table)
         castagnoliSSE42TableK2 = new(sse42Table)
@@ -65,7 +73,6 @@ func castagnoliInitArch() (needGenericTables bool) {
                         castagnoliSSE42TableK2[b][i] = castagnoliSSE42(val, tmp[:])
                 }
         }
-       return false
  }
  
  // castagnoliShift computes the CRC32-C of K1 or K2 zeroes (depending on the
@@ -78,13 +85,9 @@ func castagnoliShift(table *sse42Table, crc uint32) uint32 {
                 table[0][crc&0xFF]
  }
  
-func updateCastagnoli(crc uint32, p []byte) uint32 {
+func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
         if !sse42 {
-               // Use slicing-by-8 on larger inputs.
-               if len(p) >= sliceBy8Cutoff {
-                       return updateSlicingBy8(crc, castagnoliTable8, p)
-               }
-               return update(crc, castagnoliTable, p)
+               panic("not available")
         }
  
         // This method is inspired from the algorithm in Intel's white paper:
@@ -193,24 +196,33 @@ func updateCastagnoli(crc uint32, p []byte) uint32 {
         return ^crc
  }
  
-func updateIEEE(crc uint32, p []byte) uint32 {
-       if useFastIEEE && len(p) >= 64 {
+func archAvailableIEEE() bool {
+       return useFastIEEE
+}
+
+var archIeeeTable8 *slicing8Table
+
+func archInitIEEE() {
+       if !useFastIEEE {
+               panic("not available")
+       }
+       // We still use slicing-by-8 for small buffers.
+       archIeeeTable8 = slicingMakeTable(IEEE)
+}
+
+func archUpdateIEEE(crc uint32, p []byte) uint32 {
+       if !useFastIEEE {
+               panic("not available")
+       }
+
+       if len(p) >= 64 {
                 left := len(p) & 15
                 do := len(p) - left
                 crc = ^ieeeCLMUL(^crc, p[:do])
-               if left > 0 {
-                       crc = update(crc, IEEETable, p[do:])
-               }
-               return crc
+               p = p[do:]
         }
-
-       // Use slicing-by-8 on larger inputs.
-       if len(p) >= sliceBy8Cutoff {
-               ieeeTable8Once.Do(func() {
-                       ieeeTable8 = makeTable8(IEEE)
-               })
-               return updateSlicingBy8(crc, ieeeTable8, p)
+       if len(p) == 0 {
+               return crc
         }
-
-       return update(crc, IEEETable, p)
+       return slicingUpdate(crc, archIeeeTable8, p)
  }
diff --git a/src/hash/crc32/crc32_amd64_test.go b/src/hash/crc32/crc32_amd64_test.go

deleted file mode 100644 (file)

index e136f78..0000000
--- a/src/hash/crc32/crc32_amd64_test.go
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package crc32
-
-import (
-       "math/rand"
-       "testing"
-)
-
-func TestCastagnoliSSE42(t *testing.T) {
-       if !sse42 {
-               t.Skip("SSE42 not supported")
-       }
-
-       // Init the SSE42 tables.
-       castagnoliOnce.Do(castagnoliInit)
-
-       // Generate a table to use with the non-SSE version.
-       slicingTable := makeTable8(Castagnoli)
-
-       // The optimized SSE4.2 implementation behaves differently for different
-       // lengths (especially around multiples of K*3). Crosscheck against the
-       // software implementation for various lengths.
-       for _, base := range []int{castagnoliK1, castagnoliK2, castagnoliK1 + castagnoliK2} {
-               for _, baseMult := range []int{2, 3, 5, 6, 9, 30} {
-                       for _, variation := range []int{0, 1, 2, 3, 4, 7, 10, 16, 32, 50, 128} {
-                               for _, varMult := range []int{-2, -1, +1, +2} {
-                                       length := base*baseMult + variation*varMult
-                                       p := make([]byte, length)
-                                       _, _ = rand.Read(p)
-                                       crcInit := uint32(rand.Int63())
-                                       correct := updateSlicingBy8(crcInit, slicingTable, p)
-                                       result := updateCastagnoli(crcInit, p)
-                                       if result != correct {
-                                               t.Errorf("SSE42 implementation = 0x%x want 0x%x (buffer length %d)",
-                                                       result, correct, len(p))
-                                       }
-                               }
-                       }
-               }
-       }
-}
diff --git a/src/hash/crc32/crc32_amd64p32.go b/src/hash/crc32/crc32_amd64p32.go

index 48d181f295a03462e660da5a5ae1944b5d1fbb9c..9d728fc8feb4fe8d01511ac7ea514a5ceca8a98a 100644 (file)
--- a/src/hash/crc32/crc32_amd64p32.go
+++ b/src/hash/crc32/crc32_amd64p32.go
@@ -11,37 +11,31 @@ package crc32
  // support.
  func haveSSE42() bool
  
-// castagnoliSSE42 is defined in crc32_amd64.s and uses the SSE4.2 CRC32
+// castagnoliSSE42 is defined in crc32_amd64p32.s and uses the SSE4.2 CRC32
  // instruction.
  //go:noescape
  func castagnoliSSE42(crc uint32, p []byte) uint32
  
  var sse42 = haveSSE42()
  
-func castagnoliInitArch() (needGenericTables bool) {
-       // We only need the generic implementation tables if we don't have SSE4.2.
-       return !sse42
+func archAvailableCastagnoli() bool {
+       return sse42
  }
  
-func updateCastagnoli(crc uint32, p []byte) uint32 {
-       if sse42 {
-               return castagnoliSSE42(crc, p)
+func archInitCastagnoli() {
+       if !sse42 {
+               panic("not available")
         }
-       // Use slicing-by-8 on larger inputs.
-       if len(p) >= sliceBy8Cutoff {
-               return updateSlicingBy8(crc, castagnoliTable8, p)
-       }
-       return update(crc, castagnoliTable, p)
+       // No initialization necessary.
  }
  
-func updateIEEE(crc uint32, p []byte) uint32 {
-       // Use slicing-by-8 on larger inputs.
-       if len(p) >= sliceBy8Cutoff {
-               ieeeTable8Once.Do(func() {
-                       ieeeTable8 = makeTable8(IEEE)
-               })
-               return updateSlicingBy8(crc, ieeeTable8, p)
+func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
+       if !sse42 {
+               panic("not available")
         }
-
-       return update(crc, IEEETable, p)
+       return castagnoliSSE42(crc, p)
  }
+
+func archAvailableIEEE() bool                    { return false }
+func archInitIEEE()                              { panic("not available") }
+func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") }
diff --git a/src/hash/crc32/crc32_generic.go b/src/hash/crc32/crc32_generic.go

index decf97306625bea2c78fb0d2411a9924a834ea28..abacbb663d45380e8aff449c2022f75d3499d9cb 100644 (file)
--- a/src/hash/crc32/crc32_generic.go
+++ b/src/hash/crc32/crc32_generic.go
@@ -2,32 +2,88 @@
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
-// +build !amd64,!amd64p32,!s390x
+// This file contains CRC32 algorithms that are not specific to any architecture
+// and don't use hardware acceleration.
+//
+// The simple (and slow) CRC32 implementation only uses a 256*4 bytes table.
+//
+// The slicing-by-8 algorithm is a faster implementation that uses a bigger
+// table (8*256*4 bytes).
  
  package crc32
  
-// This file contains the generic version of updateCastagnoli which does
-// slicing-by-8, or uses the fallback for very small sizes.
+// simpleMakeTable allocates and constructs a Table for the specified
+// polynomial. The table is suitable for use with the simple algorithm
+// (simpleUpdate).
+func simpleMakeTable(poly uint32) *Table {
+       t := new(Table)
+       simplePopulateTable(poly, t)
+       return t
+}
+
+// simplePopulateTable constructs a Table for the specified polynomial, suitable
+// for use with simpleUpdate.
+func simplePopulateTable(poly uint32, t *Table) {
+       for i := 0; i < 256; i++ {
+               crc := uint32(i)
+               for j := 0; j < 8; j++ {
+                       if crc&1 == 1 {
+                               crc = (crc >> 1) ^ poly
+                       } else {
+                               crc >>= 1
+                       }
+               }
+               t[i] = crc
+       }
+}
  
-func castagnoliInitArch() (needGenericTables bool) {
-       return true
+// simpleUpdate uses the simple algorithm to update the CRC, given a table that
+// was previously computed using simpleMakeTable.
+func simpleUpdate(crc uint32, tab *Table, p []byte) uint32 {
+       crc = ^crc
+       for _, v := range p {
+               crc = tab[byte(crc)^v] ^ (crc >> 8)
+       }
+       return ^crc
  }
  
-func updateCastagnoli(crc uint32, p []byte) uint32 {
-       // Use slicing-by-8 on larger inputs.
-       if len(p) >= sliceBy8Cutoff {
-               return updateSlicingBy8(crc, castagnoliTable8, p)
+// Use slicing-by-8 when payload >= this value.
+const slicing8Cutoff = 16
+
+// slicing8Table is array of 8 Tables, used by the slicing-by-8 algorithm.
+type slicing8Table [8]Table
+
+// slicingMakeTable constructs a slicing8Table for the specified polynomial. The
+// table is suitable for use with the slicing-by-8 algorithm (slicingUpdate).
+func slicingMakeTable(poly uint32) *slicing8Table {
+       t := new(slicing8Table)
+       simplePopulateTable(poly, &t[0])
+       for i := 0; i < 256; i++ {
+               crc := t[0][i]
+               for j := 1; j < 8; j++ {
+                       crc = t[0][crc&0xFF] ^ (crc >> 8)
+                       t[j][i] = crc
+               }
         }
-       return update(crc, castagnoliTable, p)
+       return t
  }
  
-func updateIEEE(crc uint32, p []byte) uint32 {
-       // Use slicing-by-8 on larger inputs.
-       if len(p) >= sliceBy8Cutoff {
-               ieeeTable8Once.Do(func() {
-                       ieeeTable8 = makeTable8(IEEE)
-               })
-               return updateSlicingBy8(crc, ieeeTable8, p)
+// slicingUpdate uses the slicing-by-8 algorithm to update the CRC, given a
+// table that was previously computed using slicingMakeTable.
+func slicingUpdate(crc uint32, tab *slicing8Table, p []byte) uint32 {
+       if len(p) >= slicing8Cutoff {
+               crc = ^crc
+               for len(p) > 8 {
+                       crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+                       crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
+                               tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
+                               tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
+                       p = p[8:]
+               }
+               crc = ^crc
+       }
+       if len(p) == 0 {
+               return crc
         }
-       return update(crc, IEEETable, p)
+       return simpleUpdate(crc, &tab[0], p)
  }
diff --git a/src/hash/crc32/crc32_otherarch.go b/src/hash/crc32/crc32_otherarch.go

new file mode 100644 (file)

index 0000000..cc96076
--- /dev/null
+++ b/src/hash/crc32/crc32_otherarch.go
@@ -0,0 +1,15 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64,!amd64p32,!s390x
+
+package crc32
+
+func archAvailableIEEE() bool                    { return false }
+func archInitIEEE()                              { panic("not available") }
+func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") }
+
+func archAvailableCastagnoli() bool                    { return false }
+func archInitCastagnoli()                              { panic("not available") }
+func archUpdateCastagnoli(crc uint32, p []byte) uint32 { panic("not available") }
diff --git a/src/hash/crc32/crc32_s390x.go b/src/hash/crc32/crc32_s390x.go

index 72d2648280ab54277f4472fabc09c0984037ca87..d13000d0584967326d761e5bccdf26bf28742866 100644 (file)
--- a/src/hash/crc32/crc32_s390x.go
+++ b/src/hash/crc32/crc32_s390x.go
@@ -25,62 +25,65 @@ func vectorizedCastagnoli(crc uint32, p []byte) uint32
  //go:noescape
  func vectorizedIEEE(crc uint32, p []byte) uint32
  
-func castagnoliInitArch() (needGenericTables bool) {
-       return true
+func archAvailableCastagnoli() bool {
+       return hasVX
  }
  
-func genericCastagnoli(crc uint32, p []byte) uint32 {
-       // Use slicing-by-8 on larger inputs.
-       if len(p) >= sliceBy8Cutoff {
-               return updateSlicingBy8(crc, castagnoliTable8, p)
-       }
-       return update(crc, castagnoliTable, p)
-}
+var archCastagnoliTable8 *slicing8Table
  
-func genericIEEE(crc uint32, p []byte) uint32 {
-       // Use slicing-by-8 on larger inputs.
-       if len(p) >= sliceBy8Cutoff {
-               ieeeTable8Once.Do(func() {
-                       ieeeTable8 = makeTable8(IEEE)
-               })
-               return updateSlicingBy8(crc, ieeeTable8, p)
+func archInitCastagnoli() {
+       if !hasVX {
+               panic("not available")
         }
-       return update(crc, IEEETable, p)
+       // We still use slicing-by-8 for small buffers.
+       archCastagnoliTable8 = slicingMakeTable(Castagnoli)
  }
  
-// updateCastagnoli calculates the checksum of p using
-// vectorizedCastagnoli if possible and falling back onto
-// genericCastagnoli as needed.
-func updateCastagnoli(crc uint32, p []byte) uint32 {
-       // Use vectorized function if vector facility is available and
-       // data length is above threshold.
-       if hasVX && len(p) >= vxMinLen {
+// archUpdateCastagnoli calculates the checksum of p using
+// vectorizedCastagnoli.
+func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
+       if !hasVX {
+               panic("not available")
+       }
+       // Use vectorized function if data length is above threshold.
+       if len(p) >= vxMinLen {
                 aligned := len(p) & ^vxAlignMask
                 crc = vectorizedCastagnoli(crc, p[:aligned])
                 p = p[aligned:]
-               // process remaining data
-               if len(p) > 0 {
-                       crc = genericCastagnoli(crc, p)
-               }
+       }
+       if len(p) == 0 {
                 return crc
         }
-       return genericCastagnoli(crc, p)
+       return slicingUpdate(crc, archCastagnoliTable8, p)
+}
+
+func archAvailableIEEE() bool {
+       return hasVX
+}
+
+var archIeeeTable8 *slicing8Table
+
+func archInitIEEE() {
+       if !hasVX {
+               panic("not available")
+       }
+       // We still use slicing-by-8 for small buffers.
+       archIeeeTable8 = slicingMakeTable(IEEE)
  }
  
-// updateIEEE calculates the checksum of p using vectorizedIEEE if
-// possible and falling back onto genericIEEE as needed.
-func updateIEEE(crc uint32, p []byte) uint32 {
-       // Use vectorized function if vector facility is available and
-       // data length is above threshold.
-       if hasVX && len(p) >= vxMinLen {
+// archUpdateIEEE calculates the checksum of p using vectorizedIEEE.
+func archUpdateIEEE(crc uint32, p []byte) uint32 {
+       if !hasVX {
+               panic("not available")
+       }
+       // Use vectorized function if data length is above threshold.
+       if len(p) >= vxMinLen {
                 aligned := len(p) & ^vxAlignMask
                 crc = vectorizedIEEE(crc, p[:aligned])
                 p = p[aligned:]
-               // process remaining data
-               if len(p) > 0 {
-                       crc = genericIEEE(crc, p)
-               }
+       }
+       if len(p) == 0 {
                 return crc
         }
-       return genericIEEE(crc, p)
+       return slicingUpdate(crc, archIeeeTable8, p)
  }
diff --git a/src/hash/crc32/crc32_test.go b/src/hash/crc32/crc32_test.go

index 7f7f0a2f744877e8311a9c230ec5cd8f0be142a0..1356734d50455b89b3bdf86add81371b92d360e1 100644 (file)
--- a/src/hash/crc32/crc32_test.go
+++ b/src/hash/crc32/crc32_test.go
@@ -6,7 +6,7 @@ package crc32
  
  import (
         "hash"
-       "io"
+       "math/rand"
         "testing"
  )
  
@@ -49,42 +49,150 @@ var golden = []test{
         {0x8e0bb443, 0xdcded527, "How can you write a big system without C++?  -Paul Glick"},
  }
  
+// testGoldenIEEE verifies that the given function returns
+// correct IEEE checksums.
+func testGoldenIEEE(t *testing.T, crcFunc func(b []byte) uint32) {
+       for _, g := range golden {
+               if crc := crcFunc([]byte(g.in)); crc != g.ieee {
+                       t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, crc, g.ieee)
+               }
+       }
+}
+
+// testGoldenCastagnoli verifies that the given function returns
+// correct IEEE checksums.
+func testGoldenCastagnoli(t *testing.T, crcFunc func(b []byte) uint32) {
+       for _, g := range golden {
+               if crc := crcFunc([]byte(g.in)); crc != g.castagnoli {
+                       t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, crc, g.castagnoli)
+               }
+       }
+}
+
+// testCrossCheck generates random buffers of various lengths and verifies that
+// the two "update" functions return the same result.
+func testCrossCheck(t *testing.T, crcFunc1, crcFunc2 func(crc uint32, b []byte) uint32) {
+       // The AMD64 implementation has some cutoffs at lengths 168*3=504 and
+       // 1344*3=4032. We should make sure lengths around these values are in the
+       // list.
+       lengths := []int{0, 1, 2, 3, 4, 5, 10, 16, 50, 100, 128,
+               500, 501, 502, 503, 504, 505, 512, 1000, 1024, 2000,
+               4030, 4031, 4032, 4033, 4036, 4040, 4048, 4096, 5000, 10000}
+       for _, length := range lengths {
+               p := make([]byte, length)
+               _, _ = rand.Read(p)
+               crcInit := uint32(rand.Int63())
+               crc1 := crcFunc1(crcInit, p)
+               crc2 := crcFunc2(crcInit, p)
+               if crc1 != crc2 {
+                       t.Errorf("mismatch: 0x%x vs 0x%x (buffer length %d)", crc1, crc2, length)
+               }
+       }
+}
+
+// TestSimple tests the simple generic algorithm.
+func TestSimple(t *testing.T) {
+       tab := simpleMakeTable(IEEE)
+       testGoldenIEEE(t, func(b []byte) uint32 {
+               return simpleUpdate(0, tab, b)
+       })
+
+       tab = simpleMakeTable(Castagnoli)
+       testGoldenCastagnoli(t, func(b []byte) uint32 {
+               return simpleUpdate(0, tab, b)
+       })
+}
+
+// TestSimple tests the slicing-by-8 algorithm.
+func TestSlicing(t *testing.T) {
+       tab := slicingMakeTable(IEEE)
+       testGoldenIEEE(t, func(b []byte) uint32 {
+               return slicingUpdate(0, tab, b)
+       })
+
+       tab = slicingMakeTable(Castagnoli)
+       testGoldenCastagnoli(t, func(b []byte) uint32 {
+               return slicingUpdate(0, tab, b)
+       })
+
+       // Cross-check various polys against the simple algorithm.
+       for _, poly := range []uint32{IEEE, Castagnoli, Koopman, 0xD5828281} {
+               t1 := simpleMakeTable(poly)
+               f1 := func(crc uint32, b []byte) uint32 {
+                       return simpleUpdate(crc, t1, b)
+               }
+               t2 := slicingMakeTable(poly)
+               f2 := func(crc uint32, b []byte) uint32 {
+                       return slicingUpdate(crc, t2, b)
+               }
+               testCrossCheck(t, f1, f2)
+       }
+}
+
+func TestArchIEEE(t *testing.T) {
+       if !archAvailableIEEE() {
+               t.Skip("Arch-specific IEEE not available.")
+       }
+       archInitIEEE()
+       slicingTable := slicingMakeTable(IEEE)
+       testCrossCheck(t, archUpdateIEEE, func(crc uint32, b []byte) uint32 {
+               return slicingUpdate(crc, slicingTable, b)
+       })
+}
+
+func TestArchCastagnoli(t *testing.T) {
+       if !archAvailableCastagnoli() {
+               t.Skip("Arch-specific Castagnoli not available.")
+       }
+       archInitCastagnoli()
+       slicingTable := slicingMakeTable(Castagnoli)
+       testCrossCheck(t, archUpdateCastagnoli, func(crc uint32, b []byte) uint32 {
+               return slicingUpdate(crc, slicingTable, b)
+       })
+}
+
  func TestGolden(t *testing.T) {
+       testGoldenIEEE(t, ChecksumIEEE)
+
+       // Some implementations have special code to deal with misaligned
+       // data; test that as well.
+       for delta := 1; delta <= 7; delta++ {
+               testGoldenIEEE(t, func(b []byte) uint32 {
+                       ieee := NewIEEE()
+                       d := delta
+                       if d >= len(b) {
+                               d = len(b)
+                       }
+                       ieee.Write(b[:d])
+                       ieee.Write(b[d:])
+                       return ieee.Sum32()
+               })
+       }
+
         castagnoliTab := MakeTable(Castagnoli)
         if castagnoliTab == nil {
                 t.Errorf("nil Castagnoli Table")
         }
  
-       for _, g := range golden {
-               ieee := NewIEEE()
-               io.WriteString(ieee, g.in)
-               s := ieee.Sum32()
-               if s != g.ieee {
-                       t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, s, g.ieee)
-               }
-
+       testGoldenCastagnoli(t, func(b []byte) uint32 {
                 castagnoli := New(castagnoliTab)
-               io.WriteString(castagnoli, g.in)
-               s = castagnoli.Sum32()
-               if s != g.castagnoli {
-                       t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
-               }
+               castagnoli.Write(b)
+               return castagnoli.Sum32()
+       })
  
-               // The SSE4.2 implementation of this has code to deal
-               // with misaligned data so we ensure that we test that
-               // too.
-               for delta := 1; delta <= 7; delta++ {
-                       if len(g.in) > delta {
-                               in := []byte(g.in)
-                               castagnoli = New(castagnoliTab)
-                               castagnoli.Write(in[:delta])
-                               castagnoli.Write(in[delta:])
-                               s = castagnoli.Sum32()
-                               if s != g.castagnoli {
-                                       t.Errorf("Castagnoli[misaligned](%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
-                               }
+       // Some implementations have special code to deal with misaligned
+       // data; test that as well.
+       for delta := 1; delta <= 7; delta++ {
+               testGoldenCastagnoli(t, func(b []byte) uint32 {
+                       castagnoli := New(castagnoliTab)
+                       d := delta
+                       if d >= len(b) {
+                               d = len(b)
                         }
-               }
+                       castagnoli.Write(b[:d])
+                       castagnoli.Write(b[d:])
+                       return castagnoli.Sum32()
+               })
         }
  }
author	Radu Berinde <radu@cockroachlabs.com>
	Sun, 28 Aug 2016 18:36:06 +0000 (14:36 -0400)
committer	Brad Fitzpatrick <bradfitz@golang.org>
	Wed, 31 Aug 2016 15:17:57 +0000 (15:17 +0000)
src/hash/crc32/crc32.go		patch \| blob \| history
src/hash/crc32/crc32_amd64.go		patch \| blob \| history
src/hash/crc32/crc32_amd64_test.go	[deleted file]	patch \| blob \| history
src/hash/crc32/crc32_amd64p32.go		patch \| blob \| history
src/hash/crc32/crc32_generic.go		patch \| blob \| history
src/hash/crc32/crc32_otherarch.go	[new file with mode: 0644]	patch \| blob
src/hash/crc32/crc32_s390x.go		patch \| blob \| history
src/hash/crc32/crc32_test.go		patch \| blob \| history